In []:
# a one-line function to compute a Suffix array
def argsort(text):
    return sorted(range(len(text)), cmp=lambda i,j: -1 if text[i:] < text[j:] else 1)
In []:
text = "mississippi$"
suffixArray = argsort(text)
for i, j in enumerate(suffixArray):
    print i, text[j:]
In []:
def findFirst(pattern, text, sfa):
    """ Finds the index of the first occurrence of pattern in the suffix array """
    hi = len(text)
    lo = 0
    while (lo < hi):
        mid = (lo+hi)//2
        if (pattern > text[sfa[mid]:]):
            lo = mid + 1
        else:
            hi = mid
    return lo

def findLast(pattern, text, sfa):
    """ Finds the index of the last occurrence of pattern in the suffix array """
    hi = len(text)
    lo = 0
    m = len(pattern)
    while (lo < hi):
        mid = (lo+hi)//2
        i = sfa[mid]
        if (pattern >= text[i:i+m]):
            lo = mid + 1
        else:
            hi = mid
    return lo
In []:
target = "is"
start = findFirst(target, text, suffixArray)
end = findLast(target, text, suffixArray)
print str((start, end))
for i in xrange(start,end):
    print i, text[suffixArray[i]:]
In []:
mGene = open("MouseGene.seq", 'r').read()
print mGene
In [36]:
%timeit argsort(mGene)
100 loops, best of 3: 13.5 ms per loop

In []:
suffixArray = argsort(mGene)
target = "CAT"
start = findFirst(target, mGene, suffixArray)
end = findLast(target, mGene, suffixArray)
print str((start, end))
for i in xrange(start,end):
    print "%6d %s" % (suffixArray[i], mGene[suffixArray[i]:min(len(suffixArray), suffixArray[i]+20)])
In []:
def BWT(s):
    # create a table, with rows of all possible rotations of s
    rotation = [s[i:] + s[:i] for i in xrange(len(s))] 
    # sort rows alphabetically
    rotation.sort() 
    # return (last column of the table)
    return "".join([r[-1] for r in rotation])

def inverseBWT(s):
    # initialize table from s
    table = [c for c in s]
    # repeat length(s) - 1 times
    for j in xrange(len(s)-1):
        # sort rows of the table alphabetically
        table.sort()
        # insert s as the first column
        table = [s[i]+table[i] for i in xrange(len(s))]
    # return (row that ends with the 'EOS' character)
    return table[[r[-1] for r in table].index('$')]
In []:
bwt = BWT("mississippi$")
print bwt
In []:
print inverseBWT(bwt)
In []: