In [1]:
from __future__ import division
from __future__ import print_function

## Pattern Matching 

* <u>**Goal:**</u> Find all occurrences of a pattern in a text

* <u>**Input:**</u> Pattern $p = p_1, p_2, … p_n$ and text $t = t_1, t_2, … t_m$

* <u>**Output:**</u> All positions $1 \le i \le m - n + 1$ such that the $n$-letter substring of $t$ starting at $i$ matches $p$. (The code below reports 0-based offsets, i.e. $i - 1$.)

In [23]:
def bruteForcePatternMatching(p, t):
    """Return every 0-based offset at which pattern p occurs in text t.

    Each candidate start position is tested by comparing the slice of t
    of the same length as p against p itself. Overlapping occurrences are
    all reported, in increasing order. The result is an empty list when
    p is longer than t.
    """
    width = len(p)
    last_start = len(t) - width
    return [start
            for start in range(last_start + 1)
            if t[start:start + width] == p]

# Report every 0-based offset at which "ssi" occurs in the sample text
print(bruteForcePatternMatching(p="ssi", t="imissmissmississippi"))

[11, 14]


## Prefix *Trie* Match

In [24]:
def path(string, parent):
    """Insert string into the trie whose root is the dict parent.

    Walks down from parent one character at a time, reusing the child
    dict when a character is already present and creating an empty one
    otherwise. The node reached after the last character is marked with
    the key '$' (value True) to record that a complete string ends there.
    An empty string marks parent itself.

    The walk is a loop rather than a recursion, so the length of string
    is not bounded by the interpreter's recursion limit and no suffix
    copies of string are made along the way.

    Note: '$' is used as the end-of-string marker, so strings that
    themselves contain '$' collide with it.

    Args:
        string: the characters to insert, in order.
        parent: dict node to insert beneath; modified in place.

    Returns:
        None.
    """
    node = parent
    for ch in string:
        # setdefault returns the existing child, or installs a new empty one
        node = node.setdefault(ch, {})
    node['$'] = True

class PrefixTrie:
    """A set of strings stored as a trie of nested dictionaries.

    Every node is a dict mapping a character to its child node. A node at
    which a stored string ends additionally holds the key '$' (mapped to
    True instead of to a child dict).
    """
    def __init__(self):
        """ Start with an empty root node (a dict with no children)"""
        self.root = {}
    def add(self, string):
        """ Add a path for string starting from the Trie's root"""
        path(string, self.root)
    def match(self, string):
        """ Return True only if string was stored as a complete entry.

        Follows string character by character from the root. The result
        is False as soon as a character has no matching child, and also
        when the whole string is consumed but the node reached carries no
        '$' marker (string is only a proper prefix of stored entries).
        """
        node = self.root
        for c in string:
            # '$' maps to True rather than to a child dict, so a query that
            # contains '$' can land on a non-dict value; report a miss
            # instead of failing on a membership test against a bool.
            if not isinstance(node, dict) or c not in node:
                return False
            node = node[c]
        return isinstance(node, dict) and '$' in node

##  Examples

In [25]:
# Grow the trie one word at a time, dumping the nested dicts after each insert
T = PrefixTrie()
for fruit in ("apple", "banana", "apricot"):
    T.add(fruit)
    print(T.root)

{'a': {'p': {'p': {'l': {'e': {'$': True}}}}}}
{'a': {'p': {'p': {'l': {'e': {'$': True}}}}}, 'b': {'a': {'n': {'a': {'n': {'a': {'$': True}}}}}}}
{'a': {'p': {'p': {'l': {'e': {'$': True}}}, 'r': {'i': {'c': {'o': {'t': {'$': True}}}}}}}, 'b': {'a': {'n': {'a': {'n': {'a': {'$': True}}}}}}}


In [26]:
# Extend the trie from the previous cell with two more words
for extra in ("bandana", "orange"):
    T.add(extra)

# Dump the nested-dict structure, then query it
print(T.root)
print(T.match('orange'))
queries = ['apple', 'banana', 'apricot', 'orange', 'band', 'april', 'bandana', 'bananapple']
print(list(map(T.match, queries)))

{'a': {'p': {'p': {'l': {'e': {'$': True}}}, 'r': {'i': {'c': {'o': {'t': {'$': True}}}}}}}, 'b': {'a': {'n': {'a': {'n': {'a': {'$': True}}}, 'd': {'a': {'n': {'a': {'$': True}}}}}}}, 'o': {'r': {'a': {'n': {'g': {'e': {'$': True}}}}}}}
True
[True, True, True, True, False, False, True, False]


## Code for Arg Sorting

In [27]:
def argsort(input):
    """Return the indices of input ordered by the suffix starting at each index.

    Index i is ranked by comparing the slice input[i:], so positions are
    ordered first by their own element and, on ties, by the elements that
    follow. For a string this is the order of its suffixes.
    """
    indices = list(range(len(input)))
    indices.sort(key=lambda start: input[start:])
    return indices

# Numbers: the index order that sorts A, then A rearranged by that order
A = [72,27,45,36,18,54,9,63]
A_order = argsort(A)
print(A_order)
print([A[k] for k in A_order])

print()
# Strings: same demonstration on a list of words
B = ["TAGACAT", "AGACAT", "GACAT", "ACAT", "CAT", "AT", "T"]
B_order = argsort(B)
print(B_order)
print([B[k] for k in B_order])

[6, 4, 1, 3, 2, 5, 7, 0]
[9, 18, 27, 36, 45, 54, 63, 72]

[3, 1, 5, 4, 2, 6, 0]
['ACAT', 'AGACAT', 'AT', 'CAT', 'GACAT', 'T', 'TAGACAT']
