| 
						
						
							
								
							
						
						
					 | 
				
				 | 
				
					@ -2,8 +2,32 @@ from typing import Dict | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					
 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					
 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					def make_km_table(pattern: str) -> Dict[str, int]:
					    """
					    Map every pattern character to its shift distance.

					    Each character of the pattern, except the final one, is stored with the
					    distance between its right-most occurrence and the end of the pattern.
					    The final character is deliberately left out: the search always compares
					    it first, so by the time this table is consulted that alignment has
					    already been tried and the next candidate is an *earlier* occurrence of
					    the same character, if one exists.

					    Example:
					      pattern = "ABCDAB"
					                 543210 <- distance from end of pattern
					      table = {
					        "A": 1,   # the second 'A'
					        "B": 4,   # the first 'B'; the trailing 'B' is skipped
					        "C": 3,
					        "D": 2,
					      }
					    """
					    last_index = len(pattern) - 1

					    # Walking left to right means a repeated character overwrites its
					    # earlier entry, so the right-most occurrence is the one that is kept.
					    return {
					        symbol: last_index - position
					        for position, symbol in enumerate(pattern[:-1])
					    }
				
			
			
		
	
	
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
				
				 | 
				
					@ -16,22 +40,51 @@ class Bm(object): | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        self.table = make_km_table(pattern) | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					
 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					    def decide_slide_width(self, c: str) -> int: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        """ | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        If a character 'c' doesn't match in the search, this decides how far | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        down to "slide" the pattern for the next search. | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        """ | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					
 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        # Note: The lookup table only has characters from the pattern in it. | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        # If the test character 'c' is not in the table, then we should skip | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        # down the entire length of the pattern | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        try: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            return self.table[c] | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        except KeyError: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            return len(self.pattern) | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					
 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					    def search(self) -> int: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        """ | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        Return the index of the *first* occurrence of the pattern in the | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        text, or '-1' if the pattern is not in the text. | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        """ | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					
 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        # The search works on one character at a time in the text. The current | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        # location is the "head" (like a read-head on a disk) | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        # It starts at the last character in the pattern, and gets moved in the | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        # loop until we reach the end of the text | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        PATTERN_LENGTH = len(self.pattern) | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        head = PATTERN_LENGTH - 1 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        while head < len(self.text): | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            # When the last character of the pattern matches the current head | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            # position, walk backwards through the text and match each | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            # character one by one until either they all match (return) or one | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            # doesn't match (slide the head down) | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            if self.text[head] == self.pattern[-1]: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                for index_from_right, char in enumerate(reversed(self.pattern)): | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                    if self.text[head - index_from_right] != char: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                        head += self.decide_slide_width(self.text[head]) | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                        break | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                # If the for loop finished without breaking we found a full | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                # match! | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                else: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                    # Note: head is always at the end of the pattern, but the | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                    # search should return the index of the start of the pattern | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                    return head - PATTERN_LENGTH + 1 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            # When the last character of the pattern *doesn't* match the current | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            # head, always slide down the head | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					            else: | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					                head += self.decide_slide_width(self.text[head]) | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					
 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        # If the main loop doesn't return, there was no match | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					        return -1 |