|
@ -2,8 +2,10 @@ from typing import Dict |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def make_km_table(pattern: str) -> Dict[str, int]:
    """Build the bad-character shift table for *pattern*.

    Every character of ``pattern`` except the last one is mapped to the
    distance from its right-most occurrence (ignoring the final position)
    to the end of the pattern.  Characters that do not appear in the
    table are expected to be handled by the caller.

    Args:
        pattern: The string that will be searched for.

    Returns:
        A dict mapping each character of ``pattern[:-1]`` to its shift
        width.  Empty when ``pattern`` has fewer than two characters.
    """
    pattern_length = len(pattern)
    # Later (right-most) occurrences overwrite earlier ones, so each
    # character ends up with its smallest shift.
    return {
        char: pattern_length - index_from_left - 1
        for index_from_left, char in enumerate(pattern[:-1])
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -11,44 +13,25 @@ class Bm(object): |
|
|
def __init__(self, text: str, pattern: str) -> None:
    """Store the search inputs and precompute the shift table.

    Args:
        text: The string to search in.
        pattern: The string to search for.
    """
    self.text = text
    self.pattern = pattern
    # Shift widths keyed by character, built once per pattern.
    self.table = make_km_table(pattern)
|
|
|
|
|
|
|
|
def decide_slide_width(self, c: str) -> int:
    """Return how far the search window should slide for character *c*.

    Args:
        c: A single character taken from the text.

    Returns:
        The shift stored in ``self.table`` for ``c``; when ``c`` has no
        entry, the full length of ``self.pattern``.

    Raises:
        AssertionError: If ``c`` is not exactly one character long.
    """
    assert len(c) == 1
    # Characters missing from the table let the window skip a whole pattern.
    return self.table.get(c, len(self.pattern))
|
|
|
|
|
|
|
|
def search(self) -> int:
    """Find the first occurrence of ``self.pattern`` in ``self.text``.

    The window is aligned by its last character (``head``).  When the
    text character under ``head`` equals the pattern's last character,
    the window is compared right-to-left; on any mismatch the window
    slides by ``decide_slide_width`` of the text character under
    ``head``.

    Returns:
        The index in ``self.text`` where the first match starts, or -1
        when there is no match.  An empty pattern matches at index 0.
    """
    pattern_length = len(self.pattern)
    if pattern_length == 0:
        # Guard: ``self.pattern[-1]`` below would raise IndexError.
        return 0

    text_length = len(self.text)
    last_char_of_pattern = self.pattern[-1]
    head = pattern_length - 1  # text index aligned with the pattern's end

    while head < text_length:
        if self.text[head] == last_char_of_pattern:
            for index_from_right, char in enumerate(reversed(self.pattern)):
                if self.text[head - index_from_right] != char:
                    head += self.decide_slide_width(self.text[head])
                    break
            else:
                # Every character matched: report the window's start.
                return head - pattern_length + 1
        else:
            head += self.decide_slide_width(self.text[head])
    return -1