| from __future__ import annotations |
|
|
| from warnings import warn |
|
|
| from Levenshtein import distance, editops, matching_blocks, opcodes, ratio |
|
|
|
|
| class StringMatcher: |
| """A SequenceMatcher-like class built on the top of Levenshtein""" |
|
|
| def _reset_cache(self): |
| self._ratio = self._distance = None |
| self._opcodes = self._editops = self._matching_blocks = None |
|
|
| def __init__(self, isjunk=None, seq1="", seq2="", autojunk=False): |
| if isjunk: |
| warn("isjunk NOT implemented, it will be ignored", stacklevel=1) |
| if autojunk: |
| warn("autojunk NOT implemented, it will be ignored", stacklevel=1) |
| self._str1, self._str2 = seq1, seq2 |
| self._reset_cache() |
|
|
| def set_seqs(self, seq1, seq2): |
| self._str1, self._str2 = seq1, seq2 |
| self._reset_cache() |
|
|
| def set_seq1(self, seq1): |
| self._str1 = seq1 |
| self._reset_cache() |
|
|
| def set_seq2(self, seq2): |
| self._str2 = seq2 |
| self._reset_cache() |
|
|
| def get_opcodes(self): |
| if not self._opcodes: |
| if self._editops: |
| self._opcodes = opcodes(self._editops, self._str1, self._str2) |
| else: |
| self._opcodes = opcodes(self._str1, self._str2) |
| return self._opcodes |
|
|
| def get_editops(self): |
| if not self._editops: |
| if self._opcodes: |
| self._editops = editops(self._opcodes, self._str1, self._str2) |
| else: |
| self._editops = editops(self._str1, self._str2) |
| return self._editops |
|
|
| def get_matching_blocks(self): |
| if not self._matching_blocks: |
| self._matching_blocks = matching_blocks(self.get_opcodes(), self._str1, self._str2) |
| return self._matching_blocks |
|
|
| def ratio(self): |
| if not self._ratio: |
| self._ratio = ratio(self._str1, self._str2) |
| return self._ratio |
|
|
| def quick_ratio(self): |
| |
| if not self._ratio: |
| self._ratio = ratio(self._str1, self._str2) |
| return self._ratio |
|
|
| def real_quick_ratio(self): |
| len1, len2 = len(self._str1), len(self._str2) |
| return 2.0 * min(len1, len2) / (len1 + len2) |
|
|
| def distance(self): |
| if not self._distance: |
| self._distance = distance(self._str1, self._str2) |
| return self._distance |
|
|