Ignore case and punctuation when comparing texts
Browse files- lib/utils.py +6 -4
lib/utils.py
CHANGED
|
@@ -40,7 +40,7 @@ def cmd(command: str, check=True, capture_output=False) -> CompletedProcess:
|
|
| 40 |
def replace_symbol(text):
|
| 41 |
symbol_pattern = "[,.,。!?\n]"
|
| 42 |
to = ""
|
| 43 |
-
return re.sub(symbol_pattern, to, text)
|
| 44 |
|
| 45 |
|
| 46 |
def run_textdistance(text1, text2):
|
|
@@ -52,6 +52,7 @@ def run_textdistance(text1, text2):
|
|
| 52 |
return d, nd
|
| 53 |
|
| 54 |
def highlight_diff(a, b):
|
|
|
|
| 55 |
matcher = difflib.SequenceMatcher(None, a, b)
|
| 56 |
output = []
|
| 57 |
for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes():
|
|
@@ -72,6 +73,7 @@ def time_to_float(s: str):
|
|
| 72 |
|
| 73 |
|
| 74 |
if __name__ == '__main__':
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
| 40 |
def replace_symbol(text, symbols=",.,。!?\n"):
    """Strip punctuation characters from *text* and lowercase the result.

    Used to normalize two texts before comparing them so that differences
    in case and punctuation are ignored.

    Args:
        text: The string to normalize.
        symbols: Every character in this string is deleted from *text*.
            Defaults to ASCII comma/period, full-width comma, ideographic
            full stop, ``!``, ``?`` and newline. Pass a different string
            to strip another set of characters (e.g. apostrophes).

    Returns:
        *text* with all characters in *symbols* removed, lowercased.
    """
    # str.translate deletes the characters in one pass and, unlike a regex
    # character class, needs no escaping for symbols such as "]", "^" or "-".
    return text.translate(str.maketrans("", "", symbols)).lower()
|
| 44 |
|
| 45 |
|
| 46 |
def run_textdistance(text1, text2):
|
|
|
|
| 52 |
return d, nd
|
| 53 |
|
| 54 |
def highlight_diff(a, b):
|
| 55 |
+
a, b = replace_symbol(a), replace_symbol(b)
|
| 56 |
matcher = difflib.SequenceMatcher(None, a, b)
|
| 57 |
output = []
|
| 58 |
for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes():
|
|
|
|
| 73 |
|
| 74 |
|
| 75 |
if __name__ == '__main__':
    # Manual smoke test: two nearly identical passages that differ in
    # capitalization, punctuation and a handful of words.
    sample_a = "This sequence of events is an example of what is known as the Butterfly Effect, a manifestation of Chaos Theory. For many centuries, the world was explained through the laws of Isaac Newton in classical physics. According to these laws, if the current state of an object is known, its future behavior can be predicted with relative ease. Chaos Theory questions this deterministic vision. Not everything is predictable anymore, nor does it work like a quirk. Since the 1800s, mathematicians have raised the idea that not all phenomena could be predicted by Newtonian laws. But a meteorologist named Edward Lawrence made Chaos Theory a visible phenomenon. It all started in 1961, when he was working on a mathematical model to forecast the weather. Lawrence entered data such as temperature, humidity, pressure, and wind direction into his computer. His computer would draw a graph modeling what the weather would be like. Not always accurate, but very close to reality. G."
    sample_b = "This sequence of events is an example of what is known as the butterfly effect, a manifestation of chaos theory. For many centuries, the world was explained through the laws of Isaac Newton in classical physics. According to these laws, if the current state of an object is known, Its future behavior can be predicted with relative ease. Chaos theory questions this deterministic vision. Not everything is predictable anymore. nor does it work like clockwork. Since the 1800s, mathematicians have raised the idea that not all phenomena could be predicted by Newtonian laws. But a meteorologist named Edward Lawrence made Kale's theory a visible phenomenon. It all started in 1961, when he was working on a mathematical model to forecast the weather. Lawrence entered data such as temperature, humidity, pressure, and wind direction into his computer. His computer would draw a graph, modeling what the weather would be like. Not always accurate, but very close to reality. "
    # Print the distance result first, then the highlighted diff.
    for compare in (run_textdistance, highlight_diff):
        print(compare(sample_a, sample_b))
|