|
|
import csv |
|
|
from datetime import datetime |
|
|
import subprocess |
|
|
from subprocess import CompletedProcess |
|
|
from typing import Literal |
|
|
import re |
|
|
import difflib |
|
|
|
|
|
import textdistance |
|
|
|
|
|
|
|
|
def get_time_str(level: Literal["d", "s", "ms"] = "d"):
    """Format the current local time at the requested granularity.

    Args:
        level: ``"d"`` yields the date as ``YYYY-MM-DD``; ``"s"`` yields the
            time of day as ``HHMMSS``; ``"ms"`` yields the time of day as
            ``HHMMSS.ffffff`` (``%f`` is the six-digit microsecond field).

    Returns:
        The formatted string, or ``None`` if ``level`` is not one of the
        values above.
    """
    now = datetime.now()
    layouts = (
        ("d", "%Y-%m-%d"),
        ("s", "%H%M%S"),
        ("ms", "%H%M%S.%f"),
    )
    for name, layout in layouts:
        if level == name:
            return now.strftime(layout)
    # Unknown level: fall through without a value, as callers may rely on.
    return None
|
|
|
|
|
|
|
|
def save_csv(file_path, header, rows):
    """Write ``rows`` (optionally preceded by ``header``) to a UTF-8 CSV file.

    Args:
        file_path: Destination path; an existing file is overwritten.
        header: Sequence of column names written as the first row. Skipped
            when falsy (``None`` or empty).
        rows: Iterable of row sequences passed to ``csv.writer.writerows``.
    """
    # newline="" hands line-ending control to the csv module. Without it,
    # platforms that translate "\n" on write (Windows) turn the writer's
    # "\r\n" terminator into "\r\r\n", which shows up as blank rows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        if header:
            writer.writerow(header)
        writer.writerows(rows)
    print(f"write csv to {file_path}")
|
|
|
|
|
def cmd(command: str, check=True, capture_output=False) -> CompletedProcess:
    """Echo ``command`` and run it through the shell.

    Args:
        command: Shell command line. It is handed to the shell verbatim
            (``shell=True``), so it must not be assembled from untrusted
            input.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.
        capture_output: When true, stdout and stderr are merged into one
            text stream, stored on the result's ``stdout`` attribute and
            printed once the command finishes. When false, the child writes
            straight to the parent's streams and ``stdout`` on the result
            is ``None``.

    Returns:
        The ``CompletedProcess`` for the finished command.
    """
    print(command)
    if not capture_output:
        # Nothing was captured, so there is nothing to echo; printing
        # ret.stdout here would emit a literal "None".
        return subprocess.run(command, shell=True, check=check)

    ret = subprocess.run(
        command,
        shell=True,
        check=check,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr with stdout
        universal_newlines=True,   # decode the captured output as text
    )
    print(ret.stdout)
    return ret
|
|
|
|
|
def clean_text_for_comparison_zh(text):
    """Normalise Chinese text for comparison.

    Removes spaces, newlines and the listed ASCII / full-width punctuation
    marks (including the hyphen), then lower-cases the result so embedded
    Latin letters compare case-insensitively.

    Args:
        text: The string to normalise.

    Returns:
        ``text`` with the listed symbols removed, lower-cased.
    """
    # The hyphen is escaped so it is a literal "-". Unescaped between "’"
    # and "《" it formed the range U+2019..U+300A, which left "-" in the
    # text and stripped unrelated symbols (arrows, math operators, ...).
    symbol_pattern = r"[ ,。、!?::‘’\-《》!?;,\n]"
    return re.sub(symbol_pattern, "", text).lower()
|
|
|
|
|
def clean_text_for_comparison_en(text):
    """Normalise English text for comparison.

    Drops every comma, period and newline, then lower-cases what remains.
    Spaces are kept, so the result can still be split into words.

    Args:
        text: The string to normalise.

    Returns:
        The stripped, lower-cased string.
    """
    punctuation = re.compile("[,.\n]")
    stripped = punctuation.sub("", text)
    return stripped.lower()
|
|
|
|
|
|
|
|
def run_textdistance(text1, text2):
    """Compute the Levenshtein distance between two sequences.

    Args:
        text1: The sequence whose length normalises the distance.
        text2: The sequence compared against ``text1``.

    Returns:
        A ``(distance, normalised)`` tuple, where ``normalised`` is
        ``distance / len(text1)``. When ``text1`` is empty the ratio is
        undefined: it is reported as ``0.0`` if ``text2`` is empty too
        (nothing differs) and as ``float("inf")`` otherwise.
    """
    if len(text1) == 0:
        # Turning an empty sequence into text2 takes exactly len(text2)
        # insertions; handling it here avoids dividing by zero below.
        d = len(text2)
        return d, (0.0 if d == 0 else float("inf"))

    d = textdistance.levenshtein.distance(text1, text2)
    nd = d / len(text1)
    return d, nd
|
|
|
|
|
def highlight_diff(a, b, spliter=""):
    """Render the differences between ``a`` and ``b`` as inline markup.

    Unchanged runs are emitted as-is, runs present only in ``a`` are wrapped
    as ``[-...-]`` and runs present only in ``b`` as ``{+...+}``. A replaced
    run is emitted as the deletion immediately followed by the insertion.

    Args:
        a: The original string.
        b: The changed string.
        spliter: Token separator. When non-empty, both strings are split on
            it and compared token by token; when empty, they are compared
            character by character.

    Returns:
        The marked-up segments joined with ``spliter``.
    """
    if spliter:
        a, b = a.split(spliter), b.split(spliter)

    def removed(lo, hi):
        # Markup for the slice of ``a`` that has no counterpart in ``b``.
        return f"[-{spliter.join(a[lo:hi])}-]"

    def added(lo, hi):
        # Markup for the slice of ``b`` that has no counterpart in ``a``.
        return f"{{+{spliter.join(b[lo:hi])}+}}"

    segments = []
    opcodes = difflib.SequenceMatcher(None, a, b).get_opcodes()
    for op, a_lo, a_hi, b_lo, b_hi in opcodes:
        if op == 'equal':
            segments.append(spliter.join(a[a_lo:a_hi]))
        elif op == 'delete':
            segments.append(removed(a_lo, a_hi))
        elif op == 'insert':
            segments.append(added(b_lo, b_hi))
        elif op == 'replace':
            segments.append(removed(a_lo, a_hi) + added(b_lo, b_hi))

    return spliter.join(segments)
|
|
|
|
|
def time_to_float(s: str):
    """Convert a duration string such as ``"1.5s"`` to a float.

    Every ``"s"`` character is removed before parsing.

    Args:
        s: The string to convert.

    Returns:
        The parsed number, or ``0.0`` when nothing is left after removing
        the ``"s"`` characters.

    Raises:
        ValueError: If the remaining text is not a valid float literal.
    """
    number_text = s.replace("s", "")
    if not number_text:
        return 0.0
    return float(number_text)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Demo: compare two English and two Chinese sample texts. Each pair is
    # normalised, then its edit distance and an inline diff are printed.
    english_first = "This sequence of events is an example of what is known as the Butterfly Effect, a manifestation of Chaos Theory. For many centuries, the world was explained through the laws of Isaac Newton in classical physics. According to these laws, if the current state of an object is known, its future behavior can be predicted with relative ease. Chaos Theory questions this deterministic vision. Not everything is predictable anymore, nor does it work like a clockwork. Since the 1800s, mathematicians have raised the idea that not all phenomena could be predicted by Newtonian laws. But a meteorologist named Edward Lawrence made Chaos Theory a visible phenomenon. It all started in 1961, when he was working on a mathematical model to forecast the weather. Lawrence entered data such as temperature, humidity, pressure, and wind direction into his computer. His computer would draw a graph modeling what the weather would be like. Not always accurate, but very close to reality."
    english_second = "of events is an example of what is known as the butterfly effect, a manifestation of chaos theory. For many centuries, the world was explained through the laws of Isaac Newton in classical physics. According to these laws, if the current state of an object is known, Its future behavior can be predicted with relative ease. Chaos theory questions this deterministic vision. Not everything is predictable anymore. nor does it work like clockwork. Since the 1800s, mathematicians have raised the idea that not all phenomena could be predicted by Newtonian laws. but a meteorologist named Edward Lawrence made chaos theory a visible phenomenon. It all started in 1961. when he was working on a mathematical model to forecast the weather. Lawrence entered data such as temperature, humidity, pressure and wind direction into his computer. His computer would draw a graph, modeling what the weather would be like. not always accurate, but very close to reality. "
    chinese_first = "后来我自己总结啊,微积分这么难,入门主要有几个原因。首先呢大部分的教材为了追求严谨,从一开始就使用了现代数学的这个所谓极限的概念。在它的基础之上向你介绍微积分。呃,问题是它是一个非常抽象的概念。对于大部分在接触微积分之前啊,主要的学习经验就是刷题啊,甚至是连题也不刷的。同学们来说呢,这种抽象语言会很陌生。而且如果你去了解微积分的历史的时候,你会发现极限这个概念啊是微积分创立之后大概一两百年才出现的这么个东西。你等于说我们现在公认的这些微积分的创始人,这些大佬们、牛顿啊、莱布尼茨、欧拉啊,连他们都不知道极限是什么。但是人家就是凭着直觉创建了微积分,当然作为教科书嘛,追求严谨无可厚非啊。虽然对于我来讲,过早的追求这种严谨,导致学习的人入门困难甚至入不了门。"
    chinese_second = "后来我自己总结啊,微积分这么难,入门主要有几个原因。首先呢大部分的教材为了追求严谨。从一开始就使用了现代数学的这个所谓极限的概念在它的基础之上。向你介绍微积分呃,问题是它是一个非常抽象的概念。对于大部分在接触微积分之前。主要的学习经验就是刷题,甚至是连题也不刷的。同学们来说呢,这种抽象语言会很陌生。而且如果你去了解微积分的历史的时候,你会发现极限这个概念啊是微积分创立之后大概一两百年。才出现的这么个东西。你等于说我们现在公认的这些微积分的创始人,这些大佬。牛顿啊、莱布尼茨啊、欧拉啊啊,连他们都不知道极限是谁,但是人家就是凭着直觉。创建了危机,当然作为教科书嘛追求严谨,无可厚非啊,虽然对于我来讲。过早的追求这种严谨,导致学习的人入门困难,甚至入不了门。"

    # Normalise all four texts up front, before anything is printed.
    english_pair = tuple(
        clean_text_for_comparison_en(sample)
        for sample in (english_first, english_second)
    )
    chinese_pair = tuple(
        clean_text_for_comparison_zh(sample)
        for sample in (chinese_first, chinese_second)
    )

    # English is diffed word by word (split on spaces); Chinese is diffed
    # character by character (empty separator).
    for (first, second), separator in ((english_pair, " "), (chinese_pair, "")):
        print(run_textdistance(first, second))
        print(highlight_diff(first, second, separator))