|
|
import re |
|
|
import difflib |
|
|
|
|
|
|
|
|
def replace_symbol(text):
    """Strip punctuation and line breaks from *text* and lowercase it.

    The ASCII/CJK punctuation marks ``, . , 。 、 ! ?`` are deleted.
    A run of newlines sitting directly between two non-space characters is
    turned into a single space so the neighbouring words are not fused
    together; every other newline (next to a space, or at either end of
    the text) is simply deleted.

    Args:
        text: The string to normalise.

    Returns:
        The lowercased string without punctuation or newlines.
    """
    without_punct = re.sub(r"[,.,。、!?]", "", text)
    # Punctuation is removed first so that "end.\nstart" is seen as
    # "end\nstart" and the line break is recognised as a word boundary.
    spaced = re.sub(r"(?<=[^ \n])\n+(?=[^ \n])", " ", without_punct)
    # Remaining newlines already have a space beside them (or sit at the
    # edges), so dropping them cannot merge two words.
    return spaced.replace("\n", "").lower()
|
|
def tokenize(text):
    """Split *text* into a list of whitespace-delimited words.

    Uses ``str.split()`` with no separator so that leading, trailing and
    repeated whitespace never produce empty-string tokens.

    Args:
        text: The string to split.

    Returns:
        A list of non-empty word strings; an empty list when *text* is
        empty or contains only whitespace.
    """
    return text.split()
|
|
def highlight_diff(a, b):
    """Return a word-level diff of *a* against *b* as one annotated string.

    Both inputs are passed through ``replace_symbol`` and ``tokenize`` and
    the resulting word lists are compared with ``difflib.SequenceMatcher``.

    Output format, with segments joined by single spaces:
      * unchanged words are emitted as-is;
      * words only in *a* are wrapped as ``[-words-]``;
      * words only in *b* are wrapped as ``{+words+}``;
      * a replacement emits ``[-old-]{+new+}`` back to back.

    Args:
        a: The original text.
        b: The text to compare against *a*.

    Returns:
        The annotated diff string.
    """
    # Empty tokens (from stray or trailing spaces) carry no content and
    # would otherwise surface as bogus markers such as "{+word +}".
    tokens_a = [tok for tok in tokenize(replace_symbol(a)) if tok]
    tokens_b = [tok for tok in tokenize(replace_symbol(b)) if tok]

    matcher = difflib.SequenceMatcher(None, tokens_a, tokens_b)

    output = []
    for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes():
        removed = ' '.join(tokens_a[a_start:a_end])
        added = ' '.join(tokens_b[b_start:b_end])
        if tag == 'equal':
            output.append(removed)
        elif tag == 'delete':
            output.append(f"[-{removed}-]")
        elif tag == 'insert':
            output.append(f"{{+{added}+}}")
        elif tag == 'replace':
            output.append(f"[-{removed}-]{{+{added}+}}")

    return ' '.join(output)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Demo: two versions of the same passage that differ in a handful of
    # words and punctuation marks.
    reference_text = "We built computers to expand our brains. Originally scientists built computers to solve arithmetic, but they turned out to be incredibly useful for many other things as well. Running the entire internet, lifelike graphics, artificial brains, or simulating the universe. But amazingly all of it boils down to just flipping zeros and ones. Computers have become smaller and more powerful at an incredible rate. There was more computing power in your cell phone than there was in the entire world in the mid-1960s. And the entire Apollo moon landing could have been run on a couple of Nintendos. Computer science is a subject that studies what computers can do. It's a diverse and overlapping field, but I'm going to split it into three parts. The fundamental theory of computer science, computer engineering, and applications."
    candidate_text = "We built computers to expand our brains. Originally, scientists built computers to solve arithmetic, but they turned out to be incredibly useful for many other things as well. running the entire internet, life-like graphics, artificial brains, or simulating the universe. But amazingly, all of it boils down to just flipping zeros and ones. Computers have become smaller, more powerful and at an incredible rate. There's more computing power in your cell phone than there was in the entire world in the mid-1960s. And the entire Apollo moon landing could have been run on a couple of Nintendos. Computer science is a subject that studies what computers can do. It's a diverse and overlapping field, but I'm going to split it into three parts. The fundamental theory of computer science, computer engineering, and communications. "

    # Print the annotated word-level diff of the two passages.
    diff_report = highlight_diff(reference_text, candidate_text)
    print(diff_report)
|
|
|