import base64
import unicodedata
from enum import Enum

import diff_match_patch as dmp_module
import streamlit as st


class Action(Enum):
    """Operation codes expected as the first element of each diff pair."""

    INSERTION = 1
    DELETION = -1
    EQUAL = 0


def compare_string(text1: str, text2: str) -> list:
    """Diff two strings after applying Unicode NFKC normalization to both.

    Args:
        text1: First argument passed to ``diff_main`` (after normalization).
        text2: Second argument passed to ``diff_main`` (after normalization).

    Returns:
        The list returned by ``diff_match_patch.diff_main`` after it has been
        passed through ``diff_cleanupSemantic``. ``style_text`` consumes it
        as ``(action, text)`` pairs.
    """
    normalized1 = unicodedata.normalize("NFKC", text1)
    normalized2 = unicodedata.normalize("NFKC", text2)
    dmp = dmp_module.diff_match_patch()
    diff = dmp.diff_main(normalized1, normalized2)
    # The return value is ignored: the cleanup is expected to edit `diff`
    # in place, which is why the same list is returned below.
    dmp.diff_cleanupSemantic(diff)
    return diff


def style_text(diff):
    """Join the text of every diff segment into one Markdown-safe string.

    Args:
        diff: Iterable of ``(action, text)`` pairs where ``action`` is one of
            the ``Action`` values (1, -1 or 0).

    Returns:
        The concatenated segment texts, with ``](`` rewritten to ``]\\(`` and
        ``~`` rewritten to ``\\~``.

    Raises:
        ValueError: If a segment carries an action that is not an ``Action``
            value.
    """
    # NOTE(review): all three templates currently emit the text unchanged, so
    # insertions and deletions are not visually distinguished. The legend
    # rendered by main() suggests per-action markup was intended -- confirm
    # and fill in the templates here.
    templates = {
        Action.INSERTION.value: "{}",
        Action.DELETION.value: "{}",
        Action.EQUAL.value: "{}",
    }
    parts = []
    for action, text in diff:
        template = templates.get(action)
        if template is None:
            raise ValueError(f"Unknown diff action: {action!r}")
        parts.append(template.format(text))
    full_text = "".join(parts)
    # Escape after joining so a "](" that straddles two segments is caught.
    # Raw strings avoid the invalid "\(" and "\~" escape sequences.
    return full_text.replace("](", r"]\(").replace("~", r"\~")


def main() -> None:
    """Render two text areas side by side and show their diff below them."""
    col1, col2 = st.columns(2)
    with col1:
        text1 = st.text_area(
            "Input Text",
            value="""The quick brown fox jumps over the lazy dog. この竹垣に竹立てかけたのは竹立てかけたかったからか竹立てかけ""",
        )
    with col2:
        text2 = st.text_area(
            "Source Text",
            value="""The quick brown fox jumps over the lazy cat. この竹垣に竹立てかけたのは竹立てかけたかったから竹立てかけた""",
        )
    # The source text is passed first and the input text second.
    diff = compare_string(text2, text1)
    full_text = style_text(diff)
    # NOTE(review): the text-area content reaches st.markdown with
    # unsafe_allow_html=True without HTML escaping -- confirm this is intended.
    st.markdown("削除 追加", unsafe_allow_html=True)
    st.markdown(full_text, unsafe_allow_html=True)


if __name__ == "__main__":
    main()