Spaces:
Runtime error
Runtime error
| # %load apps/slider.py | |
| from cgitb import html | |
| import time | |
| import streamlit as st | |
| from annotated_text import annotated_text | |
| import json | |
| import random | |
| import corrector | |
| import classifier | |
| from difflib import SequenceMatcher | |
def underline_diff(a, b):
    """Return an HTML snippet marking the word-level changes from ``a`` to ``b``.

    Both strings are split on whitespace and compared word by word with
    ``difflib.SequenceMatcher``:

    * unchanged words are emitted as-is;
    * words only present in ``b`` are wrapped in ``<ins>...</ins>``;
    * words only present in ``a`` are wrapped in ``<del>...</del>``;
    * replaced words show only the new text from ``b`` inside ``<ins>``
      (the words they replace are not shown).

    The word text is HTML-escaped so that ``<``, ``>`` and ``&`` coming from
    the input cannot be interpreted as markup when the result is rendered
    as raw HTML; only the ``<ins>``/``<del>`` tags added here are markup.

    Args:
        a: The original sentence.
        b: The modified sentence.

    Returns:
        The pieces joined with single spaces (original whitespace runs are
        collapsed by the split/join round trip).
    """
    # source: https://stackoverflow.com/questions/774316/python-difflib-highlighting-differences-inline
    # Imported locally under an explicit name: this file's import block binds
    # the module-level name ``html`` to ``cgitb.html``, not the ``html`` module.
    from html import escape

    a_words = a.split()
    b_words = b.split()
    seqm = SequenceMatcher(None, a_words, b_words)

    def render(words):
        # quote=False: the text only ever lands in element content, so quotes
        # (e.g. French apostrophes) can stay untouched.
        return escape(' '.join(words), quote=False)

    output = []
    for opcode, a0, a1, b0, b1 in seqm.get_opcodes():
        if opcode == 'equal':
            output.append(render(a_words[a0:a1]))
        elif opcode in ('insert', 'replace'):
            output.append("<ins>" + render(b_words[b0:b1]) + "</ins>")
        elif opcode == 'delete':
            output.append("<del>" + render(a_words[a0:a1]) + "</del>")
    return ' '.join(output)
# Streamlit page: lets the user type (or draw at random) a French sentence,
# then shows the classifier's labelled errors next to the corrector's output.
#
# NOTE(review): the indentation of this script was lost in the copy it was
# restored from; the nesting below is reconstructed from the statement order.
# Places where the original nesting is a guess are marked individually.

# Load the pool of example sentences, one per line. The file holds French
# text, so it is read with an explicit encoding instead of the platform
# default (assumes examples.txt is UTF-8 -- confirm). Lines are stripped and
# blank lines dropped so that a picked example never carries a trailing
# newline and is never empty.
with open("examples.txt", "r", encoding="utf-8") as f:
    example_sentences = [line.strip() for line in f if line.strip()]

st.title("Virtual tutor - Proof of concept")

with st.sidebar:
    st.image("seedlang_logo.png", width=60)
    st.write("You can test here the Grammatical Error Correction and Classification models I developed as part of my internship at Seedlang.")

    ## include 5 error types
    st.subheader("Error types")
    st.write("This PoC corrects and detects 5 type of French beginner mistakes:")
    # Human-readable description for each error label; the values are HTML
    # fragments rendered through st.markdown below.
    error_descriptions = {
        "ADJ": """adjective has wrong gender agreement (<i>*la grand table</i>)""",
        "ART": """article has wrong gender agreement (<i>*le table</i>)""",
        "ELI": """elision is missing (<i>*je adore</i>)""",
        "FIN": """non-finite form of the verb instead of a finite form (<i>*je adorer</i>)""",
        "NEG": """problem with the position of the negation (<i>*je ne pas adore</i>)"""
    }
    # One legend entry per classifier label: a pill in the label's colour
    # followed by its description. Every key of classifier.COLOURS must have
    # an entry in error_descriptions, otherwise the lookup raises KeyError.
    for error_type, colour in classifier.COLOURS.items():
        label = f"""<span style="background: {colour}; border-radius: 0.33rem; padding: 0.125rem 0.5rem; overflow: hidden;"><span style="padding-left: 0.5rem; text-transform: uppercase;"><span style="font-size: 0.67em; opacity: 0.8;">{error_type}</span></span></span> - {error_descriptions[error_type]}"""
        st.markdown(label, unsafe_allow_html=True)

    # NOTE(review): this caption may originally have been outside the
    # sidebar block -- confirm.
    st.caption("The example sentences are taken from the development dataset. The errors were artificially generated, and many of the original sentences were too.")

# Keep the current example in session state so it survives Streamlit reruns
# and only changes when the user asks for another one.
if "value" not in st.session_state:
    st.session_state["value"] = random.choice(example_sentences)

random_ex = st.button("Get another random example")
if random_ex:
    st.session_state["value"] = random.choice(example_sentences)

sentence = st.text_input("Write a French sentence to correct and label", value=st.session_state["value"])
submit = st.button("Correct")

if submit:
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Labelled errors")
        with st.spinner("Labelling errors"):
            # classifier.annotate's return value is unpacked as the
            # positional arguments of annotated_text.
            annotated_text(*classifier.annotate(sentence))

    with col2:
        st.subheader("Corrected sentence")
        with st.spinner("Correcting the sentence"):
            corrected_sent = corrector.correct(sentence)
        # NOTE(review): may originally have been inside the spinner block.
        st.markdown(underline_diff(a=sentence, b=corrected_sent), unsafe_allow_html=True)

    # NOTE(review): this caption may originally have been shown
    # unconditionally (outside the `if submit:` branch) -- confirm.
    st.caption("Those two models are completely independent, but I eventually want to only correct the character spans where an error was detected and classified.")