import streamlit as st
from multiprocessing import Process
from annotated_text import annotated_text
from bs4 import BeautifulSoup
import pandas as pd
import torch
import math
import re
import json
import requests
import spacy
import errant
import time
import os
def start_server():
    """Fetch the spaCy English model, then launch the inference server.

    The two shell commands run one after the other through ``os.system``.
    Their exit statuses are not inspected. The second command keeps this
    call blocked for as long as uvicorn is running, which is why the
    function is meant to be run in a separate process.
    """
    shell_commands = (
        "python3 -m spacy download en_core_web_sm",
        "uvicorn InferenceServer:app --port 8080 --host 0.0.0.0 --workers 2",
    )
    for command in shell_commands:
        os.system(command)
def load_models():
    """Make sure the model server on port 8080 is up and record the result.

    If nothing is listening on port 8080, ``start_server`` is launched in a
    daemon process and the port is polled once per second until it accepts
    connections. If the server process exits before the port opens, an
    error is shown and the function returns instead of polling forever.

    Side effects:
        Sets ``st.session_state['models_loaded']`` to ``True`` when the
        server is reachable, or to ``False`` when it failed to start.
    """
    if is_port_in_use(8080):
        st.success("Model server already running...")
    else:
        with st.spinner(text="Loading models, please wait..."):
            proc = Process(target=start_server, args=(), daemon=True)
            proc.start()
            while not is_port_in_use(8080):
                # A dead child can never open the port; bail out rather
                # than leaving the app stuck behind the spinner.
                if not proc.is_alive():
                    st.session_state['models_loaded'] = False
                    st.error("Model server failed to start.")
                    return
                time.sleep(1)
            st.success("Model server started.")
    st.session_state['models_loaded'] = True
def is_port_in_use(port):
    """Return True if a TCP connection to ``port`` on this machine succeeds.

    The probe targets the loopback address rather than the wildcard
    ``0.0.0.0``: the wildcard is a bind address and is not accepted as a
    connect destination on every platform, while a server bound to
    ``0.0.0.0`` is still reachable through loopback.

    Args:
        port: TCP port number to probe.

    Returns:
        ``True`` when something accepted the connection, ``False`` otherwise.
    """
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        # connect_ex reports failure as a non-zero error code instead of
        # raising, so a refused connection simply yields False.
        return probe.connect_ex(('127.0.0.1', port)) == 0
# Seed the flag on first use only; a value already stored in the session
# state is left untouched.
st.session_state.setdefault('models_loaded', False)
def show_highlights(input_text, corrected_sentence):
    """Render the differences between two sentences as annotated text.

    The markup returned by ``highlight`` is split into plain-text pieces
    and ``<d>``/``<a>``/``<c>`` elements. Each element becomes a
    ``(text, label, colour)`` annotation, using its ``edit`` attribute as
    the text and its ``type`` attribute as the label. Plain pieces are
    passed through unchanged.

    Args:
        input_text: The original sentence.
        corrected_sentence: The corrected sentence to compare against.

    Any exception raised along the way is reported with ``st.error`` and the
    script run is halted with ``st.stop``.
    """
    def strikeout(text):
        # Follow every character with U+0336 (combining long stroke
        # overlay) so the text renders struck through.
        return '\u0336'.join(text) + '\u0336'

    # Updated colors for better visibility on both themes
    color_map = {
        'd': '#ff6b6b',  # Soft red for deletions (works on both themes)
        'a': '#51cf66',  # Soft green for additions (works on both themes)
        'c': '#ffd43b',  # Soft yellow for changes (works on both themes)
    }

    try:
        highlight_text = highlight(input_text, corrected_sentence)
        # The capturing group keeps the matched elements in the split result.
        tokens = re.split(r'(<[dac]\s.*?<\/[dac]>)', highlight_text)

        annotations = []
        for token in tokens:
            # find_all is the supported spelling; findAll is a deprecated alias.
            tags = BeautifulSoup(token, 'html.parser').find_all()
            if not tags:
                annotations.append(token)
                continue

            element = tags[0]
            _tag = element.name
            _type = element['type']
            _text = element['edit']
            _color = color_map[_tag]
            if _tag == 'd':
                # Deletions show the element's own text, struck through.
                _text = strikeout(element.text)
            annotations.append((_text, _type, _color))

        annotated_text(*annotations)
    except Exception as e:
        st.error('Some error occured!' + str(e))
        st.stop()
def show_edits(input_text, corrected_sentence):
    """Display the edits between two sentences as a table.

    The rows returned by ``get_edits`` are loaded into a DataFrame indexed
    by edit type and rendered with ``st.table``.

    Args:
        input_text: The original sentence.
        corrected_sentence: The corrected sentence to compare against.

    On any exception a generic error is shown with ``st.error`` and the
    script run is halted with ``st.stop``.
    """
    column_names = [
        'type',
        'original word',
        'original start',
        'original end',
        'correct word',
        'correct start',
        'correct end',
    ]
    try:
        edit_rows = get_edits(input_text, corrected_sentence)
        edits_table = pd.DataFrame(edit_rows, columns=column_names)
        st.table(edits_table.set_index('type'))
    except Exception:
        st.error('Some error occured!')
        st.stop()
def highlight(orig, cor):
edits = _get_edits(orig, cor)
orig_tokens = orig.split()
ignore_indexes = []
for edit in edits:
edit_type = edit[0]
edit_str_start = edit[1]
edit_spos = edit[2]
edit_epos = edit[3]
edit_str_end = edit[4]
# if no_of_tokens(edit_str_start) > 1 ==> excluding the first token, mark all other tokens for deletion
for i in range(edit_spos+1, edit_epos):
ignore_indexes.append(i)
if edit_str_start == "":
if edit_spos - 1 >= 0:
new_edit_str = orig_tokens[edit_spos - 1]
edit_spos -= 1
else:
new_edit_str = orig_tokens[edit_spos + 1]
edit_spos += 1
if edit_type == "PUNCT":
st = "" + new_edit_str + ""
else:
st = "" + new_edit_str + ""
orig_tokens[edit_spos] = st
elif edit_str_end == "":
st = "
Powered by Transformers & FastAPI | Deployed on Hugging Face Spaces
© 2024 Muhammad Abdullah Rasheed