Spaces:
Sleeping
Sleeping
File size: 3,177 Bytes
1d39b8a cb2415a 4524238 3ee1970 4235ba5 11c10f2 1d39b8a 3ee1970 4524238 d0e1ffd 4524238 3ee1970 ed2703c 4524238 11c10f2 d0e1ffd ed2703c d0e1ffd 11c10f2 3ee1970 ed2703c 3ee1970 ed2703c cfb6a26 094d492 a385b05 330dfff 11c10f2 abdc326 3ee1970 094d492 3ee1970 ed2703c d0e1ffd ed2703c 094d492 3ee1970 a385b05 3ee1970 708dbc3 ed2703c 708dbc3 ed2703c 708dbc3 1d39b8a 674efd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import gradio as gr
from transformers import pipeline
import spacy
from gradio_client import Client
import re
# Initialize models
# spaCy English pipeline: used below for named-entity recognition and
# token-level tag extraction (#hashtags / @mentions).
nlp = spacy.load("en_core_web_sm")
# Seq2seq transformer that rewrites its input with spelling corrected.
# NOTE: both models load eagerly at import time, so startup is slow but
# per-request latency is lower.
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
def preprocess_capitalization(text: str) -> str:
    """Normalize per-word capitalization, leaving all-caps acronyms intact.

    Splits on single spaces so the original spacing is preserved on rejoin.
    """
    def _normalize(word: str) -> str:
        # All-uppercase tokens are treated as acronyms and kept verbatim.
        if re.fullmatch(r"[A-Z]+", word):
            return word
        # Tokens mixing upper and lower case are coerced to Title case.
        if re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
            return word[0].upper() + word[1:].lower()
        # Everything else (all-lowercase, digits, punctuation) is untouched.
        return word

    return " ".join(_normalize(w) for w in text.split(" "))
def preprocess_text(text: str, is_suggestion_applied: bool = False):
    """Generate correction suggestions, translate, and annotate *text*.

    Args:
        text: Raw user input.
        is_suggestion_applied: When True, the user has already accepted a
            suggestion, so no new suggestions are generated and the text is
            translated as-is.

    Returns:
        A ``(translation, result)`` tuple where ``result`` is a dict with
        keys ``spell_suggestions``, ``other_suggestions``, ``entities``
        and ``tags``. ``translation`` is the translated string, or an
        ``"Error: ..."`` string if the remote translation service failed.
    """
    result = {
        "spell_suggestions": [],
        "other_suggestions": [],
        "entities": [],
        "tags": [],
    }
    # Only generate suggestions on the first pass.
    if not is_suggestion_applied:
        # Capitalization fixes are surfaced as "spell suggestions".
        capitalized_text = preprocess_capitalization(text)
        if capitalized_text != text:
            result["spell_suggestions"].append({
                "original": text,
                "corrected": capitalized_text,
            })
        text = capitalized_text  # Use the normalized text downstream.
        # Transformer-based spelling fixes are "other suggestions".
        spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
        if spell_checked != text:
            result["other_suggestions"].append({
                "original": text,
                "corrected": spell_checked,
            })
    # Translate via the remote Frenchizer space. Client construction itself
    # performs network I/O and can raise, so it belongs INSIDE the try —
    # the original built it outside and would crash instead of returning
    # the intended "Error: ..." fallback.
    try:
        client = Client("Frenchizer/space_21")
        translation = client.predict(text)
    except Exception as e:
        translation = f"Error: {str(e)}"
    # Named entities and social-media tags from the (possibly corrected) text.
    doc = nlp(text)
    result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
    result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
    return translation, result
def preprocess_and_forward(text: str, is_suggestion_applied: bool = False):
    """Process text and forward to translation service."""
    # Thin pass-through: preprocess_text already returns the
    # (translation, preprocessing_result) pair the UI expects.
    return preprocess_text(text, is_suggestion_applied)
# Gradio interface
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Input Text")
    # Hidden flag: set when the user has already accepted a suggestion.
    is_suggestion_applied = gr.Checkbox(label="Suggestion Applied", value=False, visible=False)
    output_text = gr.Textbox(label="Output Text")
    # preprocess_and_forward returns TWO values (translation, result dict),
    # but the original wired only one output, which makes Gradio raise a
    # "too many output values" error on click. A hidden JSON component
    # absorbs the suggestions/entities payload so the arity matches.
    preprocessing_output = gr.JSON(label="Preprocessing Result", visible=False)
    preprocess_button = gr.Button("Process")
    preprocess_button.click(
        fn=preprocess_and_forward,
        inputs=[input_text, is_suggestion_applied],
        outputs=[output_text, preprocessing_output],
    )

if __name__ == "__main__":
    demo.launch()