Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,65 +5,61 @@ from textblob import TextBlob
|
|
| 5 |
from gradio_client import Client
|
| 6 |
|
| 7 |
# Initialize models
|
| 8 |
-
nlp = spacy.load("en_core_web_sm")
|
| 9 |
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
| 10 |
|
| 11 |
def preprocess_text(text: str):
|
| 12 |
-
"""
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
token_suggestions = {"original": token, "suggestions": []}
|
| 21 |
-
|
| 22 |
-
# Basic spell checking
|
| 23 |
-
corrected = str(TextBlob(token).correct())
|
| 24 |
-
if corrected != token:
|
| 25 |
-
token_suggestions["suggestions"].append(corrected)
|
| 26 |
-
|
| 27 |
-
# Transformer-based spell checking
|
| 28 |
-
spell_checked = spell_checker(token, max_length=20)[0]['generated_text']
|
| 29 |
-
if spell_checked != token and spell_checked not in token_suggestions["suggestions"]:
|
| 30 |
-
token_suggestions["suggestions"].append(spell_checked)
|
| 31 |
-
|
| 32 |
-
suggestions.append(token_suggestions)
|
| 33 |
-
|
| 34 |
-
# Named Entity Recognition (NER)
|
| 35 |
doc = nlp(text)
|
| 36 |
-
entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
|
| 37 |
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
"""
|
| 42 |
-
|
| 43 |
-
"""
|
| 44 |
-
processed_data = preprocess_text(text)
|
| 45 |
-
final_text = " ".join([t['suggestions'][0] if t['suggestions'] else t['original'] for t in processed_data["tokens"]])
|
| 46 |
-
translation = forward_to_translation(final_text)
|
| 47 |
-
return processed_data, translation # Unpacking dictionary values separately
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 53 |
client = Client("Frenchizer/space_17")
|
| 54 |
try:
|
| 55 |
-
|
|
|
|
| 56 |
except Exception as e:
|
| 57 |
-
return f"Error: {str(e)}"
|
| 58 |
|
| 59 |
# Gradio interface
|
| 60 |
with gr.Blocks() as demo:
|
| 61 |
input_text = gr.Textbox(label="Input Text")
|
| 62 |
-
output_text = gr.Textbox(label="
|
| 63 |
-
suggestion_output = gr.JSON(label="Suggestions")
|
| 64 |
-
|
| 65 |
preprocess_button = gr.Button("Process")
|
| 66 |
-
preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text], outputs=[
|
| 67 |
|
| 68 |
if __name__ == "__main__":
|
| 69 |
demo.launch()
|
|
|
|
| 5 |
from gradio_client import Client

# Initialize models
# spaCy English pipeline: used below for NER and for tokenizing out #/@ tags.
nlp = spacy.load("en_core_web_sm")
# Transformer spell corrector (text2text generation); applied to the whole
# input string in preprocess_text.
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
| 10 |
|
| 11 |
def preprocess_text(text: str):
    """Spell-check *text* and extract named entities and #/@ tags.

    Returns a ``(text, result)`` tuple. ``text`` is echoed back unchanged;
    ``result`` is a dict with:
      - "spell_suggestions": list of {"original", "corrected"} dicts —
        the TextBlob correction first, then the transformer correction
        when it differs from both the input and the TextBlob output,
      - "entities": spaCy entities as {"text", "label"} dicts,
      - "tags": tokens whose text starts with '#' or '@'.
    """
    analysis = {"spell_suggestions": [], "entities": [], "tags": []}

    parsed = nlp(text)

    # Whole-string correction via TextBlob.
    corrected = str(TextBlob(text).correct())
    if corrected != text:
        analysis["spell_suggestions"].append(
            {"original": text, "corrected": corrected}
        )

    # Transformer-based correction; skipped when it just repeats the
    # input or duplicates the TextBlob suggestion.
    transformer_fix = spell_checker(text, max_length=512)[0]['generated_text']
    if transformer_fix != text and transformer_fix != corrected:
        analysis["spell_suggestions"].append(
            {"original": text, "corrected": transformer_fix}
        )

    # Named entities and social-media style tags from the spaCy parse.
    # NOTE(review): spaCy usually splits '#tag' into two tokens, so the
    # startswith check may rarely fire — confirm against real inputs.
    analysis["entities"] = [
        {"text": ent.text, "label": ent.label_} for ent in parsed.ents
    ]
    analysis["tags"] = [
        tok.text for tok in parsed if tok.text.startswith(('#', '@'))
    ]

    return text, analysis
|
| 44 |
+
|
| 45 |
+
def preprocess_and_forward(text: str):
    """Preprocess *text* locally, then forward it to the translation Space.

    Returns a two-element list ``[translation_or_error, preprocessing_result]``.
    The preprocessing result is returned even when the remote call fails,
    with the first element set to an ``"Error: ..."`` string.
    """
    original_text, preprocessing_result = preprocess_text(text)

    # Forward the ORIGINAL text (not the corrected form) to translation.
    try:
        # Fix: Client construction performs network I/O against the remote
        # Space; it previously sat outside the try, so a connection failure
        # escaped the handler instead of producing the error-string return.
        client = Client("Frenchizer/space_17")
        translation = client.predict(original_text)
        return [translation, preprocessing_result]
    except Exception as e:
        return [f"Error: {str(e)}", preprocessing_result]
|
| 56 |
|
| 57 |
# Gradio interface
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Input Text")
    output_text = gr.Textbox(label="Output Text")
    # Fix: preprocess_and_forward returns a two-item list
    # [translation, preprocessing_result], but only one output component
    # was wired, so the preprocessing result was lost/mis-rendered.
    # Restore the JSON panel and map both return values.
    suggestion_output = gr.JSON(label="Suggestions")
    preprocess_button = gr.Button("Process")
    preprocess_button.click(
        fn=preprocess_and_forward,
        inputs=[input_text],
        outputs=[output_text, suggestion_output],
    )

if __name__ == "__main__":
    demo.launch()
|