Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,65 +5,61 @@ from textblob import TextBlob
|
|
| 5 |
from gradio_client import Client
|
| 6 |
|
| 7 |
# Initialize models
|
| 8 |
-
nlp = spacy.load("en_core_web_sm")
|
| 9 |
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
| 10 |
|
| 11 |
def preprocess_text(text: str):
|
| 12 |
-
"""
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
token_suggestions = {"original": token, "suggestions": []}
|
| 21 |
-
|
| 22 |
-
# Basic spell checking
|
| 23 |
-
corrected = str(TextBlob(token).correct())
|
| 24 |
-
if corrected != token:
|
| 25 |
-
token_suggestions["suggestions"].append(corrected)
|
| 26 |
-
|
| 27 |
-
# Transformer-based spell checking
|
| 28 |
-
spell_checked = spell_checker(token, max_length=20)[0]['generated_text']
|
| 29 |
-
if spell_checked != token and spell_checked not in token_suggestions["suggestions"]:
|
| 30 |
-
token_suggestions["suggestions"].append(spell_checked)
|
| 31 |
-
|
| 32 |
-
suggestions.append(token_suggestions)
|
| 33 |
-
|
| 34 |
-
# Named Entity Recognition (NER)
|
| 35 |
doc = nlp(text)
|
| 36 |
-
entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
|
| 37 |
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
"""
|
| 42 |
-
|
| 43 |
-
"""
|
| 44 |
-
processed_data = preprocess_text(text)
|
| 45 |
-
final_text = " ".join([t['suggestions'][0] if t['suggestions'] else t['original'] for t in processed_data["tokens"]])
|
| 46 |
-
translation = forward_to_translation(final_text)
|
| 47 |
-
return processed_data, translation # Unpacking dictionary values separately
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 53 |
client = Client("Frenchizer/space_17")
|
| 54 |
try:
|
| 55 |
-
|
|
|
|
| 56 |
except Exception as e:
|
| 57 |
-
return f"Error: {str(e)}"
|
| 58 |
|
| 59 |
# Gradio interface
|
| 60 |
with gr.Blocks() as demo:
|
| 61 |
input_text = gr.Textbox(label="Input Text")
|
| 62 |
-
output_text = gr.Textbox(label="
|
| 63 |
-
suggestion_output = gr.JSON(label="Suggestions")
|
| 64 |
-
|
| 65 |
preprocess_button = gr.Button("Process")
|
| 66 |
-
preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text], outputs=[
|
| 67 |
|
| 68 |
if __name__ == "__main__":
|
| 69 |
demo.launch()
|
|
|
|
| 5 |
from gradio_client import Client

# Initialize models
# spaCy English pipeline: used below for NER and for tokenizing out #/@ tags.
nlp = spacy.load("en_core_web_sm")
# Transformer spell corrector (text2text generation); applied to the whole
# input string in preprocess_text.
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
| 10 |
|
| 11 |
def preprocess_text(text: str):
    """Spell-check *text* and extract named entities and #/@ tags.

    Returns a ``(text, result)`` tuple. ``text`` is echoed back unchanged;
    ``result`` is a dict with:
      - "spell_suggestions": list of {"original", "corrected"} dicts —
        the TextBlob correction first, then the transformer correction
        when it differs from both the input and the TextBlob output,
      - "entities": spaCy entities as {"text", "label"} dicts,
      - "tags": tokens whose text starts with '#' or '@'.
    """
    analysis = {"spell_suggestions": [], "entities": [], "tags": []}

    parsed = nlp(text)

    # Whole-string correction via TextBlob.
    corrected = str(TextBlob(text).correct())
    if corrected != text:
        analysis["spell_suggestions"].append(
            {"original": text, "corrected": corrected}
        )

    # Transformer-based correction; skipped when it just repeats the
    # input or duplicates the TextBlob suggestion.
    transformer_fix = spell_checker(text, max_length=512)[0]['generated_text']
    if transformer_fix != text and transformer_fix != corrected:
        analysis["spell_suggestions"].append(
            {"original": text, "corrected": transformer_fix}
        )

    # Named entities and social-media style tags from the spaCy parse.
    # NOTE(review): spaCy usually splits '#tag' into two tokens, so the
    # startswith check may rarely fire — confirm against real inputs.
    analysis["entities"] = [
        {"text": ent.text, "label": ent.label_} for ent in parsed.ents
    ]
    analysis["tags"] = [
        tok.text for tok in parsed if tok.text.startswith(('#', '@'))
    ]

    return text, analysis
|
| 44 |
+
|
| 45 |
+
def preprocess_and_forward(text: str):
    """Preprocess *text* locally, then forward it to the translation Space.

    Returns a two-element list ``[translation_or_error, preprocessing_result]``.
    The preprocessing result is returned even when the remote call fails,
    with the first element set to an ``"Error: ..."`` string.
    """
    original_text, preprocessing_result = preprocess_text(text)

    # Forward the ORIGINAL text (not the corrected form) to translation.
    try:
        # Fix: Client construction performs network I/O against the remote
        # Space; it previously sat outside the try, so a connection failure
        # escaped the handler instead of producing the error-string return.
        client = Client("Frenchizer/space_17")
        translation = client.predict(original_text)
        return [translation, preprocessing_result]
    except Exception as e:
        return [f"Error: {str(e)}", preprocessing_result]
|
| 56 |
|
| 57 |
# Gradio interface
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Input Text")
    output_text = gr.Textbox(label="Output Text")
    # Fix: preprocess_and_forward returns a two-item list
    # [translation, preprocessing_result], but only one output component
    # was wired, so the preprocessing result was lost/mis-rendered.
    # Restore the JSON panel and map both return values.
    suggestion_output = gr.JSON(label="Suggestions")
    preprocess_button = gr.Button("Process")
    preprocess_button.click(
        fn=preprocess_and_forward,
        inputs=[input_text],
        outputs=[output_text, suggestion_output],
    )

if __name__ == "__main__":
    demo.launch()
|