space_22

Sleeping

App Files Files Community

Frenchizer committed on Feb 9, 2025

Commit

1d39b8a

verified ·

1 Parent(s): 4737205

Create app.py

Browse files

Files changed (1) hide show

app.py +86 -0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import json
+import re
+
+import gradio as gr
+import language_tool_python
+import requests
+import spacy
+from transformers import pipeline
+# Identifiers of the external resources loaded below, kept in one place so
+# they are easy to find and swap.
+SPACY_MODEL_NAME = "en_core_web_sm"
+LANGUAGE_TOOL_LOCALE = 'en-US'
+SPELLING_MODEL_NAME = "oliverguhr/spelling-correction-english-base"
+# Everything is loaded once at import time and shared by all requests.
+# spaCy pipeline: used by preprocess_text for named entities.
+nlp = spacy.load(SPACY_MODEL_NAME)
+# LanguageTool checker: used by preprocess_text for rule-based corrections.
+language_tool = language_tool_python.LanguageTool(LANGUAGE_TOOL_LOCALE)
+# Hugging Face text2text pipeline: used by preprocess_text for spelling fixes.
+spell_checker = pipeline("text2text-generation", model=SPELLING_MODEL_NAME)
+# Upper bound (seconds) on the context-detection call. requests has no
+# default timeout, so without one a stalled remote would block the UI forever.
+_CONTEXT_REQUEST_TIMEOUT = 60
+def preprocess_and_forward(text: str) -> str:
+    """Preprocess *text* locally, then fetch its context from the remote Space.
+
+    Args:
+        text: Raw input text coming from the UI.
+
+    Returns:
+        A JSON-encoded string. On success it contains ``"preprocessing"``
+        (the dict built by ``preprocess_text``) and ``"context"`` (the first
+        item of the remote ``"data"`` list). On failure it contains
+        ``"error"`` and ``"preprocessing_results"``, so the local analysis is
+        never lost when the remote call fails.
+    """
+    processed_text, preprocessing_results = preprocess_text(text)
+
+    def failure(message: str) -> str:
+        # Single place that builds the error payload so every failure path
+        # returns the same shape.
+        return json.dumps({
+            "error": message,
+            "preprocessing_results": preprocessing_results
+        })
+
+    try:
+        # Forward preprocessed text to context detection (space_9)
+        response = requests.post(
+            "https://api.gradio.app/v2/Frenchizer/space_9/predict",
+            json={"data": [processed_text]},
+            timeout=_CONTEXT_REQUEST_TIMEOUT,
+        )
+        if not response.ok:
+            # HTTP-level failure (4xx/5xx): the body is not a usable result.
+            return failure("Context detection failed")
+        context_response = response.json()
+    except (requests.RequestException, ValueError) as e:
+        # Network errors, timeouts, and bodies that are not valid JSON.
+        return failure(str(e))
+    # Validate the payload shape explicitly instead of letting a KeyError or
+    # IndexError surface as an unhelpful message such as "'data'".
+    if not isinstance(context_response, dict) or "error" in context_response:
+        return failure("Context detection failed")
+    data = context_response.get("data")
+    if not isinstance(data, list) or not data:
+        return failure("Context detection failed")
+    # Return preprocessing and detected context
+    return json.dumps({
+        "preprocessing": preprocessing_results,
+        "context": data[0]
+    })
+# Matches #hashtags and @mentions in the raw text. The look-behind rejects a
+# marker glued to a preceding word character, so "user@example.com" is not
+# reported as a mention.
+_TAG_PATTERN = re.compile(r"(?<!\w)[#@]\w+")
+def preprocess_text(text: str):
+    """Analyse *text* and collect corrections, entities and tags.
+
+    Args:
+        text: Raw input text.
+
+    Returns:
+        A ``(text, result)`` tuple. ``text`` is returned unchanged; ``result``
+        is a dict with four lists:
+
+        * ``"corrections"``: ``{"original", "suggestion"}`` pairs, one per
+          LanguageTool match that offers a replacement (first replacement only).
+        * ``"spell_suggestions"``: at most one ``{"original", "corrected"}``
+          entry, present when the transformer output differs from the input.
+        * ``"entities"``: ``{"text", "label"}`` for each spaCy entity.
+        * ``"tags"``: ``#hashtag`` / ``@mention`` strings found in the text.
+    """
+    result = {
+        "corrections": [],
+        "entities": [],
+        "tags": [],
+        "spell_suggestions": []
+    }
+    # Nothing to analyse: skip the model calls entirely for empty input.
+    if not text or not text.strip():
+        return text, result
+    # Rule-based grammar/spelling check (LanguageTool)
+    for match in language_tool.check(text):
+        if match.replacements:
+            start = match.offsetInContext
+            result["corrections"].append({
+                # The flagged span, cut out of the context snippet of the match.
+                "original": match.context[start:start + match.errorLength],
+                "suggestion": match.replacements[0]
+            })
+    # Transformer-based spell check
+    spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
+    if spell_checked != text:
+        result["spell_suggestions"].append({
+            "original": text,
+            "corrected": spell_checked
+        })
+    # NER with spaCy
+    doc = nlp(text)
+    result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
+    # Extract tags from the raw text rather than from spaCy tokens: the default
+    # tokenizer splits a leading "#" into its own token, so a token-based
+    # startswith() check reports a bare "#" instead of the whole hashtag.
+    result["tags"] = _TAG_PATTERN.findall(text)
+    return text, result
+# Gradio interface: a text box for input, a JSON viewer for the results, and
+# a button that runs the pipeline.
+with gr.Blocks() as demo:
+    input_text = gr.Textbox(label="Input Text")
+    output_json = gr.JSON(label="Processing Results")
+    preprocess_button = gr.Button("Process")
+    # Clicking the button sends the text box content through
+    # preprocess_and_forward and shows the returned JSON.
+    preprocess_button.click(
+        fn=preprocess_and_forward,
+        inputs=input_text,
+        outputs=output_json,
+    )
+# Start the server only when run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()