Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import spacy | |
| import language_tool_python | |
| import json | |
| import requests | |
# ---- One-time model / tool initialization (import-time side effects) ----
# spaCy English pipeline: used below for NER and token-level tag extraction.
nlp = spacy.load("en_core_web_sm")
# LanguageTool rule-based grammar/spelling checker for US English.
language_tool = language_tool_python.LanguageTool('en-US')
# Seq2seq transformer that rewrites a whole sentence with spelling fixed.
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
def preprocess_and_forward(text: str) -> str:
    """Preprocess *text* and forward it to the context-detection space.

    Returns a JSON string. On success it holds {"preprocessing": ...,
    "context": ...}; on any failure it holds {"error": ..., and whatever
    preprocessing results were produced before the failure}.
    """
    # Defined before the try so the except clause never hits a NameError,
    # even if preprocess_text itself raises.
    preprocessing_results = {}
    try:
        # Originally this call sat outside the try, so a failure in
        # preprocessing escaped uncaught instead of being reported as JSON.
        processed_text, preprocessing_results = preprocess_text(text)
        # Forward preprocessed text to context detection (space_9).
        # timeout= keeps a hung remote space from blocking the UI forever.
        response = requests.post(
            "https://api.gradio.app/v2/Frenchizer/space_9/predict",
            json={"data": [processed_text]},
            timeout=30,
        )
        context_response = response.json()
        if "error" in context_response:
            return json.dumps({
                "error": "Context detection failed",
                "preprocessing_results": preprocessing_results,
            })
        context = context_response["data"][0]
        # Return preprocessing results together with the detected context.
        result = {
            "preprocessing": preprocessing_results,
            "context": context,
        }
        return json.dumps(result)
    except Exception as e:
        # Best-effort error report: surface the failure but keep any
        # preprocessing that completed before it.
        return json.dumps({
            "error": str(e),
            "preprocessing_results": preprocessing_results,
        })
def preprocess_text(text: str):
    """Run spell/grammar checks, NER, and tag extraction over *text*.

    Returns a tuple of (original text, report dict). The report holds:
    - "corrections": LanguageTool matches as {"original", "suggestion"}
    - "entities": spaCy named entities as {"text", "label"}
    - "tags": tokens starting with '#' or '@'
    - "spell_suggestions": whole-text transformer rewrite, if it differs
    """
    report = {
        "corrections": [],
        "entities": [],
        "tags": [],
        "spell_suggestions": [],
    }

    # Rule-based pass: record each flagged span plus its top replacement.
    for issue in language_tool.check(text):
        if not issue.replacements:
            continue
        start = issue.offsetInContext
        flagged = issue.context[start:start + issue.errorLength]
        report["corrections"].append({
            "original": flagged,
            "suggestion": issue.replacements[0],
        })

    # Transformer pass: only report the rewrite when it actually changed.
    corrected = spell_checker(text, max_length=512)[0]['generated_text']
    if corrected != text:
        report["spell_suggestions"].append({
            "original": text,
            "corrected": corrected,
        })

    # spaCy pass: named entities plus #hashtag / @mention tokens.
    doc = nlp(text)
    report["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
    report["tags"] = [tok.text for tok in doc if tok.text.startswith(('#', '@'))]

    return text, report
# ---- Gradio interface ----
with gr.Blocks() as demo:
    # Single textbox in, JSON report out; the button drives the pipeline.
    input_text = gr.Textbox(label="Input Text")
    output_json = gr.JSON(label="Processing Results")
    preprocess_button = gr.Button("Process")
    preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text], outputs=[output_json])

if __name__ == "__main__":
    demo.launch()