"""Multilingual morpho-syntactic analyzer (Gradio app).

Analyzes text in German, English, Spanish, and Ancient Greek using standard
spaCy models (pre-loaded at startup) and greCy Ancient Greek models
(lazy-installed from a GitHub release on first use). Results are shown as a
displacy dependency parse, a morphology table, raw JSON, and a named-entity
view.
"""

import gradio as gr
import spacy
from spacy import displacy
import base64
import traceback
import subprocess
import sys
import os
from pathlib import Path
from typing import Dict, Optional, Tuple
import importlib
import site

# ============================================================================
# CONFIGURATION
# ============================================================================

# language key -> (display name, installable model/package name, installer kind)
# "spacy" models are pre-loaded in initialize_models(); "grecy" models are
# installed from GitHub and loaded lazily on first use.
MODEL_INFO: Dict[str, Tuple[str, str, str]] = {
    "de": ("German", "de_core_news_md", "spacy"),
    "en": ("English", "en_core_web_md", "spacy"),
    "es": ("Spanish", "es_core_news_md", "spacy"),
    "grc-proiel-trf": ("Ancient Greek (PROIEL TRF)", "grc_proiel_trf", "grecy"),
    "grc-perseus-trf": ("Ancient Greek (Perseus TRF)", "grc_perseus_trf", "grecy"),
    "grc_ner_trf": ("Ancient Greek (NER TRF)", "grc_ner_trf", "grecy"),
    "grc-proiel-lg": ("Ancient Greek (PROIEL LG)", "grc_proiel_lg", "grecy"),
    "grc-perseus-lg": ("Ancient Greek (Perseus LG)", "grc_perseus_lg", "grecy"),
    "grc-proiel-sm": ("Ancient Greek (PROIEL SM)", "grc_proiel_sm", "grecy"),
    "grc-perseus-sm": ("Ancient Greek (Perseus SM)", "grc_perseus_sm", "grecy"),
}

# Per-UI-language labels. Every language entry MUST define the same keys;
# in particular "table_headers" is required by update_ui()/create_interface().
UI_TEXT = {
    "de": {
        "title": "# 🔍 Mehrsprachiger Morpho-Syntaktischer Analysator",
        "subtitle": "Analysieren Sie Texte auf Deutsch, Englisch, Spanisch und Altgriechisch",
        "ui_lang_label": "Benutzeroberflächensprache",
        "model_lang_label": "Textsprache für Analyse",
        "input_label": "Text eingeben",
        "input_placeholder": "Geben Sie hier Ihren Text ein...",
        "button_text": "Text analysieren",
        "button_processing_text": "Verarbeitung läuft...",
        "tab_graphic": "Grafische Darstellung",
        "tab_table": "Tabelle",
        "tab_json": "JSON",
        "tab_ner": "Entitäten",
        "html_label": "Abhängigkeitsparsing",
        "table_label": "Morphologische Analyse",
        "table_headers": ["Wort", "Lemma", "POS", "Tag", "Morphologie", "Abhängigkeit"],
        "json_label": "JSON-Ausgabe",
        "ner_label": "Benannte Entitäten",
        "error_message": "Fehler: ",
    },
    "en": {
        "title": "# 🔍 Multilingual Morpho-Syntactic Analyzer",
        "subtitle": "Analyze texts in German, English, Spanish, and Ancient Greek",
        "ui_lang_label": "Interface Language",
        "model_lang_label": "Text Language for Analysis",
        "input_label": "Enter Text",
        "input_placeholder": "Enter your text here...",
        "button_text": "Analyze Text",
        "button_processing_text": "Processing...",
        "tab_graphic": "Graphic View",
        "tab_table": "Table",
        "tab_json": "JSON",
        "tab_ner": "Entities",
        "html_label": "Dependency Parsing",
        "table_label": "Morphological Analysis",
        "table_headers": ["Word", "Lemma", "POS", "Tag", "Morphology", "Dependency"],
        "json_label": "JSON Output",
        "ner_label": "Named Entities",
        "error_message": "Error: ",
    },
    "es": {
        "title": "# 🔍 Analizador Morfo-Sintáctico Multilingüe",
        "subtitle": "Analice textos en alemán, inglés, español y griego antiguo",
        "ui_lang_label": "Idioma de la Interfaz",
        "model_lang_label": "Idioma del Texto para Análisis",
        "input_label": "Introducir Texto",
        "input_placeholder": "Ingrese su texto aquí...",
        "button_text": "Analizar Texto",
        "button_processing_text": "Procesando...",
        "tab_graphic": "Vista Gráfica",
        "tab_table": "Tabla",
        "tab_json": "JSON",
        "tab_ner": "Entidades",
        "html_label": "Análisis de Dependencias",
        "table_label": "Análisis Morfológico",
        "table_headers": ["Palabra", "Lema", "POS", "Etiqueta", "Morfología", "Dependencia"],
        "json_label": "Salida JSON",
        "ner_label": "Entidades Nombradas",
        "error_message": "Error: ",
    },
}

# Loaded pipelines, keyed like MODEL_INFO. None means "not loaded yet".
MODELS: Dict[str, Optional[spacy.Language]] = {}


# ============================================================================
# DEPENDENCY INSTALLATION
# ============================================================================

def install_spacy_transformers_once():
    """Install spacy-transformers (required by all _trf models).

    Uses a marker file so the pip call only runs once per container/workdir.
    Returns True on success (or if already installed), False on failure.
    """
    marker_file = Path(".spacy_transformers_installed")
    if marker_file.exists():
        print("✓ spacy-transformers already installed (marker found)")
        return True

    print("Installing spacy-transformers (for _trf models)...")
    cmd = [sys.executable, "-m", "pip", "install", "spacy-transformers"]
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=300)
        print("✓ Successfully installed spacy-transformers")
        marker_file.touch()
        return True
    except Exception as e:
        print(f"✗ FAILED to install spacy-transformers: {e}")
        # CalledProcessError carries pip's stderr; surface it for debugging.
        if hasattr(e, 'stderr'):
            print(e.stderr)
        return False


def install_grecy_model_from_github(model_name: str) -> bool:
    """Install a greCy model wheel from the project's GitHub Release.

    Uses a per-model marker file to avoid reinstalling. Installs with
    --no-deps because the wheels' pinned deps conflict with the environment.
    Returns True on success (or if already installed), False otherwise.
    """
    marker_file = Path(f".{model_name}_installed")
    if marker_file.exists():
        print(f"✓ {model_name} already installed (marker found)")
        return True

    print(f"Installing grecy model: {model_name}...")

    # The PROIEL TRF wheel carries a real version; the others are 0.0.0.
    if model_name == "grc_proiel_trf":
        wheel_filename = "grc_proiel_trf-3.7.5-py3-none-any.whl"
    elif model_name in ["grc_perseus_trf", "grc_proiel_lg", "grc_perseus_lg",
                        "grc_proiel_sm", "grc_perseus_sm", "grc_ner_trf"]:
        # Note: Wheel name uses underscore (grc_ner_trf), not hyphen
        wheel_filename = f"{model_name}-0.0.0-py3-none-any.whl"
    else:
        print(f"✗ Unknown grecy model: {model_name}")
        return False

    install_url = f"https://github.com/CrispStrobe/greCy/releases/download/v1.0-models/{wheel_filename}"
    cmd = [sys.executable, "-m", "pip", "install", install_url, "--no-deps"]
    print(f"Running: {' '.join(cmd)}")

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=900)
        if result.stdout:
            print("STDOUT:", result.stdout)
        if result.stderr:
            print("STDERR:", result.stderr)
        print(f"✓ Successfully installed {model_name} from GitHub")
        marker_file.touch()
        return True
    except subprocess.CalledProcessError as e:
        print(f"✗ Installation subprocess FAILED with code {e.returncode}")
        print("STDOUT:", e.stdout)
        print("STDERR:", e.stderr)
        return False
    except Exception as e:
        print(f"✗ Installation exception: {e}")
        traceback.print_exc()
        return False


# ============================================================================
# MODEL LOADING (LAZY LOADING)
# ============================================================================

def load_spacy_model(model_name: str) -> Optional[spacy.Language]:
    """Load a standard spaCy model, downloading it first if missing.

    Returns the loaded pipeline, or None if download/load failed.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        # OSError is spaCy's "model not installed" signal.
        print(f"Installing {model_name}...")
        try:
            subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name])
            return spacy.load(model_name)
        except Exception as e:
            print(f"✗ Failed to install {model_name}: {e}")
            return None


def load_grecy_model(model_name: str) -> Optional[spacy.Language]:
    """Load a greCy model, installing it from GitHub first if needed.

    Returns the loaded pipeline, or None if installation or loading failed.
    """
    if not install_grecy_model_from_github(model_name):
        print(f"✗ Cannot load {model_name} because installation failed.")
        return None

    try:
        # The wheel was pip-installed in this same process; refresh import
        # machinery so the freshly installed package can be discovered.
        print("Refreshing importlib to find new package...")
        importlib.invalidate_caches()
        try:
            importlib.reload(site)
        except Exception:
            pass  # best-effort: reload(site) can fail in frozen/embedded envs

        print(f"Trying: spacy.load('{model_name}')")
        nlp = spacy.load(model_name)
        print(f"✓ Successfully loaded {model_name}")
        return nlp
    except Exception as e:
        print(f"✗ Model {model_name} is installed but FAILED to load.")
        print(f"  Error: {e}")
        traceback.print_exc()
        return None


def initialize_models():
    """Pre-load the standard spaCy models and prepare _trf dependencies.

    greCy models are intentionally NOT loaded here; they are large and are
    loaded on first use by get_analysis().
    """
    print("\n" + "=" * 70)
    print("INITIALIZING MODELS")
    print("=" * 70 + "\n")

    install_spacy_transformers_once()

    loaded_count = 0
    spacy_model_count = 0

    for lang_code, (lang_name, model_name, model_type) in MODEL_INFO.items():
        if model_type == "spacy":
            spacy_model_count += 1
            print(f"Loading {lang_name} ({model_name})...")
            nlp = load_spacy_model(model_name)
            MODELS[lang_code] = nlp
            if nlp:
                print(f"✓ {lang_name} ready\n")
                loaded_count += 1
            else:
                print(f"✗ {lang_name} FAILED\n")
        else:
            print(f"✓ {lang_name} ({model_name}) will be loaded on first use.\n")
            MODELS[lang_code] = None

    print(f"Pre-loaded {loaded_count}/{spacy_model_count} standard models.")
    print("=" * 70 + "\n")


# ============================================================================
# ANALYSIS (WITH NER)
# ============================================================================

def get_analysis(ui_lang: str, model_lang_key: str, text: str):
    """Analyze *text* with the model selected by *model_lang_key*.

    Returns a 5-tuple matching the Gradio outputs wired in create_interface():
    (dataframe rows, JSON token list, dependency-parse HTML, NER HTML,
    re-enabled analyze button). On error, every output carries the error.
    """
    ui_config = UI_TEXT.get(ui_lang.lower(), UI_TEXT["en"])
    error_prefix = ui_config["error_message"]

    try:
        if not text.strip():
            # Return empty values for all outputs.
            # NOTE(review): HTML markup reconstructed — original tags were
            # lost in transit; only the visible text is authoritative.
            return ([], [],
                    "<p>No text provided.</p>",
                    "",
                    gr.Button(value=ui_config["button_text"], interactive=True))

        nlp = MODELS.get(model_lang_key)
        if nlp is None:
            # Lazy-load path (greCy models, or a spaCy model that failed
            # during startup).
            print(f"First use of {model_lang_key}. Loading model...")
            if model_lang_key not in MODEL_INFO:
                raise ValueError(f"Unknown model key: {model_lang_key}")

            _, model_name, model_type = MODEL_INFO[model_lang_key]
            if model_type == "grecy":
                nlp = load_grecy_model(model_name)
            else:
                nlp = load_spacy_model(model_name)

            if nlp is None:
                # Drop the cache entry so the next request retries loading.
                MODELS.pop(model_lang_key, None)
                raise ValueError(f"Model for {model_lang_key} ({model_name}) FAILED to load. Check logs.")
            else:
                MODELS[model_lang_key] = nlp
                print(f"✓ {model_lang_key} is now loaded and cached.")

        doc = nlp(text)

        dataframe_output = []
        json_output = []
        for token in doc:
            lemma_str = token.lemma_
            morph_str = str(token.morph) if token.has_morph() else ''
            dep_str = token.dep_ if doc.has_annotation("DEP") else ''
            tag_str = token.tag_ if token.tag_ != "" else ''
            pos_str = token.pos_ if token.pos_ != "" else ''
            json_output.append({
                "word": token.text,
                "lemma": lemma_str,
                "pos": pos_str,
                "tag": tag_str,
                "morphology": morph_str,
                "dependency": dep_str,
                "is_stopword": token.is_stop,
            })
            dataframe_output.append([token.text, lemma_str, pos_str, tag_str, morph_str, dep_str])

        # --- DEPENDENCY PARSE VISUALIZATION ---
        html_dep_out = ""
        if "parser" in nlp.pipe_names:
            try:
                options = {"compact": True, "bg": "#ffffff",
                           "color": "#000000", "font": "Source Sans Pro"}
                html_svg = displacy.render(doc, style="dep", jupyter=False, options=options)
                # Embed the SVG as a base64 data URI so Gradio's HTML
                # sanitizer cannot mangle the inline SVG markup.
                svg_b64 = base64.b64encode(html_svg.encode("utf-8")).decode("utf-8")
                html_dep_out = (
                    f'<img src="data:image/svg+xml;base64,{svg_b64}" '
                    f'style="max-width: 100%; height: auto;" alt="Dependency parse">'
                )
            except Exception as e:
                html_dep_out = f"<p style='color: red;'>Visualization error (DEP): {e}</p>"
        else:
            html_dep_out = "<p>Dependency parsing ('parser') not available for this model.</p>"

        # --- NAMED ENTITY VISUALIZATION ---
        html_ner_out = ""
        if "ner" in nlp.pipe_names:
            if doc.ents:
                try:
                    # Let displacy use its default entity colors.
                    html_ner_out = displacy.render(doc, style="ent", jupyter=False)
                    html_ner_out = (
                        f'<div style="background: #ffffff; padding: 10px;">'
                        f'{html_ner_out}</div>'
                    )
                except Exception as e:
                    html_ner_out = f"<p style='color: red;'>Visualization error (NER): {e}</p>"
            else:
                html_ner_out = "<p>No named entities found in this text.</p>"
        else:
            html_ner_out = "<p>Named Entity Recognition ('ner') not available for this model.</p>"

        return (dataframe_output, json_output, html_dep_out, html_ner_out,
                gr.Button(value=ui_config["button_text"], interactive=True))

    except Exception as e:
        traceback.print_exc()
        error_html = f"<p style='color: red;'>{error_prefix} {str(e)}</p>"
        # Return the error through all four result outputs.
        return ([[f"{error_prefix}{str(e)}"]], {"error": str(e)},
                error_html, error_html,
                gr.Button(value=ui_config["button_text"], interactive=True))


# ============================================================================
# UI
# ============================================================================

def update_ui(ui_lang: str):
    """Relabel every component for the selected interface language.

    The returned list order must match the `outputs` list wired to
    ui_lang_radio.change() in create_interface().
    """
    ui_config = UI_TEXT.get(ui_lang.lower(), UI_TEXT["en"])
    return [
        gr.Markdown(value=ui_config["title"]),
        gr.Markdown(value=ui_config["subtitle"]),
        gr.Radio(label=ui_config["ui_lang_label"]),
        gr.Radio(label=ui_config["model_lang_label"]),
        gr.Textbox(label=ui_config["input_label"], placeholder=ui_config["input_placeholder"]),
        gr.Button(value=ui_config["button_text"]),
        gr.Tab(label=ui_config["tab_graphic"]),
        gr.Tab(label=ui_config["tab_table"]),
        gr.Tab(label=ui_config["tab_json"]),
        gr.Tab(label=ui_config["tab_ner"]),
        gr.HTML(label=ui_config["html_label"]),
        gr.DataFrame(label=ui_config["table_label"], headers=ui_config["table_headers"], interactive=False),
        gr.JSON(label=ui_config["json_label"]),
        gr.HTML(label=ui_config["ner_label"]),
    ]


def create_interface():
    """Build and return the Gradio Blocks interface (English defaults)."""
    config = UI_TEXT["en"]
    model_choices = list(MODEL_INFO.keys())

    with gr.Blocks(title="Multilingual Morpho-Syntactic Analyzer") as demo:
        with gr.Row():
            ui_lang_radio = gr.Radio(["DE", "EN", "ES"], label=config["ui_lang_label"], value="EN")
            model_lang_radio = gr.Radio(
                choices=[(MODEL_INFO[k][0], k) for k in model_choices],
                label=config["model_lang_label"],
                value=model_choices[0],
            )

        markdown_title = gr.Markdown(config["title"])
        markdown_subtitle = gr.Markdown(config["subtitle"])
        text_input = gr.Textbox(label=config["input_label"],
                                placeholder=config["input_placeholder"], lines=5)
        analyze_button = gr.Button(config["button_text"], variant="primary")

        with gr.Tabs():
            with gr.Tab(config["tab_graphic"]) as tab_graphic:
                html_dep_out = gr.HTML(label=config["html_label"])
            with gr.Tab(config["tab_ner"]) as tab_ner:
                html_ner_out = gr.HTML(label=config["ner_label"])
            with gr.Tab(config["tab_table"]) as tab_table:
                # Requires "table_headers" in every UI_TEXT language entry.
                df_out = gr.DataFrame(label=config["table_label"],
                                      headers=config["table_headers"], interactive=False)
            with gr.Tab(config["tab_json"]) as tab_json:
                json_out = gr.JSON(label=config["json_label"])

        analyze_button.click(
            fn=get_analysis,
            inputs=[ui_lang_radio, model_lang_radio, text_input],
            outputs=[df_out, json_out, html_dep_out, html_ner_out, analyze_button],
            api_name="get_morphology",
        )

        # Output order must match the list returned by update_ui().
        ui_lang_radio.change(
            fn=update_ui,
            inputs=ui_lang_radio,
            outputs=[markdown_title, markdown_subtitle, ui_lang_radio,
                     model_lang_radio, text_input, analyze_button,
                     tab_graphic, tab_table, tab_json, tab_ner,
                     html_dep_out, df_out, json_out, html_ner_out],
        )

    return demo


# ============================================================================
# MAIN
# ============================================================================

if __name__ == "__main__":
    print("\n" + "=" * 70)
    print("MULTILINGUAL MORPHO-SYNTACTIC ANALYZER")
    print("=" * 70 + "\n")
    initialize_models()
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)