cstr committed on
Commit
750053f
·
verified ·
1 Parent(s): 49b7374

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +500 -344
app.py CHANGED
@@ -1,357 +1,513 @@
1
  import gradio as gr
2
  import spacy
3
- # Removed: import spacy_transformers
4
- import os
5
- import subprocess # Keep for spacy download fallback
6
- import sys
7
  from spacy import displacy
8
  import base64
9
  import traceback
10
- import shlex # Import shlex for safer command splitting
11
-
12
- # --- 1. UI Translations ---
13
- # (Keep UI text dictionary as before, but only DE, EN, ES needed now)
14
- UI_TEXT = {
15
-     "de": {
16
-         "title": "# NLP-Analysator (mit spaCy)",
17
-         "subtitle": "Geben Sie einen Text ein, um die morphologischen Details für jedes Wort zu erhalten.\n**Um dies als API zu verwenden, klicken Sie auf den \"View API\"-Link unten.**",
18
-         "ui_lang_label": "UI Sprache",
19
-         "model_lang_label": "Textsprache (Modell)",
20
-         "input_label": "Eingabetext",
21
-         "input_placeholder": "Die schnellen braunen Füchse...",
22
-         "button_text": "Analysieren",
23
-         "button_processing_text": "Verarbeite...",
24
-         "tab_graphic": "Syntaktische Analyse (Grafik)",
25
-         "tab_table": "Visuelle Tabelle (Tokens)",
26
-         "tab_json": "Roh-JSON (für API)",
27
-         "html_label": "Abhängigkeits-Parse",
28
-         "table_label": "Analyse-Ergebnisse (Tabelle)",
29
-         "table_headers": ["Wort", "Lemma", "POS", "Tag (detailliert)", "Morphologie", "Abhängigkeit"],
30
-         "json_label": "Analyse-Ergebnisse (JSON)",
31
-         "error_message": "Fehler bei der Textverarbeitung: "
32
-     },
33
-     "en": {
34
-         "title": "# NLP Analyzer (with spaCy)",
35
-         "subtitle": "Enter any text to get the morphological details for each word.\n**To use this as an API, click the \"View API\" link at the bottom.**",
36
-         "ui_lang_label": "UI Language",
37
-         "model_lang_label": "Text Language (Model)",
38
-         "input_label": "Input Text",
39
-         "input_placeholder": "The quick brown foxes...",
40
-         "button_text": "Analyze",
41
-         "button_processing_text": "Processing...",
42
-         "tab_graphic": "Syntactic Analysis (Graphic)",
43
-         "tab_table": "Visual Table (Tokens)",
44
-         "tab_json": "Raw JSON (for API)",
45
-         "html_label": "Dependency Parse",
46
-         "table_label": "Analysis Results (Table)",
47
-         "table_headers": ["Word", "Lemma", "POS", "Tag (detailed)", "Morphology", "Dependency"],
48
-         "json_label": "Analysis Results (JSON)",
49
-         "error_message": "Error processing text: "
50
-     },
51
-     "es": {
52
-         "title": "# Analizador NLP (con spaCy)",
53
-         "subtitle": "Ingrese cualquier texto para obtener los detalles morfológicos de cada palabra.\n**Para usar esto como API, haga clic en el enlace \"View API\" en la parte inferior.**",
54
-         "ui_lang_label": "Idioma de UI",
55
-         "model_lang_label": "Idioma del Texto (Modelo)",
56
-         "input_label": "Texto de entrada",
57
-         "input_placeholder": "Los rápidos zorros marrones...",
58
-         "button_text": "Analizar",
59
-         "button_processing_text": "Procesando...",
60
-         "tab_graphic": "Análisis Sintáctico (Gráfico)",
61
-         "tab_table": "Tabla Visual (Tokens)",
62
-         "tab_json": "JSON Crudo (para API)",
63
-         "html_label": "Análisis de Dependencia",
64
-         "table_label": "Resultados del Análisis (Tabla)",
65
-         "table_headers": ["Palabra", "Lema", "POS", "Tag (detallado)", "Morfología", "Dependencia"],
66
-         "json_label": "Resultados del Análisis (JSON)",
67
-         "error_message": "Error al procesar el texto: "
68
-     }
69
- }
70
 
 
 
 
71
 
72
- # --- 2. Model Loading Info ---
73
- # Define model types: 'spacy' (standard download), 'grecy' (uses grecy install)
74
  MODEL_INFO = {
75
-     # lang_code: (model_type, model_name_to_load, install_arg [optional])
76
-     "de": ("spacy", "de_core_news_md", "de_core_news_md"),
77
-     "en": ("spacy", "en_core_web_md", "en_core_web_md"),
78
-     "es": ("spacy", "es_core_news_md", "es_core_news_md"),
79
-     "grc": ("grecy", "grc_proiel_trf", "grc_proiel_trf"), # Use grecy install model_name
80
-     # Removed HE, AR, LA
81
  }
82
 
83
- def load_model(lang_code, model_info):
84
-     """
85
-     Loads a spaCy model based on its type, handling installation/download failures.
86
-     """
87
-     model_type, model_name_to_load, install_arg = model_info
88
-     nlp = None
89
-
90
-     try:
91
-         # First, try loading directly
92
-         nlp = spacy.load(model_name_to_load)
93
-         print(f"Model '{model_name_to_load}' loaded successfully for '{lang_code}'.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
-     except OSError:
96
-         # If loading fails, attempt installation based on model_type
97
-         print(f"Model '{model_name_to_load}' not found or failed initial load. Attempting installation...")
98
-         install_success = False # Flag specifically for install types
99
-         try:
100
-             if model_type == "spacy" and install_arg:
101
-                 print(f"Running: spacy download {install_arg}")
102
-                 subprocess.check_call([sys.executable, "-m", "spacy", "download", install_arg])
103
-                 install_success = True
104
-                 print(f"spaCy download for '{install_arg}' completed.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
- # --- THIS BLOCK IS THE FIX ---
107
-             elif model_type == "grecy" and install_arg:
108
-                  # Grecy needs to run its own command, using the *current* Python executable
109
-                  print(f"Running: {sys.executable} -m grecy install {install_arg}")
110
-                  # Use sys.executable to ensure it installs into the correct environment
111
-                  subprocess.check_call([sys.executable, "-m", "grecy", "install", install_arg])
112
-                  install_success = True
113
-                  print(f"Grecy install for '{install_arg}' completed.")
114
- # --- END OF FIX ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
-             else:
117
-                  print(f"--- WARNING: No installation method defined for model type '{model_type}' or missing install_arg for '{model_name_to_load}'. ---")
118
-
119
-             # --- Installation Attempt Finished ---
120
-
121
-             # Try loading again *only* if an installation was attempted
122
-             if install_success:
123
-                 print(f"Attempting to load '{model_name_to_load}' again after installation...")
124
-                 nlp = spacy.load(model_name_to_load)
125
-                 print(f"Model '{model_name_to_load}' loaded successfully after installation for '{lang_code}'.")
126
-             else:
127
-                  # If no install method was triggered or defined
128
-                  raise OSError(f"Model '{model_name_to_load}' not found and no valid installation method triggered.")
129
-
130
-
131
-         # --- Error Handling during Installation/Secondary Load ---
132
-         except subprocess.CalledProcessError as e:
133
-             print(f"--- ERROR: Installation command failed for '{model_name_to_load}'. Error: {e} ---")
134
-             raise OSError(f"[E050] Can't find model '{model_name_to_load}' after installation attempt failed.") from e
135
-         except OSError as e: # Catch failure during the second spacy.load()
136
-             print(f"--- ERROR: Failed to load model '{model_name_to_load}' even after installation attempt. Error: {e} ---")
137
-             raise
138
-         except Exception as e: # Catch other unexpected errors during install/load
139
-             print(f"--- ERROR: An unexpected error occurred during installation/load for '{model_name_to_load}': {e} ---")
140
-             traceback.print_exc()
141
-             raise
142
-
143
-     # --- Catch Errors During Initial Load ---
144
-     except Exception as e: # Catch any other unexpected loading errors during initial spacy.load
145
-          print(f"--- ERROR: An unexpected error occurred trying to initially load model '{model_name_to_load}' for language '{lang_code}': {e} ---")
146
-          traceback.print_exc()
147
-          raise # Re-raise error
148
-
149
-     # Final check
150
-     if nlp is None:
151
-          raise RuntimeError(f"Model object for '{model_name_to_load}' is None after loading attempts.")
152
-
153
-     return nlp
154
-
155
-
156
- # Load all models at startup and store them in a dictionary
157
- print("Loading all models...")
158
- MODELS = {}
159
- loading_errors = False
160
-
161
- # Ensure spacy itself is loaded before trying to load models
162
- try:
163
-     import spacy
164
-     print(f"Using spaCy version: {spacy.__version__}")
165
- except ImportError as e:
166
-     print(f"--- FATAL ERROR: Failed to import spaCy. Check requirements.txt. Error: {e} ---")
167
-     sys.exit(1) # Exit if core dependencies are missing
168
-
169
- for lang_code, model_info_tuple in MODEL_INFO.items():
170
-     try:
171
-         MODELS[lang_code] = load_model(lang_code, model_info_tuple)
172
-     except Exception as e:
173
-         print(f"--- MODEL LOAD FAILED for language '{lang_code}'. Error: {e} ---")
174
-         # Don't print "FATAL" here, allow others to load
175
-         loading_errors = True
176
-         MODELS[lang_code] = None # Explicitly mark as failed
177
-
178
- if not loading_errors:
179
-     print("All specified models loaded successfully.")
180
- else:
181
-     print("--- WARNING: One or more models failed to load. The app will run but analysis for failed languages will not work. Check logs above. ---")
182
-
183
-
184
- # --- 3. The Core Processing Function (with Error Handling) ---
185
- def get_analysis(ui_lang, model_lang, text, analyze_button_state):
186
-     """
187
-     Processes text in the selected language and returns FOUR formats:
188
-     1. A list of lists for the visual DataFrame (or error message).
189
-     2. A list of dicts for the JSON API (or error message).
190
-     3. An HTML string for the dependency parse visualization (or error message).
191
-     4. An updated, re-enabled analyze button.
192
-     """
193
-     current_ui_lang_code = ui_lang.lower()
194
-     current_ui_config = UI_TEXT.get(current_ui_lang_code, UI_TEXT["en"])
195
-     error_prefix = current_ui_config["error_message"]
196
-     yield {analyze_button: gr.Button(interactive=False, value=current_ui_config["button_processing_text"])}
197
-     try:
198
-         if not text:
199
-             yield {
200
-                 df_out: [], json_out: [], html_out: "",
201
-                 analyze_button: gr.Button(interactive=True, value=current_ui_config["button_text"])
202
-             }
203
-             return
204
-         # Extract lang code like 'GRC' from 'GRC (grc_proiel_trf)'
205
-         lang_code = model_lang.split(" ")[0].lower()
206
-
207
-         if lang_code not in MODELS or MODELS[lang_code] is None:
208
-              raise ValueError(f"Model for language code '{lang_code}' failed to load during startup or is unavailable. Cannot process text.")
209
-
210
-         nlp = MODELS[lang_code]
211
-         doc = nlp(text)
212
-         dataframe_output = []
213
-         json_output = []
214
-
215
-         # Extract data, accessing attributes directly and providing fallbacks
216
-         for token in doc:
217
-             # Use getattr to safely access attributes with a default value ('') if missing
218
-             morph_str = str(getattr(token, 'morph', ''))
219
-             dep_str = getattr(token, 'dep_', '') if doc.has_annotation("DEP") else "" # Keep DEP check here
220
-             tag_str = getattr(token, 'tag_', '')
221
-             pos_str = getattr(token, 'pos_', '')
222
-             # Lemma check remains the same
223
-             lemma_str = token.lemma_ if getattr(token, 'lemma', 0) != 0 else token.text
224
-
225
-             json_output.append({
226
-                 "word": token.text,
227
-                 "lemma": lemma_str,
228
-                 "pos": pos_str,
229
-                 "tag": tag_str,
230
-                 "morphology": morph_str,
231
-                 "dependency": dep_str,
232
-                 "is_stopword": token.is_stop # is_stop should generally be safe
233
-             })
234
-             dataframe_output.append([
235
-                 token.text,
236
-                 lemma_str,
237
-                 pos_str,
238
-                 tag_str,
239
-                 morph_str,
240
-                 dep_str
241
-         ])
242
-
243
-         options = {"compact": True, "bg": "#ffffff", "color": "#000000", "font": "Source Sans Pro"}
244
-
245
-         # Use displacy only if the model supports parsing (has DEP annotation)
246
-         html_out_content = ""
247
-         if doc.has_annotation("DEP"):
248
-              html_svg = displacy.render(doc, style="dep", jupyter=False, options=options)
249
-              svg_b64 = base64.b64encode(html_svg.encode("utf-8")).decode("utf-8")
250
-              html_out_content = f"""
251
-              <div style="background-color: #ffffff; overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; line-height: 2.5;">
252
-                  <img src="data:image/svg+xml;base64,{svg_b64}" />
253
-              </div>
254
-              """
255
-         else:
256
-              # Provide more context if dependency info is missing
257
-              model_name_used = MODEL_INFO.get(lang_code, ("unknown", "", ""))[1] # Get model name/path
258
-              html_out_content = f"<p style='color: orange;'>Dependency parse visualization not available for the selected model ('{model_name_used}'). Ensure the loaded pipeline includes a parser component.</p>"
259
-
260
-
261
-         yield {
262
-             df_out: dataframe_output, json_out: json_output, html_out: html_out_content,
263
-             analyze_button: gr.Button(interactive=True, value=current_ui_config["button_text"])
264
-         }
265
-     except Exception as e:
266
-         print(f"--- ERROR during get_analysis ---")
267
-         traceback.print_exc()
268
-         error_details = str(e)
269
-         error_html = f"<div style='color: red; border: 1px solid red; padding: 10px; border-radius: 5px;'>{error_prefix}{error_details}</div>"
270
-         yield {
271
-             df_out: [[f"{error_prefix}{error_details}"]],
272
-             json_out: {"error": error_details},
273
-             html_out: error_html,
274
-             analyze_button: gr.Button(interactive=True, value=current_ui_config["button_text"])
275
-         }
276
-
277
-
278
- # --- 4. UI Update Function ---
279
- # (No changes needed here)
280
- def update_ui(ui_lang):
281
-     """Updates all UI components when the language is changed."""
282
-     lang_code = ui_lang.lower()
283
-     ui_config = UI_TEXT[lang_code]
284
-     return {
285
-         markdown_title: gr.Markdown(value=ui_config["title"]),
286
-         markdown_subtitle: gr.Markdown(value=ui_config["subtitle"]),
287
-         ui_lang_radio: gr.Radio(label=ui_config["ui_lang_label"]),
288
-         model_lang_radio: gr.Radio(label=ui_config["model_lang_label"]),
289
-         text_input: gr.Textbox(
290
-             label=ui_config["input_label"],
291
-             placeholder=ui_config["input_placeholder"]
292
-         ),
293
-         analyze_button: gr.Button(value=ui_config["button_text"]),
294
- button_text"]),
295
-         tab_graphic: gr.Tab(label=ui_config["tab_graphic"]),
296
-         tab_table: gr.Tab(label=ui_config["tab_table"]),
297
-         tab_json: gr.Tab(label=ui_config["tab_json"]),
298
-         html_out: gr.HTML(label=ui_config["html_label"]),
299
-         df_out: gr.DataFrame(
300
-             label=ui_config["table_label"],
301
-             headers=ui_config["table_headers"],
302
-             interactive=False
303
-         ),
304
-         json_out: gr.JSON(label=ui_config["json_label"])
305
-     }
306
-
307
-
308
- # --- 5. Gradio Interface ---
309
- with gr.Blocks() as demo:
310
-     default_config = UI_TEXT["de"]
311
-     with gr.Row():
312
-         ui_lang_radio = gr.Radio(
313
-             ["DE", "EN", "ES"], label=default_config["ui_lang_label"], value="DE"
314
-         )
315
-         # Dynamically generate choices from updated MODEL_INFO keys
316
-         model_lang_choices = [f"{k.upper()} ({v[1]})" for k, v in MODEL_INFO.items()]
317
-         model_lang_radio = gr.Radio(
318
-             model_lang_choices,
319
-             label=default_config["model_lang_label"], value=model_lang_choices[0] # Default to first model
320
-         )
321
-     markdown_title = gr.Markdown(default_config["title"])
322
-     markdown_subtitle = gr.Markdown(default_config["subtitle"])
323
-     text_input = gr.Textbox(
324
-         label=default_config["input_label"], placeholder=default_config["input_placeholder"], lines=5
325
-     )
326
-     analyze_button = gr.Button(default_config["button_text"], variant="primary")
327
-     with gr.Tabs() as tabs:
328
-         with gr.Tab(default_config["tab_graphic"]) as tab_graphic:
329
-             html_out = gr.HTML(label=default_config["html_label"])
330
-         with gr.Tab(default_config["tab_table"]) as tab_table:
331
-             df_out = gr.DataFrame(
332
-                 label=default_config["table_label"], headers=default_config["table_headers"], interactive=False
333
-             )
334
-         with gr.Tab(default_config["tab_json"]) as tab_json:
335
-             json_out = gr.JSON(label=default_config["json_label"])
336
-     analyze_button_state = gr.State(value=True)
337
-
338
-     # --- 6. Event Listeners ---
339
-     # (No changes needed here)
340
-     analyze_button.click(
341
-         fn=get_analysis,
342
-         inputs=[ui_lang_radio, model_lang_radio, text_input, analyze_button_state],
343
-         outputs=[df_out, json_out, html_out, analyze_button],
344
-         api_name="get_morphology"
345
-     )
346
-     ui_lang_radio.change(
347
-         fn=update_ui,
348
-         inputs=ui_lang_radio,
349
-         outputs=[
350
-             markdown_title, markdown_subtitle, ui_lang_radio, model_lang_radio,
351
-             text_input, analyze_button, tab_graphic, tab_table, tab_json,
352
-             html_out, df_out, json_out
353
-         ]
354
-     )
355
-
356
- # Launch the app
357
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import spacy
 
 
 
 
3
  from spacy import displacy
4
  import base64
5
  import traceback
6
+ import sys
7
+ import os
8
+ import subprocess
9
+ import importlib
10
+ from typing import Dict, Optional, Tuple, List
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # ============================================================================
13
+ # CONFIGURATION
14
+ # ============================================================================
15
 
 
 
16
  MODEL_INFO = {
17
+ "de": ("German", "de_core_news_md", "spacy"),
18
+ "en": ("English", "en_core_web_md", "spacy"),
19
+ "es": ("Spanish", "es_core_news_md", "spacy"),
20
+ "grc": ("Ancient Greek", "grc_proiel_trf", "grecy")
 
 
21
  }
22
 
23
+ UI_TEXT = {
24
+ "de": {
25
+ "title": "# 🔍 Mehrsprachiger Morpho-Syntaktischer Analysator",
26
+ "subtitle": "Analysieren Sie Texte auf Deutsch, Englisch, Spanisch und Altgriechisch",
27
+ "ui_lang_label": "Benutzeroberflächensprache",
28
+ "model_lang_label": "Textsprache für Analyse",
29
+ "input_label": "Text eingeben",
30
+ "input_placeholder": "Geben Sie hier Ihren Text ein...",
31
+ "button_text": "Text analysieren",
32
+ "button_processing_text": "Verarbeitung läuft...",
33
+ "tab_graphic": "Grafische Darstellung",
34
+ "tab_table": "Tabelle",
35
+ "tab_json": "JSON",
36
+ "html_label": "Abhängigkeitsparsing",
37
+ "table_label": "Morphologische Analyse",
38
+ "table_headers": ["Wort", "Lemma", "POS", "Tag", "Morphologie", "Abhängigkeit"],
39
+ "json_label": "JSON-Ausgabe",
40
+ "error_message": "Fehler: "
41
+ },
42
+ "en": {
43
+ "title": "# 🔍 Multilingual Morpho-Syntactic Analyzer",
44
+ "subtitle": "Analyze texts in German, English, Spanish, and Ancient Greek",
45
+ "ui_lang_label": "Interface Language",
46
+ "model_lang_label": "Text Language for Analysis",
47
+ "input_label": "Enter Text",
48
+ "input_placeholder": "Enter your text here...",
49
+ "button_text": "Analyze Text",
50
+ "button_processing_text": "Processing...",
51
+ "tab_graphic": "Graphic View",
52
+ "tab_table": "Table",
53
+ "tab_json": "JSON",
54
+ "html_label": "Dependency Parsing",
55
+ "table_label": "Morphological Analysis",
56
+ "table_headers": ["Word", "Lemma", "POS", "Tag", "Morphology", "Dependency"],
57
+ "json_label": "JSON Output",
58
+ "error_message": "Error: "
59
+ },
60
+ "es": {
61
+ "title": "# 🔍 Analizador Morfo-Sintáctico Multilingüe",
62
+ "subtitle": "Analice textos en alemán, inglés, español y griego antiguo",
63
+ "ui_lang_label": "Idioma de la Interfaz",
64
+ "model_lang_label": "Idioma del Texto para Análisis",
65
+ "input_label": "Introducir Texto",
66
+ "input_placeholder": "Ingrese su texto aquí...",
67
+ "button_text": "Analizar Texto",
68
+ "button_processing_text": "Procesando...",
69
+ "tab_graphic": "Vista Gráfica",
70
+ "tab_table": "Tabla",
71
+ "tab_json": "JSON",
72
+ "html_label": "Análisis de Dependencias",
73
+ "table_label": "Análisis Morfológico",
74
+ "table_headers": ["Palabra", "Lema", "POS", "Etiqueta", "Morfología", "Dependencia"],
75
+ "json_label": "Salida JSON",
76
+ "error_message": "Error: "
77
+ }
78
+ }
79
 
80
+ # ============================================================================
81
+ # MODEL MANAGEMENT
82
+ # ============================================================================
83
+
84
+ MODELS: Dict[str, Optional[spacy.Language]] = {}
85
+
86
+ def install_spacy_model(model_name: str) -> bool:
87
+ """Install a standard spaCy model."""
88
+ try:
89
+ print(f"Installing spaCy model: {model_name}")
90
+ subprocess.check_call(
91
+ [sys.executable, "-m", "spacy", "download", model_name],
92
+ stdout=subprocess.PIPE,
93
+ stderr=subprocess.PIPE
94
+ )
95
+ print(f"✓ Successfully installed {model_name}")
96
+ return True
97
+ except subprocess.CalledProcessError as e:
98
+ print(f"✗ Failed to install {model_name}: {e}")
99
+ return False
100
+
101
+ def install_grecy_model(model_name: str) -> bool:
102
+ """Install a grecy model for Ancient Greek."""
103
+ try:
104
+ # First ensure grecy package is installed
105
+ print(f"Ensuring grecy package is installed...")
106
+ subprocess.check_call(
107
+ [sys.executable, "-m", "pip", "install", "-U", "grecy"],
108
+ stdout=subprocess.PIPE,
109
+ stderr=subprocess.PIPE
110
+ )
111
+
112
+ print(f"Installing grecy model: {model_name}")
113
+ result = subprocess.run(
114
+ [sys.executable, "-m", "grecy", "install", model_name],
115
+ capture_output=True,
116
+ text=True,
117
+ timeout=600 # 10 minute timeout for large models
118
+ )
119
+
120
+ print(result.stdout)
121
+ if result.stderr:
122
+ print(f"stderr: {result.stderr}")
123
+
124
+ if result.returncode == 0:
125
+ print(f"✓ Successfully installed grecy model {model_name}")
126
+ # Invalidate import caches to help Python find the new model
127
+ importlib.invalidate_caches()
128
+ return True
129
+ else:
130
+ print(f"✗ Failed to install {model_name}, return code: {result.returncode}")
131
+ return False
132
 
133
+ except subprocess.TimeoutExpired:
134
+ print(f"✗ Installation of {model_name} timed out")
135
+ return False
136
+ except Exception as e:
137
+ print(f"✗ Error installing {model_name}: {e}")
138
+ traceback.print_exc()
139
+ return False
140
+
141
+ def load_model_with_retry(lang_code: str, model_name: str, model_type: str, max_retries: int = 2) -> Optional[spacy.Language]:
142
+ """
143
+ Load a spaCy model with installation retry logic.
144
+
145
+ Args:
146
+ lang_code: Language code (e.g., 'de', 'en', 'grc')
147
+ model_name: Name of the model to load
148
+ model_type: Type of model ('spacy' or 'grecy')
149
+ max_retries: Maximum number of installation attempts
150
+
151
+ Returns:
152
+ Loaded spaCy model or None if loading fails
153
+ """
154
+ for attempt in range(max_retries):
155
+ try:
156
+ print(f"Attempt {attempt + 1}/{max_retries}: Loading model '{model_name}' for language '{lang_code}'")
157
 
158
+ # Try to load the model
159
+ nlp = spacy.load(model_name)
160
+ print(f"✓ Successfully loaded {model_name}")
161
+ return nlp
162
+
163
+ except OSError as e:
164
+ print(f"✗ Model '{model_name}' not found: {e}")
165
+
166
+ if attempt < max_retries - 1:
167
+ # Try to install the model
168
+ print(f"Attempting to install {model_name}...")
169
+
170
+ if model_type == "spacy":
171
+ success = install_spacy_model(model_name)
172
+ elif model_type == "grecy":
173
+ success = install_grecy_model(model_name)
174
+ else:
175
+ print(f"Unknown model type: {model_type}")
176
+ return None
177
+
178
+ if not success:
179
+ print(f"Installation failed for {model_name}")
180
+ if attempt == max_retries - 2:
181
+ return None
182
+ else:
183
+ # Give the system a moment to register the new model
184
+ import time
185
+ time.sleep(2)
186
+ # Refresh Python's module cache
187
+ importlib.invalidate_caches()
188
+ else:
189
+ print(f"✗ All attempts to load {model_name} failed")
190
+ return None
191
+
192
+ except Exception as e:
193
+ print(f" Unexpected error loading {model_name}: {e}")
194
+ traceback.print_exc()
195
+ return None
196
+
197
+ return None
198
+
199
+ def initialize_models():
200
+ """Initialize all models at startup."""
201
+ print("\n" + "="*70)
202
+ print("INITIALIZING MODELS")
203
+ print("="*70)
204
+
205
+ for lang_code, (lang_name, model_name, model_type) in MODEL_INFO.items():
206
+ print(f"\n--- Loading {lang_name} ({model_name}) ---")
207
+
208
+ nlp = load_model_with_retry(lang_code, model_name, model_type)
209
+
210
+ if nlp is not None:
211
+ MODELS[lang_code] = nlp
212
+ print(f"✓ {lang_name} model ready")
213
+ else:
214
+ MODELS[lang_code] = None
215
+ print(f" {lang_name} model FAILED - analysis will be unavailable")
216
+
217
+ print("\n" + "="*70)
218
+ print("MODEL INITIALIZATION COMPLETE")
219
+ print("="*70)
220
+
221
+ # Print summary
222
+ loaded = sum(1 for model in MODELS.values() if model is not None)
223
+ total = len(MODELS)
224
+ print(f"\nLoaded {loaded}/{total} models successfully")
225
+
226
+ if loaded < total:
227
+ print("\n⚠ WARNING: Some models failed to load")
228
+ for lang_code, model in MODELS.items():
229
+ if model is None:
230
+ lang_name = MODEL_INFO[lang_code][0]
231
+ print(f" - {lang_name} ({lang_code}): UNAVAILABLE")
232
+
233
+ print("\n")
234
+
235
+ # ============================================================================
236
+ # TEXT ANALYSIS
237
+ # ============================================================================
238
+
239
+ def get_analysis(ui_lang: str, model_lang: str, text: str, analyze_button: gr.Button):
240
+ """
241
+ Perform morpho-syntactic analysis on the input text.
242
+
243
+ Yields progressive updates to the UI components.
244
+ """
245
+ current_ui_lang_code = ui_lang.lower()
246
+ current_ui_config = UI_TEXT.get(current_ui_lang_code, UI_TEXT["en"])
247
+ error_prefix = current_ui_config["error_message"]
248
+
249
+ # Disable button during processing
250
+ yield {
251
+ analyze_button: gr.Button(interactive=False, value=current_ui_config["button_processing_text"])
252
+ }
253
+
254
+ try:
255
+ # Validate input
256
+ if not text or not text.strip():
257
+ yield {
258
+ "df_out": [],
259
+ "json_out": [],
260
+ "html_out": "<p style='color: orange;'>No text provided for analysis.</p>",
261
+ "analyze_button": gr.Button(interactive=True, value=current_ui_config["button_text"])
262
+ }
263
+ return
264
+
265
+ # Extract language code from selection like 'GRC (grc_proiel_trf)'
266
+ lang_code = model_lang.split(" ")[0].lower()
267
+
268
+ # Check if model is available
269
+ if lang_code not in MODELS:
270
+ raise ValueError(f"Unknown language code: '{lang_code}'")
271
+
272
+ if MODELS[lang_code] is None:
273
+ lang_name = MODEL_INFO[lang_code][0]
274
+ model_name = MODEL_INFO[lang_code][1]
275
+ raise ValueError(
276
+ f"Model for {lang_name} ('{model_name}') failed to load during startup. "
277
+ f"Please check the logs or try restarting the Space."
278
+ )
279
+
280
+ # Get the model and process text
281
+ nlp = MODELS[lang_code]
282
+ doc = nlp(text)
283
+
284
+ # Prepare outputs
285
+ dataframe_output = []
286
+ json_output = []
287
+
288
+ # Extract linguistic information for each token
289
+ for token in doc:
290
+ # Safely extract attributes
291
+ morph_str = str(token.morph) if hasattr(token, 'morph') else ''
292
+ dep_str = token.dep_ if doc.has_annotation("DEP") else ''
293
+ tag_str = token.tag_ if hasattr(token, 'tag_') else ''
294
+ pos_str = token.pos_ if hasattr(token, 'pos_') else ''
295
+ lemma_str = token.lemma_ if token.lemma != 0 else token.text
296
+
297
+ # Add to JSON output
298
+ json_output.append({
299
+ "word": token.text,
300
+ "lemma": lemma_str,
301
+ "pos": pos_str,
302
+ "tag": tag_str,
303
+ "morphology": morph_str,
304
+ "dependency": dep_str,
305
+ "is_stopword": token.is_stop
306
+ })
307
+
308
+ # Add to dataframe output
309
+ dataframe_output.append([
310
+ token.text,
311
+ lemma_str,
312
+ pos_str,
313
+ tag_str,
314
+ morph_str,
315
+ dep_str
316
+ ])
317
+
318
+ # Generate dependency visualization if available
319
+ html_out_content = ""
320
+ if doc.has_annotation("DEP"):
321
+ try:
322
+ options = {
323
+ "compact": True,
324
+ "bg": "#ffffff",
325
+ "color": "#000000",
326
+ "font": "Source Sans Pro"
327
+ }
328
+ html_svg = displacy.render(doc, style="dep", jupyter=False, options=options)
329
+ svg_b64 = base64.b64encode(html_svg.encode("utf-8")).decode("utf-8")
330
+ html_out_content = f"""
331
+ <div style="background-color: #ffffff; overflow-x: auto; border: 1px solid #e6e9ef;
332
+ border-radius: 0.25rem; padding: 1rem; line-height: 2.5;">
333
+ <img src="data:image/svg+xml;base64,{svg_b64}" alt="Dependency Parse" />
334
+ </div>
335
+ """
336
+ except Exception as viz_error:
337
+ print(f"Warning: Could not generate visualization: {viz_error}")
338
+ html_out_content = f"<p style='color: orange;'>Dependency visualization could not be generated: {viz_error}</p>"
339
+ else:
340
+ model_name = MODEL_INFO[lang_code][1]
341
+ html_out_content = f"""
342
+ <p style='color: orange;'>
343
+ Dependency parse visualization is not available for the selected model ('{model_name}').
344
+ The model may not include a parser component.
345
+ </p>
346
+ """
347
+
348
+ # Yield final results
349
+ yield {
350
+ "df_out": dataframe_output,
351
+ "json_out": json_output,
352
+ "html_out": html_out_content,
353
+ "analyze_button": gr.Button(interactive=True, value=current_ui_config["button_text"])
354
+ }
355
+
356
+ except Exception as e:
357
+ print(f"--- ERROR during get_analysis ---")
358
+ traceback.print_exc()
359
+
360
+ error_details = str(e)
361
+ error_html = f"""
362
+ <div style='color: red; border: 1px solid red; padding: 10px; border-radius: 5px;
363
+ background-color: #fff5f5;'>
364
+ <strong>{error_prefix}</strong> {error_details}
365
+ </div>
366
+ """
367
+
368
+ yield {
369
+ "df_out": [[f"{error_prefix}{error_details}"]],
370
+ "json_out": {"error": error_details},
371
+ "html_out": error_html,
372
+ "analyze_button": gr.Button(interactive=True, value=current_ui_config["button_text"])
373
+ }
374
+
375
+ # ============================================================================
376
+ # UI UPDATE
377
+ # ============================================================================
378
+
379
def update_ui(ui_lang: str):
    """Refresh every translatable component after a UI-language switch.

    Looks up the translation table for the selected language (case-insensitive,
    falling back to English for unknown codes) and returns freshly configured
    Gradio components, keyed by the same string names the event wiring uses.
    Only labels/values/placeholders change; component wiring is untouched.
    """
    texts = UI_TEXT.get(ui_lang.lower(), UI_TEXT["en"])

    updates = {}
    updates["markdown_title"] = gr.Markdown(value=texts["title"])
    updates["markdown_subtitle"] = gr.Markdown(value=texts["subtitle"])
    updates["ui_lang_radio"] = gr.Radio(label=texts["ui_lang_label"])
    updates["model_lang_radio"] = gr.Radio(label=texts["model_lang_label"])
    updates["text_input"] = gr.Textbox(
        label=texts["input_label"],
        placeholder=texts["input_placeholder"]
    )
    updates["analyze_button"] = gr.Button(value=texts["button_text"])
    updates["tab_graphic"] = gr.Tab(label=texts["tab_graphic"])
    updates["tab_table"] = gr.Tab(label=texts["tab_table"])
    updates["tab_json"] = gr.Tab(label=texts["tab_json"])
    updates["html_out"] = gr.HTML(label=texts["html_label"])
    updates["df_out"] = gr.DataFrame(
        label=texts["table_label"],
        headers=texts["table_headers"],
        interactive=False
    )
    updates["json_out"] = gr.JSON(label=texts["json_label"])
    return updates
405
+
406
+ # ============================================================================
407
+ # GRADIO INTERFACE
408
+ # ============================================================================
409
+
410
def create_interface():
    """Create and configure the Gradio Blocks interface.

    Builds the full UI (language selectors, text input, three result tabs)
    using the English strings from UI_TEXT as initial labels, wires the
    analyze/ui-language events, and returns the Blocks app for launching.
    NOTE: component creation order inside the `with` blocks defines the
    on-screen layout — do not reorder.
    """
    default_config = UI_TEXT["en"]

    # Generate model choices dynamically from the registry, e.g. "EN (<model name>)".
    # MODEL_INFO values are tuples; index [1] is the human-readable model name
    # (see its use elsewhere in this file).
    model_lang_choices = [
        f"{k.upper()} ({v[1]})"
        for k, v in MODEL_INFO.items()
    ]

    with gr.Blocks(title="Multilingual Morpho-Syntactic Analyzer") as demo:
        # Header: two side-by-side radios — one for UI language, one for the
        # spaCy model used to analyze the input text.
        with gr.Row():
            ui_lang_radio = gr.Radio(
                ["DE", "EN", "ES"],
                label=default_config["ui_lang_label"],
                value="EN"
            )
            model_lang_radio = gr.Radio(
                model_lang_choices,
                label=default_config["model_lang_label"],
                value=model_lang_choices[0]  # first registered model is the default
            )

        markdown_title = gr.Markdown(default_config["title"])
        markdown_subtitle = gr.Markdown(default_config["subtitle"])

        # Input section
        text_input = gr.Textbox(
            label=default_config["input_label"],
            placeholder=default_config["input_placeholder"],
            lines=5
        )
        analyze_button = gr.Button(default_config["button_text"], variant="primary")

        # Output tabs: dependency-parse graphic, token table, raw JSON (API view)
        with gr.Tabs() as tabs:
            with gr.Tab(default_config["tab_graphic"]) as tab_graphic:
                html_out = gr.HTML(label=default_config["html_label"])

            with gr.Tab(default_config["tab_table"]) as tab_table:
                df_out = gr.DataFrame(
                    label=default_config["table_label"],
                    headers=default_config["table_headers"],
                    interactive=False
                )

            with gr.Tab(default_config["tab_json"]) as tab_json:
                json_out = gr.JSON(label=default_config["json_label"])

        # Event handlers.
        # NOTE(review): `outputs=` is given as a dict of string name -> component,
        # matching the string-keyed dicts yielded by get_analysis / returned by
        # update_ui. The standard Gradio API expects a component (or list of
        # components) for `outputs`, with dict returns keyed by the component
        # objects themselves — confirm this string-keyed form works with the
        # pinned Gradio version.
        analyze_button.click(
            fn=get_analysis,
            inputs=[ui_lang_radio, model_lang_radio, text_input, analyze_button],
            outputs={
                "df_out": df_out,
                "json_out": json_out,
                "html_out": html_out,
                "analyze_button": analyze_button
            },
            api_name="get_morphology"  # exposed endpoint name in the "View API" page
        )

        ui_lang_radio.change(
            fn=update_ui,
            inputs=ui_lang_radio,
            outputs={
                "markdown_title": markdown_title,
                "markdown_subtitle": markdown_subtitle,
                "ui_lang_radio": ui_lang_radio,
                "model_lang_radio": model_lang_radio,
                "text_input": text_input,
                "analyze_button": analyze_button,
                "tab_graphic": tab_graphic,
                "tab_table": tab_table,
                "tab_json": tab_json,
                "html_out": html_out,
                "df_out": df_out,
                "json_out": json_out
            }
        )

    return demo
493
+
494
+ # ============================================================================
495
+ # MAIN
496
+ # ============================================================================
497
+
498
if __name__ == "__main__":
    # Startup banner (identical output to the original prints).
    separator = "=" * 70
    print(f"\n{separator}")
    print("MULTILINGUAL MORPHO-SYNTACTIC ANALYZER")
    print("Starting application...")
    print(f"{separator}\n")

    # Load every spaCy model up front so the first request is fast.
    initialize_models()

    # Build the Gradio app and serve it on all interfaces (Spaces default port).
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )