cstr committed on
Commit
92ef58f
·
verified ·
1 Parent(s): 783f826

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -82
app.py CHANGED
@@ -8,14 +8,13 @@ import sys
8
  import os
9
  from pathlib import Path
10
  from typing import Dict, Optional, Tuple
 
 
11
 
12
  # ============================================================================
13
  # CONFIGURATION
14
  # ============================================================================
15
 
16
- # We can now add all models. The installer will work for all of them.
17
- # The key is the lang code used by the UI.
18
- # The value is (Display Name, spaCy model name, type)
19
  MODEL_INFO: Dict[str, Tuple[str, str, str]] = {
20
  "de": ("German", "de_core_news_md", "spacy"),
21
  "en": ("English", "en_core_web_md", "spacy"),
@@ -23,10 +22,11 @@ MODEL_INFO: Dict[str, Tuple[str, str, str]] = {
23
  "grc-proiel-trf": ("Ancient Greek (PROIEL TRF)", "grc_proiel_trf", "grecy"),
24
  "grc-perseus-trf": ("Ancient Greek (Perseus TRF)", "grc_perseus_trf", "grecy"),
25
  "grc-proiel-lg": ("Ancient Greek (PROIEL LG)", "grc_proiel_lg", "grecy"),
26
- # Add other models here as needed
 
 
27
  }
28
 
29
- # UI Text is unchanged
30
  UI_TEXT = {
31
  "de": {
32
  "title": "# 🔍 Mehrsprachiger Morpho-Syntaktischer Analysator",
@@ -77,25 +77,45 @@ UI_TEXT = {
77
  "tab_table": "Tabla",
78
  "tab_json": "JSON",
79
  "html_label": "Análisis de Dependencias",
80
- "table_label": "Análisis de Dependencias",
81
  "table_headers": ["Palabra", "Lema", "POS", "Etiqueta", "Morfología", "Dependencia"],
82
  "json_label": "Salida JSON",
83
  "error_message": "Error: "
84
  }
85
  }
86
 
87
- # This global dict holds the loaded models
88
  MODELS: Dict[str, Optional[spacy.Language]] = {}
89
 
90
  # ============================================================================
91
- # FIXED GRECY MODEL INSTALLATION
92
  # ============================================================================
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def install_grecy_model_from_github(model_name: str) -> bool:
95
  """
96
  Installs a greCy model from your specific GitHub Release.
97
- This bypasses the broken 'grecy install' and uses --no-deps
98
- to prevent dependency conflicts.
99
  """
100
  marker_file = Path(f".{model_name}_installed")
101
  if marker_file.exists():
@@ -104,49 +124,37 @@ def install_grecy_model_from_github(model_name: str) -> bool:
104
 
105
  print(f"Installing grecy model: {model_name}...")
106
 
107
- # --- Logic from our install_model.py script ---
 
 
108
  if model_name == "grc_proiel_trf":
109
  wheel_filename = "grc_proiel_trf-3.7.5-py3-none-any.whl"
110
- elif model_name in ["grc_perseus_trf", "grc_proiel_lg", "grc_perseus_lg", "grc_proiel_sm", "grc_perseus_sm"]:
111
- wheel_filename = f"{model_name.replace('_', '-')}-0.0.0-py3-none-any.whl"
112
  else:
113
- print(f"✗ Unknown grecy model: {model_name}")
114
- return False
115
- # --- End logic ---
 
116
 
117
- # This URL points to YOUR fork's release
118
  install_url = f"https://github.com/CrispStrobe/greCy/releases/download/v1.0-models/{wheel_filename}"
119
-
120
- # This is the command that fixes everything
121
- cmd = [
122
- sys.executable, "-m", "pip", "install",
123
- install_url,
124
- "--no-deps" # <-- The most important part!
125
- ]
126
 
127
  print(f"Running: {' '.join(cmd)}")
128
  try:
129
- result = subprocess.run(
130
- cmd,
131
- capture_output=True,
132
- text=True,
133
- check=True, # This will raise an error if pip fails
134
- timeout=900 # 15 minute timeout
135
- )
136
-
137
- if result.stdout:
138
- print("STDOUT:", result.stdout)
139
- if result.stderr:
140
- print("STDERR:", result.stderr)
141
-
142
  print(f"✓ Successfully installed {model_name} from GitHub")
143
  marker_file.touch()
144
  return True
145
-
146
  except subprocess.CalledProcessError as e:
147
  print(f"✗ Installation subprocess FAILED with code {e.returncode}")
148
  print("STDOUT:", e.stdout)
149
  print("STDERR:", e.stderr)
 
 
 
 
 
150
  return False
151
  except Exception as e:
152
  print(f"✗ Installation exception: {e}")
@@ -154,14 +162,13 @@ def install_grecy_model_from_github(model_name: str) -> bool:
154
  return False
155
 
156
  # ============================================================================
157
- # MODEL LOADING (SIMPLIFIED)
158
  # ============================================================================
159
 
160
  def load_spacy_model(model_name: str) -> Optional[spacy.Language]:
161
  """Load or install a standard spaCy model."""
162
  try:
163
- nlp = spacy.load(model_name)
164
- return nlp
165
  except OSError:
166
  print(f"Installing {model_name}...")
167
  try:
@@ -176,26 +183,14 @@ def load_grecy_model(model_name: str) -> Optional[spacy.Language]:
176
  Load a grecy model.
177
  First, installs it from our GitHub. Then, loads it.
178
  """
179
- # Step 1: Ensure it's installed using our fixed method
180
- install_success = install_grecy_model_from_github(model_name)
181
-
182
- if not install_success:
183
  print(f"✗ Cannot load {model_name} because installation failed.")
184
  return None
185
-
186
- # Step 2: Now that it's installed, just load it.
187
- # All the complex strategies are no longer needed.
188
  try:
189
- # We MUST refresh the import paths for spaCy to find the new package
190
  print("Refreshing importlib to find new package...")
191
- import importlib
192
- import site
193
  importlib.invalidate_caches()
194
- try:
195
- # Reload site-packages to find the newly pip-installed package
196
- importlib.reload(site)
197
- except Exception as e:
198
- print(f"Warning: Could not reload 'site'. {e}")
199
 
200
  print(f"Trying: spacy.load('{model_name}')")
201
  nlp = spacy.load(model_name)
@@ -207,36 +202,43 @@ def load_grecy_model(model_name: str) -> Optional[spacy.Language]:
207
  traceback.print_exc()
208
  return None
209
 
210
-
211
  def initialize_models():
212
- """Load all models at startup."""
 
 
 
 
213
  print("\n" + "="*70)
214
  print("INITIALIZING MODELS")
215
  print("="*70 + "\n")
216
 
217
- # We iterate over the key (lang_code) and the tuple (lang_name, model_name, type)
 
 
 
 
 
218
  for lang_code, (lang_name, model_name, model_type) in MODEL_INFO.items():
219
- print(f"Loading {lang_name} ({model_name})...")
220
-
221
- if model_type == "grecy":
222
- nlp = load_grecy_model(model_name)
223
- else:
224
  nlp = load_spacy_model(model_name)
225
-
226
- # The key in the MODELS dict is the lang_code from MODEL_INFO
227
- MODELS[lang_code] = nlp
228
-
229
- if nlp:
230
- print(f" {lang_name} ready\n")
231
  else:
232
- print(f"✗ {lang_name} FAILED\n")
 
 
233
 
234
- loaded = sum(1 for m in MODELS.values() if m is not None)
235
- print(f"Loaded {loaded}/{len(MODELS)} models successfully")
236
  print("="*70 + "\n")
237
 
238
  # ============================================================================
239
- # ANALYSIS
240
  # ============================================================================
241
 
242
  def get_analysis(ui_lang: str, model_lang_key: str, text: str):
@@ -249,18 +251,40 @@ def get_analysis(ui_lang: str, model_lang_key: str, text: str):
249
  return ([], [], "<p style='color: orange;'>No text provided.</p>",
250
  gr.Button(value=ui_config["button_text"], interactive=True))
251
 
252
- # model_lang_key is now the key from MODEL_INFO, e.g., "grc-proiel-trf"
253
- if model_lang_key not in MODELS or MODELS[model_lang_key] is None:
254
- raise ValueError(f"Model for {model_lang_key} is not available. Please check logs.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
- nlp = MODELS[model_lang_key]
257
  doc = nlp(text)
258
 
259
  dataframe_output = []
260
  json_output = []
261
 
262
  for token in doc:
263
- # FIXED: simplified lemma access for spaCy 3
264
  lemma_str = token.lemma_
265
  morph_str = str(token.morph) if token.has_morph() else ''
266
  dep_str = token.dep_ if doc.has_annotation("DEP") else ''
@@ -275,7 +299,6 @@ def get_analysis(ui_lang: str, model_lang_key: str, text: str):
275
 
276
  dataframe_output.append([token.text, lemma_str, pos_str, tag_str, morph_str, dep_str])
277
 
278
- # Generate visualization
279
  html_out = ""
280
  if doc.has_annotation("DEP"):
281
  try:
@@ -324,7 +347,6 @@ def create_interface():
324
  config = UI_TEXT["en"]
325
 
326
  # Use the keys from MODEL_INFO as the choices
327
- # The value passed to the backend will be "grc-proiel-trf"
328
  model_choices = list(MODEL_INFO.keys())
329
 
330
  with gr.Blocks(title="Multilingual Morpho-Syntactic Analyzer") as demo:
@@ -368,7 +390,6 @@ if __name__ == "__main__":
368
  print("MULTILINGUAL MORPHO-SYNTACTIC ANALYZER")
369
  print("="*70 + "\n")
370
 
371
- # This will now use the new, robust installation
372
  initialize_models()
373
 
374
  demo = create_interface()
 
8
  import os
9
  from pathlib import Path
10
  from typing import Dict, Optional, Tuple
11
+ import importlib
12
+ import site
13
 
14
  # ============================================================================
15
  # CONFIGURATION
16
  # ============================================================================
17
 
 
 
 
18
  MODEL_INFO: Dict[str, Tuple[str, str, str]] = {
19
  "de": ("German", "de_core_news_md", "spacy"),
20
  "en": ("English", "en_core_web_md", "spacy"),
 
22
  "grc-proiel-trf": ("Ancient Greek (PROIEL TRF)", "grc_proiel_trf", "grecy"),
23
  "grc-perseus-trf": ("Ancient Greek (Perseus TRF)", "grc_perseus_trf", "grecy"),
24
  "grc-proiel-lg": ("Ancient Greek (PROIEL LG)", "grc_proiel_lg", "grecy"),
25
+ "grc-perseus-lg": ("Ancient Greek (Perseus LG)", "grc_perseus_lg", "grecy"),
26
+ "grc-proiel-sm": ("Ancient Greek (PROIEL SM)", "grc_proiel_sm", "grecy"),
27
+ "grc-perseus-sm": ("Ancient Greek (Perseus SM)", "grc_perseus_sm", "grecy"),
28
  }
29
 
 
30
  UI_TEXT = {
31
  "de": {
32
  "title": "# 🔍 Mehrsprachiger Morpho-Syntaktischer Analysator",
 
77
  "tab_table": "Tabla",
78
  "tab_json": "JSON",
79
  "html_label": "Análisis de Dependencias",
80
+ "table_label": "Análisis Morfológico",
81
  "table_headers": ["Palabra", "Lema", "POS", "Etiqueta", "Morfología", "Dependencia"],
82
  "json_label": "Salida JSON",
83
  "error_message": "Error: "
84
  }
85
  }
86
 
87
+ # This global dict holds the *loaded* models
88
  MODELS: Dict[str, Optional[spacy.Language]] = {}
89
 
90
  # ============================================================================
91
+ # DEPENDENCY INSTALLATION
92
  # ============================================================================
93
 
94
+ def install_spacy_transformers_once():
95
+ """
96
+ Installs spacy-transformers, required for all _trf models.
97
+ """
98
+ marker_file = Path(".spacy_transformers_installed")
99
+ if marker_file.exists():
100
+ print("✓ spacy-transformers already installed (marker found)")
101
+ return True
102
+
103
+ print("Installing spacy-transformers (for _trf models)...")
104
+ cmd = [sys.executable, "-m", "pip", "install", "spacy-transformers"]
105
+ try:
106
+ subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=300)
107
+ print("✓ Successfully installed spacy-transformers")
108
+ marker_file.touch()
109
+ return True
110
+ except Exception as e:
111
+ print(f"✗ FAILED to install spacy-transformers: {e}")
112
+ if hasattr(e, 'stderr'): print(e.stderr)
113
+ return False
114
+
115
  def install_grecy_model_from_github(model_name: str) -> bool:
116
  """
117
  Installs a greCy model from your specific GitHub Release.
118
+ Uses --no-deps to prevent dependency conflicts.
 
119
  """
120
  marker_file = Path(f".{model_name}_installed")
121
  if marker_file.exists():
 
124
 
125
  print(f"Installing grecy model: {model_name}...")
126
 
127
+ # --- *** THIS IS THE CRITICAL FILENAME FIX *** ---
128
+ # The wheel filename's "distribution" part uses underscores,
129
+ # not hyphens, to be PEP 427 compliant.
130
  if model_name == "grc_proiel_trf":
131
  wheel_filename = "grc_proiel_trf-3.7.5-py3-none-any.whl"
 
 
132
  else:
133
+ # All other models use the 0.0.0 version and underscore in name
134
+ # e.g., grc_perseus_trf-0.0.0-py3-none-any.whl
135
+ wheel_filename = f"{model_name}-0.0.0-py3-none-any.whl"
136
+ # --- *** END FIX *** ---
137
 
 
138
  install_url = f"https://github.com/CrispStrobe/greCy/releases/download/v1.0-models/{wheel_filename}"
139
+ cmd = [sys.executable, "-m", "pip", "install", install_url, "--no-deps"]
 
 
 
 
 
 
140
 
141
  print(f"Running: {' '.join(cmd)}")
142
  try:
143
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=900)
144
+ if result.stdout: print("STDOUT:", result.stdout)
145
+ if result.stderr: print("STDERR:", result.stderr)
 
 
 
 
 
 
 
 
 
 
146
  print(f"✓ Successfully installed {model_name} from GitHub")
147
  marker_file.touch()
148
  return True
 
149
  except subprocess.CalledProcessError as e:
150
  print(f"✗ Installation subprocess FAILED with code {e.returncode}")
151
  print("STDOUT:", e.stdout)
152
  print("STDERR:", e.stderr)
153
+ print("\n\n!! --- NOTE --- !!")
154
+ print(f"If the error is 'Invalid wheel filename', it means the files on your GitHub Release are named incorrectly.")
155
+ print(f"You must delete the release and re-run the fixed 'create_grecy_release.sh' script.")
156
+ print(f"The correct filename should be: '{wheel_filename}'.")
157
+ print("!! --- END NOTE --- !!\n\n")
158
  return False
159
  except Exception as e:
160
  print(f"✗ Installation exception: {e}")
 
162
  return False
163
 
164
  # ============================================================================
165
+ # MODEL LOADING (NOW WITH LAZY LOADING)
166
  # ============================================================================
167
 
168
  def load_spacy_model(model_name: str) -> Optional[spacy.Language]:
169
  """Load or install a standard spaCy model."""
170
  try:
171
+ return spacy.load(model_name)
 
172
  except OSError:
173
  print(f"Installing {model_name}...")
174
  try:
 
183
  Load a grecy model.
184
  First, installs it from our GitHub. Then, loads it.
185
  """
186
+ if not install_grecy_model_from_github(model_name):
 
 
 
187
  print(f"✗ Cannot load {model_name} because installation failed.")
188
  return None
 
 
 
189
  try:
 
190
  print("Refreshing importlib to find new package...")
 
 
191
  importlib.invalidate_caches()
192
+ try: importlib.reload(site)
193
+ except Exception: pass # Fails in some envs, but that's ok
 
 
 
194
 
195
  print(f"Trying: spacy.load('{model_name}')")
196
  nlp = spacy.load(model_name)
 
202
  traceback.print_exc()
203
  return None
204
 
 
205
  def initialize_models():
206
+ """
207
+ Load all *non-grecy* models at startup for speed.
208
+ Grecy models will be lazy-loaded on first use.
209
+ Also ensures spacy-transformers is installed.
210
+ """
211
  print("\n" + "="*70)
212
  print("INITIALIZING MODELS")
213
  print("="*70 + "\n")
214
 
215
+ # First, ensure dependencies are met
216
+ install_spacy_transformers_once()
217
+
218
+ loaded_count = 0
219
+ spacy_model_count = 0
220
+
221
  for lang_code, (lang_name, model_name, model_type) in MODEL_INFO.items():
222
+ if model_type == "spacy":
223
+ spacy_model_count += 1
224
+ print(f"Loading {lang_name} ({model_name})...")
 
 
225
  nlp = load_spacy_model(model_name)
226
+ MODELS[lang_code] = nlp # Store the loaded model
227
+ if nlp:
228
+ print(f"✓ {lang_name} ready\n")
229
+ loaded_count += 1
230
+ else:
231
+ print(f" {lang_name} FAILED\n")
232
  else:
233
+ # It's a grecy model, just mark as not loaded yet
234
+ print(f"✓ {lang_name} ({model_name}) will be loaded on first use.\n")
235
+ MODELS[lang_code] = None # Mark as available but not loaded
236
 
237
+ print(f"Pre-loaded {loaded_count}/{spacy_model_count} standard models.")
 
238
  print("="*70 + "\n")
239
 
240
  # ============================================================================
241
+ # ANALYSIS (WITH LAZY LOADING)
242
  # ============================================================================
243
 
244
  def get_analysis(ui_lang: str, model_lang_key: str, text: str):
 
251
  return ([], [], "<p style='color: orange;'>No text provided.</p>",
252
  gr.Button(value=ui_config["button_text"], interactive=True))
253
 
254
+ # --- LAZY LOADING LOGIC ---
255
+ nlp = MODELS.get(model_lang_key)
256
+
257
+ if nlp is None:
258
+ # Model hasn't been loaded yet
259
+ print(f"First use of {model_lang_key}. Loading model...")
260
+
261
+ if model_lang_key not in MODEL_INFO:
262
+ raise ValueError(f"Unknown model key: {model_lang_key}")
263
+
264
+ _, model_name, model_type = MODEL_INFO[model_lang_key]
265
+
266
+ if model_type == "grecy":
267
+ nlp = load_grecy_model(model_name)
268
+ else:
269
+ # This case should be pre-loaded, but as a fallback
270
+ nlp = load_spacy_model(model_name)
271
+
272
+ if nlp is None:
273
+ # Failed to load
274
+ MODELS.pop(model_lang_key, None) # Remove from cache to try again next time
275
+ raise ValueError(f"Model for {model_lang_key} ({model_name}) FAILED to load. Check logs.")
276
+ else:
277
+ # Store in cache
278
+ MODELS[model_lang_key] = nlp
279
+ print(f"✓ {model_lang_key} is now loaded and cached.")
280
+ # --- END LAZY LOADING ---
281
 
 
282
  doc = nlp(text)
283
 
284
  dataframe_output = []
285
  json_output = []
286
 
287
  for token in doc:
 
288
  lemma_str = token.lemma_
289
  morph_str = str(token.morph) if token.has_morph() else ''
290
  dep_str = token.dep_ if doc.has_annotation("DEP") else ''
 
299
 
300
  dataframe_output.append([token.text, lemma_str, pos_str, tag_str, morph_str, dep_str])
301
 
 
302
  html_out = ""
303
  if doc.has_annotation("DEP"):
304
  try:
 
347
  config = UI_TEXT["en"]
348
 
349
  # Use the keys from MODEL_INFO as the choices
 
350
  model_choices = list(MODEL_INFO.keys())
351
 
352
  with gr.Blocks(title="Multilingual Morpho-Syntactic Analyzer") as demo:
 
390
  print("MULTILINGUAL MORPHO-SYNTACTIC ANALYZER")
391
  print("="*70 + "\n")
392
 
 
393
  initialize_models()
394
 
395
  demo = create_interface()