Madras1 committed on
Commit
c7e17a4
·
verified ·
1 Parent(s): 3c9cd4c

Upload 6 files

Browse files
Files changed (2) hide show
  1. app.py +20 -3
  2. requirements.txt +2 -0
app.py CHANGED
@@ -158,18 +158,30 @@ def load_reranker():
158
 
159
  # Cache for spaCy models
160
  _spacy_models = {}
 
161
 
162
  def load_spacy_model(lang: str):
163
  """Carrega modelo spaCy com cache."""
 
 
 
 
 
164
  if lang not in _spacy_models:
165
  model_name = "pt_core_news_sm" if lang == "pt" else "en_core_web_sm"
166
  try:
167
  _spacy_models[lang] = spacy.load(model_name)
168
  logging.info(f"Modelo spaCy '{model_name}' carregado.")
169
  except OSError:
170
- logging.warning(f"Modelo {model_name} não encontrado. Baixando...")
171
- spacy.cli.download(model_name.replace("_", "-").replace("-sm", ""))
172
- _spacy_models[lang] = spacy.load(model_name)
 
 
 
 
 
 
173
  return _spacy_models[lang]
174
 
175
  def detect_language(texts: List[str]) -> str:
@@ -186,6 +198,11 @@ def extract_entities(textos: List[str]) -> List[List[Tuple[str, str]]]:
186
  lang = detect_language(textos)
187
  nlp = load_spacy_model(lang)
188
 
 
 
 
 
 
189
  entities_by_doc = []
190
  for text in textos:
191
  # Limitar tamanho do texto para performance
 
158
 
159
  # Cache for spaCy models
160
  _spacy_models = {}
161
+ _spacy_available = True
162
 
163
  def load_spacy_model(lang: str):
164
  """Carrega modelo spaCy com cache."""
165
+ global _spacy_available
166
+
167
+ if not _spacy_available:
168
+ return None
169
+
170
  if lang not in _spacy_models:
171
  model_name = "pt_core_news_sm" if lang == "pt" else "en_core_web_sm"
172
  try:
173
  _spacy_models[lang] = spacy.load(model_name)
174
  logging.info(f"Modelo spaCy '{model_name}' carregado.")
175
  except OSError:
176
+ logging.warning(f"Modelo {model_name} não encontrado. Tentando baixar...")
177
+ try:
178
+ import subprocess
179
+ subprocess.run(["python", "-m", "spacy", "download", model_name], check=True)
180
+ _spacy_models[lang] = spacy.load(model_name)
181
+ except Exception as e:
182
+ logging.error(f"Falha ao baixar modelo spaCy: {e}")
183
+ _spacy_available = False
184
+ return None
185
  return _spacy_models[lang]
186
 
187
  def detect_language(texts: List[str]) -> str:
 
198
  lang = detect_language(textos)
199
  nlp = load_spacy_model(lang)
200
 
201
+ # Fallback se spaCy não estiver disponível
202
+ if nlp is None:
203
+ logging.warning("spaCy não disponível. Retornando entidades vazias.")
204
+ return [[] for _ in textos]
205
+
206
  entities_by_doc = []
207
  for text in textos:
208
  # Limitar tamanho do texto para performance
requirements.txt CHANGED
@@ -18,6 +18,8 @@ hdbscan
18
  nltk
19
  spacy
20
  langdetect
 
 
21
 
22
  # --- TORCH CPU ---
23
  torch
 
18
  nltk
19
  spacy
20
  langdetect
21
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
22
+ https://github.com/explosion/spacy-models/releases/download/pt_core_news_sm-3.7.0/pt_core_news_sm-3.7.0-py3-none-any.whl
23
 
24
  # --- TORCH CPU ---
25
  torch