Upload 6 files
Browse files
- app.py +20 -3
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -158,18 +158,30 @@ def load_reranker():
|
|
| 158 |
|
| 159 |
# Cache for spaCy models
|
| 160 |
_spacy_models = {}
|
|
|
|
| 161 |
|
| 162 |
def load_spacy_model(lang: str):
|
| 163 |
"""Carrega modelo spaCy com cache."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
if lang not in _spacy_models:
|
| 165 |
model_name = "pt_core_news_sm" if lang == "pt" else "en_core_web_sm"
|
| 166 |
try:
|
| 167 |
_spacy_models[lang] = spacy.load(model_name)
|
| 168 |
logging.info(f"Modelo spaCy '{model_name}' carregado.")
|
| 169 |
except OSError:
|
| 170 |
-
logging.warning(f"Modelo {model_name} não encontrado.
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
return _spacy_models[lang]
|
| 174 |
|
| 175 |
def detect_language(texts: List[str]) -> str:
|
|
@@ -186,6 +198,11 @@ def extract_entities(textos: List[str]) -> List[List[Tuple[str, str]]]:
|
|
| 186 |
lang = detect_language(textos)
|
| 187 |
nlp = load_spacy_model(lang)
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
entities_by_doc = []
|
| 190 |
for text in textos:
|
| 191 |
# Limitar tamanho do texto para performance
|
|
|
|
| 158 |
|
| 159 |
# Cache of loaded spaCy pipelines, keyed by the language code passed in.
_spacy_models = {}
# Set to False after a failed download attempt so later calls return None
# immediately instead of retrying the download on every request.
_spacy_available = True


def load_spacy_model(lang: str):
    """Load the spaCy model for ``lang``, caching it for later calls.

    ``"pt"`` selects ``pt_core_news_sm``; any other value selects
    ``en_core_web_sm``. If the model is not installed, one download attempt
    is made with the interpreter that is running this process. When that
    attempt fails, spaCy is marked unavailable for the rest of the process.

    Args:
        lang: Language code used both to pick the model and as the cache key.

    Returns:
        The loaded spaCy pipeline, or ``None`` when spaCy has been marked
        unavailable (callers must handle ``None``).
    """
    global _spacy_available

    if not _spacy_available:
        return None

    if lang not in _spacy_models:
        model_name = "pt_core_news_sm" if lang == "pt" else "en_core_web_sm"
        try:
            _spacy_models[lang] = spacy.load(model_name)
            logging.info(f"Modelo spaCy '{model_name}' carregado.")
        except OSError:
            logging.warning(f"Modelo {model_name} não encontrado. Tentando baixar...")
            try:
                import importlib
                import subprocess
                import sys

                # Use the running interpreter so the model is installed into
                # the same environment; a bare "python" may resolve to another
                # installation or be missing from PATH. sys.executable can be
                # empty in embedded setups, hence the fallback.
                subprocess.run(
                    [sys.executable or "python", "-m", "spacy", "download", model_name],
                    check=True,
                )
                # The package was installed after interpreter start-up; drop
                # the import finders' caches so it can be found.
                importlib.invalidate_caches()
                _spacy_models[lang] = spacy.load(model_name)
            except Exception as e:
                # Deliberate best-effort boundary: any failure here degrades
                # to "no spaCy" instead of crashing the application.
                logging.error(f"Falha ao baixar modelo spaCy: {e}")
                _spacy_available = False
                return None
    return _spacy_models[lang]
|
| 186 |
|
| 187 |
def detect_language(texts: List[str]) -> str:
|
|
|
|
| 198 |
lang = detect_language(textos)
|
| 199 |
nlp = load_spacy_model(lang)
|
| 200 |
|
| 201 |
+
# Fallback se spaCy não estiver disponível
|
| 202 |
+
if nlp is None:
|
| 203 |
+
logging.warning("spaCy não disponível. Retornando entidades vazias.")
|
| 204 |
+
return [[] for _ in textos]
|
| 205 |
+
|
| 206 |
entities_by_doc = []
|
| 207 |
for text in textos:
|
| 208 |
# Limitar tamanho do texto para performance
|
requirements.txt
CHANGED
|
@@ -18,6 +18,8 @@ hdbscan
|
|
| 18 |
nltk
|
| 19 |
spacy
|
| 20 |
langdetect
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# --- TORCH CPU ---
|
| 23 |
torch
|
|
|
|
| 18 |
nltk
|
| 19 |
spacy
|
| 20 |
langdetect
|
| 21 |
+
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
| 22 |
+
https://github.com/explosion/spacy-models/releases/download/pt_core_news_sm-3.7.0/pt_core_news_sm-3.7.0-py3-none-any.whl
|
| 23 |
|
| 24 |
# --- TORCH CPU ---
|
| 25 |
torch
|