Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -116,7 +116,8 @@ def _load_spacy_model(lang_code: str | None):
|
|
| 116 |
if spacy is None:
|
| 117 |
return None
|
| 118 |
|
| 119 |
-
code = (lang_code or "en").lower()
|
|
|
|
| 120 |
if code in _SPACY_CACHE:
|
| 121 |
return _SPACY_CACHE[code]
|
| 122 |
|
|
@@ -224,12 +225,19 @@ def _map_spacy_to_smart_tokens(smart_tokens, full_text, nlp_model):
|
|
| 224 |
if not spacy.tokens.Token.has_extension("noun_chunk_id"):
|
| 225 |
spacy.tokens.Token.set_extension("noun_chunk_id", default=None)
|
| 226 |
|
|
|
|
| 227 |
try:
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
spacy_token_map = {spacy_tok.idx: spacy_tok for spacy_tok in doc}
|
| 235 |
for smart_tok in smart_tokens:
|
|
|
|
| 116 |
if spacy is None:
|
| 117 |
return None
|
| 118 |
|
| 119 |
+
code = (lang_code or "en").lower().replace("_", "-")
|
| 120 |
+
code = code.split("-")[0]
|
| 121 |
if code in _SPACY_CACHE:
|
| 122 |
return _SPACY_CACHE[code]
|
| 123 |
|
|
|
|
| 225 |
if not spacy.tokens.Token.has_extension("noun_chunk_id"):
|
| 226 |
spacy.tokens.Token.set_extension("noun_chunk_id", default=None)
|
| 227 |
|
| 228 |
+
can_use_noun_chunks = False
|
| 229 |
try:
|
| 230 |
+
can_use_noun_chunks = doc.has_annotation("DEP")
|
| 231 |
+
except Exception:
|
| 232 |
+
can_use_noun_chunks = False
|
| 233 |
+
|
| 234 |
+
if can_use_noun_chunks:
|
| 235 |
+
try:
|
| 236 |
+
for chunk_id, chunk in enumerate(doc.noun_chunks):
|
| 237 |
+
for token in chunk:
|
| 238 |
+
token._.noun_chunk_id = chunk_id
|
| 239 |
+
except (NotImplementedError, AttributeError, ValueError):
|
| 240 |
+
pass
|
| 241 |
|
| 242 |
spacy_token_map = {spacy_tok.idx: spacy_tok for spacy_tok in doc}
|
| 243 |
for smart_tok in smart_tokens:
|