toanatp committed on
Commit
bfe0686
·
verified ·
1 Parent(s): 0b174df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -116,7 +116,8 @@ def _load_spacy_model(lang_code: str | None):
116
  if spacy is None:
117
  return None
118
 
119
- code = (lang_code or "en").lower()
 
120
  if code in _SPACY_CACHE:
121
  return _SPACY_CACHE[code]
122
 
@@ -224,12 +225,19 @@ def _map_spacy_to_smart_tokens(smart_tokens, full_text, nlp_model):
224
  if not spacy.tokens.Token.has_extension("noun_chunk_id"):
225
  spacy.tokens.Token.set_extension("noun_chunk_id", default=None)
226
 
 
227
  try:
228
- for chunk_id, chunk in enumerate(doc.noun_chunks):
229
- for token in chunk:
230
- token._.noun_chunk_id = chunk_id
231
- except (NotImplementedError, AttributeError):
232
- pass
 
 
 
 
 
 
233
 
234
  spacy_token_map = {spacy_tok.idx: spacy_tok for spacy_tok in doc}
235
  for smart_tok in smart_tokens:
 
116
  if spacy is None:
117
  return None
118
 
119
+ code = (lang_code or "en").lower().replace("_", "-")
120
+ code = code.split("-")[0]
121
  if code in _SPACY_CACHE:
122
  return _SPACY_CACHE[code]
123
 
 
225
  if not spacy.tokens.Token.has_extension("noun_chunk_id"):
226
  spacy.tokens.Token.set_extension("noun_chunk_id", default=None)
227
 
228
+ can_use_noun_chunks = False
229
  try:
230
+ can_use_noun_chunks = doc.has_annotation("DEP")
231
+ except Exception:
232
+ can_use_noun_chunks = False
233
+
234
+ if can_use_noun_chunks:
235
+ try:
236
+ for chunk_id, chunk in enumerate(doc.noun_chunks):
237
+ for token in chunk:
238
+ token._.noun_chunk_id = chunk_id
239
+ except (NotImplementedError, AttributeError, ValueError):
240
+ pass
241
 
242
  spacy_token_map = {spacy_tok.idx: spacy_tok for spacy_tok in doc}
243
  for smart_tok in smart_tokens: