emanuelaboros committed on
Commit
b48099f
·
1 Parent(s): c30f539
Files changed (1) hide show
  1. generic_ner.py +8 -4
generic_ner.py CHANGED
@@ -277,16 +277,20 @@ from stopwordsiso import stopwords
277
  stop_words = stopwords(["en", "fr", "de"])
278
 
279
 
280
def remove_trailing_stopwords(entities):
    """Drop stopwords from the end of every entity's text.

    Each entity is a mapping with a ``"text"`` key. The text is split on
    whitespace, trailing words whose lowercase form is found in the
    module-level ``stop_words`` collection are discarded, and the remaining
    words are re-joined with single spaces.

    The entities are modified in place; the same ``entities`` object is
    returned for convenience. An entity made up only of stopwords ends up
    with an empty string as its text.
    """
    for item in entities:
        tokens = item["text"].split()

        # Walk backwards from the end to find how many tokens to keep.
        keep = len(tokens)
        while keep > 0 and tokens[keep - 1].lower() in stop_words:
            keep -= 1

        item["text"] = " ".join(tokens[:keep])

    return entities
 
277
  stop_words = stopwords(["en", "fr", "de"])
278
 
279
 
280
def remove_trailing_stopwords(entities, stop_words):
    """Strip stopwords from both the beginning and the end of each entity's text.

    Despite the name, leading stopwords are removed as well as trailing ones.
    Stopwords in the middle of the text are left untouched.

    Args:
        entities: Iterable of mappings, each with a ``"text"`` key holding a
            string. The mappings are updated in place.
        stop_words: Container supporting ``in`` (ideally a set). Words are
            lowercased before the membership test, so entries should be
            lowercase.

    Returns:
        The same ``entities`` object that was passed in. Text is re-joined
        with single spaces, so runs of whitespace are collapsed; an entity
        consisting only of stopwords ends up with an empty string.
    """
    for entity in entities:
        words = entity["text"].split()

        # Scan inwards from both ends with indices instead of popping:
        # list.pop(0) shifts every remaining element on each call.
        start, end = 0, len(words)
        while start < end and words[start].lower() in stop_words:
            start += 1
        while end > start and words[end - 1].lower() in stop_words:
            end -= 1

        # Keep only the span between the first and last non-stopword.
        entity["text"] = " ".join(words[start:end])

    return entities