Commit ·
b48099f
1
Parent(s): c30f539
degbu
Browse files- generic_ner.py +8 -4
generic_ner.py
CHANGED
|
@@ -277,16 +277,20 @@ from stopwordsiso import stopwords
|
|
| 277 |
stop_words = stopwords(["en", "fr", "de"])
|
| 278 |
|
| 279 |
|
| 280 |
-
def remove_trailing_stopwords(entities):
|
| 281 |
-
# This function removes stopwords from the end of each entity's text
|
| 282 |
for entity in entities:
|
| 283 |
words = entity["text"].split()
|
| 284 |
|
| 285 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
while words and words[-1].lower() in stop_words:
|
| 287 |
words.pop() # Remove the last word if it's a stopword
|
| 288 |
|
| 289 |
-
# Join the words back together and update the entity's text
|
| 290 |
entity["text"] = " ".join(words)
|
| 291 |
|
| 292 |
return entities
|
|
|
|
| 277 |
# Module-level stopword collection built from stopwordsiso for the codes
# "en", "fr" and "de" (presumably English, French and German - confirm).
stop_words = stopwords(["en", "fr", "de"])
|
| 278 |
|
| 279 |
|
| 280 |
+
def remove_trailing_stopwords(entities, stop_words):
    """Strip stopwords from both ends of each entity's text, in place.

    Despite the name (kept so existing callers keep working), leading
    stopwords are removed as well as trailing ones. Stopwords in the
    interior of an entity are left untouched.

    The text is tokenised with ``str.split()`` and re-joined with single
    spaces, so runs of whitespace are collapsed and surrounding whitespace
    is dropped. An entity made up only of stopwords ends up with an empty
    ``"text"``; it is not removed from ``entities``.

    Args:
        entities: Iterable of dict-like objects with a ``"text"`` string
            entry. Each one is updated in place.
        stop_words: Container of stopwords supporting ``in``. Words are
            lowercased before the lookup, so entries must be lowercase
            to match.

    Returns:
        The same ``entities`` object that was passed in.
    """
    for entity in entities:
        words = entity["text"].split()

        # Narrow a [start, end) window instead of popping from the list:
        # list.pop(0) shifts every remaining element on each call.
        start, end = 0, len(words)
        while start < end and words[start].lower() in stop_words:
            start += 1
        while end > start and words[end - 1].lower() in stop_words:
            end -= 1

        entity["text"] = " ".join(words[start:end])

    return entities
|