Spaces:
Build error
Build error
Commit
·
4e0e782
1
Parent(s):
8ccc348
Remove NLTK package
Browse files
app/search/bm25_search.py
CHANGED
|
@@ -32,9 +32,9 @@ class BM25_search:
|
|
| 32 |
- perform_lemmatization (bool): Whether to perform lemmatization on tokens.
|
| 33 |
"""
|
| 34 |
# Ensure NLTK resources are downloaded only once
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
|
| 39 |
self.documents: List[str] = []
|
| 40 |
self.doc_ids: List[str] = []
|
|
@@ -153,7 +153,7 @@ async def initialize_bm25_search(remove_stopwords: bool = True, perform_lemmatiz
|
|
| 153 |
Initializes the BM25search with proper NLTK resource downloading.
|
| 154 |
"""
|
| 155 |
loop = asyncio.get_running_loop()
|
| 156 |
-
|
| 157 |
return BM25_search(remove_stopwords, perform_lemmatization)
|
| 158 |
|
| 159 |
|
|
|
|
| 32 |
- perform_lemmatization (bool): Whether to perform lemmatization on tokens.
|
| 33 |
"""
|
| 34 |
# Ensure NLTK resources are downloaded only once
|
| 35 |
+
if not BM25_search.nltk_resources_downloaded:
|
| 36 |
+
download_nltk_resources()
|
| 37 |
+
BM25_search.nltk_resources_downloaded = True # Mark as downloaded
|
| 38 |
|
| 39 |
self.documents: List[str] = []
|
| 40 |
self.doc_ids: List[str] = []
|
|
|
|
| 153 |
Initializes the BM25search with proper NLTK resource downloading.
|
| 154 |
"""
|
| 155 |
loop = asyncio.get_running_loop()
|
| 156 |
+
await loop.run_in_executor(None, download_nltk_resources)
|
| 157 |
return BM25_search(remove_stopwords, perform_lemmatization)
|
| 158 |
|
| 159 |
|