Spaces:
Sleeping
Sleeping
Update data_processor.py
Browse files
- data_processor.py +10 -6
data_processor.py
CHANGED
|
@@ -30,12 +30,16 @@ try:
|
|
| 30 |
except:
|
| 31 |
pass
|
| 32 |
"""
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
class FinancialDataProcessor:
|
| 41 |
"""Processes financial documents and generates Q&A pairs"""
|
|
|
|
| 30 |
except:
|
| 31 |
pass
|
| 32 |
"""
|
| 33 | + resources = ["stopwords", "punkt", "punkt_tab"]
| 34 | + for res in resources:
| 35 | +     try:
| 36 | +         if res == "stopwords":
| 37 | +             from nltk.corpus import stopwords
| 38 | +             _ = stopwords.words("english")
| 39 | +         elif res in ["punkt", "punkt_tab"]:
| 40 | +             nltk.data.find(f"tokenizers/{res}")
| 41 | +     except LookupError:
| 42 | +         nltk.download(res)
|
| 43 |
|
| 44 |
class FinancialDataProcessor:
|
| 45 |
"""Processes financial documents and generates Q&A pairs"""
|