vibertron committed on
Commit
2dd2bd1
·
verified ·
1 Parent(s): b3fa970

Update data_processor.py

Browse files
Files changed (1) hide show
  1. data_processor.py +10 -6
data_processor.py CHANGED
@@ -30,12 +30,16 @@ try:
30
  except:
31
  pass
32
  """
33
- try:
34
- from nltk.corpus import stopwords
35
- _ = stopwords.words("english")
36
- except LookupError:
37
- nltk.download("stopwords")
38
- from nltk.corpus import stopwords
 
 
 
 
39
 
40
  class FinancialDataProcessor:
41
  """Processes financial documents and generates Q&A pairs"""
 
30
  except:
31
  pass
32
  """
33
+ resources = ["stopwords", "punkt", "punkt_tab"]
34
+ for res in resources:
35
+ try:
36
+ if res == "stopwords":
37
+ from nltk.corpus import stopwords
38
+ _ = stopwords.words("english")
39
+ elif res in ["punkt", "punkt_tab"]:
40
+ nltk.data.find(f"tokenizers/{res}")
41
+ except LookupError:
42
+ nltk.download(res)
43
 
44
  class FinancialDataProcessor:
45
  """Processes financial documents and generates Q&A pairs"""