parthmax24 committed on
Commit
d9509ee
·
1 Parent(s): 7a5d5dd

updated fact_check.py

Browse files
Files changed (1) hide show
  1. backend/api/fact_check.py +13 -2
backend/api/fact_check.py CHANGED
@@ -15,6 +15,8 @@ from sumy.parsers.plaintext import PlaintextParser
15
  from sumy.nlp.tokenizers import Tokenizer
16
  from sumy.summarizers.lsa import LsaSummarizer
17
  import nltk
 
 
18
 
19
  load_dotenv()
20
 
@@ -25,8 +27,17 @@ USER_AGENT = {"User-Agent": "Mozilla/5.0"}
25
  session = requests.Session()
26
  session.headers.update(USER_AGENT)
27
 
28
- # nltk data for tokenization once
29
- nltk.download('punkt')
 
 
 
 
 
 
 
 
 
30
 
31
  logging.basicConfig(level=logging.INFO)
32
 
 
15
  from sumy.nlp.tokenizers import Tokenizer
16
  from sumy.summarizers.lsa import LsaSummarizer
17
  import nltk
18
+ from nltk.corpus import stopwords
19
+ from nltk.tokenize import word_tokenize
20
 
21
  load_dotenv()
22
 
 
27
  session = requests.Session()
28
  session.headers.update(USER_AGENT)
29
 
30
+
31
+ # Set NLTK data path to a writable directory
32
+ nltk_data_dir = "/tmp/nltk_data"
33
+ os.makedirs(nltk_data_dir, exist_ok=True)
34
+
35
+ # Download necessary NLTK data to that directory
36
+ nltk.download("punkt", download_dir=nltk_data_dir)
37
+ nltk.download("stopwords", download_dir=nltk_data_dir)
38
+
39
+ # Tell NLTK to look here for data
40
+ nltk.data.path.append(nltk_data_dir)
41
 
42
  logging.basicConfig(level=logging.INFO)
43