Prageeth-1 committed on
Commit
4343224
·
verified ·
1 Parent(s): 119b991

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -16
app.py CHANGED
@@ -3,30 +3,44 @@ import pandas as pd
3
  import numpy as np
4
  import re
5
  import nltk
 
6
  from nltk.corpus import stopwords
7
  from nltk.stem import WordNetLemmatizer
8
- from nltk.tokenize import word_tokenize, sent_tokenize
9
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
10
  from wordcloud import WordCloud
11
  import matplotlib.pyplot as plt
12
  import io
13
 
14
-
15
- # Download all required NLTK resources with error handling
16
- try:
17
- nltk.data.find('tokenizers/punkt')
18
- except LookupError:
19
- nltk.download('punkt')
20
-
21
- try:
22
- nltk.data.find('corpora/stopwords')
23
- except LookupError:
24
- nltk.download('stopwords')
25
 
26
- try:
27
- nltk.data.find('corpora/wordnet')
28
- except LookupError:
29
- nltk.download('wordnet')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # Initialize lemmatizer
32
  lemmatizer = WordNetLemmatizer()
 
3
  import numpy as np
4
  import re
5
  import nltk
6
+ import os
7
  from nltk.corpus import stopwords
8
  from nltk.stem import WordNetLemmatizer
9
+ from nltk.tokenize import word_tokenize
10
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
11
  from wordcloud import WordCloud
12
  import matplotlib.pyplot as plt
13
  import io
14
 
15
+ @st.cache_resource
16
+ def setup_nltk():
17
+ # Set NLTK data path
18
+ nltk_data_path = os.path.join(os.getcwd(), 'nltk_data')
19
+ os.makedirs(nltk_data_path, exist_ok=True)
20
+ nltk.data.path.append(nltk_data_path)
 
 
 
 
 
21
 
22
+ # Download required resources with retries
23
+ required = ['punkt', 'stopwords', 'wordnet', 'omw-1.4']
24
+ for resource in required:
25
+ max_retries = 3
26
+ for attempt in range(max_retries):
27
+ try:
28
+ nltk.data.find(f'tokenizers/punkt/PY3/english.pickle')
29
+ break
30
+ except LookupError:
31
+ try:
32
+ nltk.download(resource, download_dir=nltk_data_path)
33
+ # Special handling for punkt
34
+ if resource == 'punkt':
35
+ nltk.download('punkt_tab', download_dir=nltk_data_path)
36
+ except Exception as e:
37
+ if attempt == max_retries - 1:
38
+ st.error(f"Failed to download NLTK resource {resource} after {max_retries} attempts")
39
+ raise
40
+ continue
41
+
42
+ # Run initialization before anything else
43
+ setup_nltk()
44
 
45
  # Initialize lemmatizer
46
  lemmatizer = WordNetLemmatizer()