kundan621 committed on
Commit
5376334
·
1 Parent(s): 21f80bf
Files changed (4) hide show
  1. .gitignore +1 -0
  2. requirements.txt +0 -1
  3. src/search_final.py +14 -2
  4. src/streamlit_app.py +6 -1
.gitignore CHANGED
@@ -56,3 +56,4 @@ Thumbs.db
56
 
57
  # Streamlit
58
  .streamlit/
 
 
56
 
57
  # Streamlit
58
  .streamlit/
59
+ !.streamlit/config.toml
requirements.txt CHANGED
@@ -33,5 +33,4 @@ scikit-learn>=1.3.0
33
 
34
  openai>=1.0.0
35
  streamlit
36
- torch==2.8.0
37
  huggingface_hub
 
33
 
34
  openai>=1.0.0
35
  streamlit
 
36
  huggingface_hub
src/search_final.py CHANGED
@@ -24,8 +24,20 @@ logging.basicConfig(
24
  )
25
  logger = logging.getLogger(__name__)
26
 
27
- nltk.download("stopwords")
28
- STOPWORDS = set(stopwords.words("english"))
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
31
  # ...rest of your imports...
 
24
  )
25
  logger = logging.getLogger(__name__)
26
 
27
+ # Set NLTK data path to a writable directory
28
+ nltk_data_dir = os.path.join(os.getcwd(), "nltk_data")
29
+ if not os.path.exists(nltk_data_dir):
30
+ os.makedirs(nltk_data_dir)
31
+ nltk.data.path.append(nltk_data_dir)
32
+
33
+ # Download to the custom directory
34
+ try:
35
+ nltk.download("stopwords", download_dir=nltk_data_dir)
36
+ STOPWORDS = set(stopwords.words("english"))
37
+ except Exception as e:
38
+ print(f"NLTK download failed: {e}")
39
+ # Fallback to basic English stopwords
40
+ STOPWORDS = set(['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'through', 'during', 'before', 'after', 'above', 'below', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once'])
41
 
42
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
43
  # ...rest of your imports...
src/streamlit_app.py CHANGED
@@ -4,7 +4,12 @@ import numpy as np
4
  import torch
5
  import os
6
  from dotenv import load_dotenv
7
- from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 
 
 
8
  from peft import PeftModel
9
  from search_final import rag_pipeline
10
 
 
4
  import torch
5
  import os
6
  from dotenv import load_dotenv
7
+ try:
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer
9
+ except ImportError as e:
10
+ st.error(f"Error importing transformers: {e}")
11
+ st.error("Please ensure transformers library is properly installed.")
12
+ st.stop()
13
  from peft import PeftModel
14
  from search_final import rag_pipeline
15