Spaces (status: Sleeping)
Commit: fixes
Browse files
- .gitignore          +1 -0
- requirements.txt    +0 -1
- src/search_final.py +14 -2
- src/streamlit_app.py +6 -1
.gitignore CHANGED
@@ -56,3 +56,4 @@ Thumbs.db
 
 # Streamlit
 .streamlit/
+!.streamlit/config.toml
requirements.txt CHANGED
@@ -33,5 +33,4 @@ scikit-learn>=1.3.0
 
 openai>=1.0.0
 streamlit
-torch==2.8.0
 huggingface_hub
src/search_final.py CHANGED
@@ -24,8 +24,20 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
-
-
+# Set NLTK data path to a writable directory
+nltk_data_dir = os.path.join(os.getcwd(), "nltk_data")
+if not os.path.exists(nltk_data_dir):
+    os.makedirs(nltk_data_dir)
+nltk.data.path.append(nltk_data_dir)
+
+# Download to the custom directory
+try:
+    nltk.download("stopwords", download_dir=nltk_data_dir)
+    STOPWORDS = set(stopwords.words("english"))
+except Exception as e:
+    print(f"NLTK download failed: {e}")
+    # Fallback to basic English stopwords
+    STOPWORDS = set(['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'through', 'during', 'before', 'after', 'above', 'below', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once'])
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # ...rest of your imports...
src/streamlit_app.py CHANGED
@@ -4,7 +4,12 @@ import numpy as np
 import torch
 import os
 from dotenv import load_dotenv
-from transformers import AutoModelForCausalLM, AutoTokenizer
+try:
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+except ImportError as e:
+    st.error(f"Error importing transformers: {e}")
+    st.error("Please ensure transformers library is properly installed.")
+    st.stop()
 from peft import PeftModel
 from search_final import rag_pipeline
 