Spaces:

nivakaran
/

Portfolio-Chatbot

Runtime error

App Files Community

nivakaran committed on Aug 8, 2025

Commit

d68a3d3

verified ·

1 Parent(s): 388931a

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +20 -23

src/streamlit_app.py CHANGED Viewed

@@ -15,48 +15,46 @@ from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_chroma import Chroma
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Set up proper cache directories for HuggingFace Spaces
 def setup_environment():
-    # Create cache directories in a writable location
-    cache_dir = Path("/tmp/cache")  # Using /tmp which is writable in HuggingFace Spaces
     cache_dir.mkdir(exist_ok=True)
-    # Set environment variables
-    os.environ['STREAMLIT_HOME'] = str(cache_dir / "streamlit")
     os.environ['HF_HOME'] = str(cache_dir / "huggingface")
-    os.environ['TRANSFORMERS_CACHE'] = str(cache_dir / "transformers")
-    os.environ['XDG_CACHE_HOME'] = str(cache_dir)
-    # Ensure subdirectories exist
-    (cache_dir / "huggingface").mkdir(exist_ok=True)
-    (cache_dir / "streamlit").mkdir(exist_ok=True)
-    (cache_dir / "transformers").mkdir(exist_ok=True)
 setup_environment()
 # Load environment variables
 load_dotenv()
-HF_TOKEN = os.getenv("HF_TOKEN")
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-PDF_PATH = os.getenv("PDF_PATH", "./nivakaran.pdf")
 # Validate environment variables
-if not all([HF_TOKEN, GROQ_API_KEY, PDF_PATH]):
     st.error("Missing required environment variables")
     st.stop()
-# Initialize RAG components with proper cache handling
 try:
     embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2",
         model_kwargs={'device': 'cpu'},
-        encode_kwargs={'normalize_embeddings': True},
-        cache_folder=os.environ['HF_HOME']
     )
 except Exception as e:
     logger.error(f"Failed to initialize embeddings: {str(e)}")
@@ -64,7 +62,6 @@ except Exception as e:
     st.stop()
 llm = ChatGroq(model_name="Deepseek-R1-Distill-Llama-70b", temperature=0.1)
-session_store = {}
 # Process PDF into vectorstore
 def process_pdf(file_path: str):
@@ -74,12 +71,10 @@ def process_pdf(file_path: str):
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
         splits = text_splitter.split_documents(documents)
-        # Use temporary directory for Chroma DB
-        chroma_dir = "/tmp/chroma_db"
         vectorstore = Chroma.from_documents(
             documents=splits,
             embedding=embeddings,
-            persist_directory=chroma_dir
         )
         logger.info(f"PDF {file_path} processed successfully")
         return vectorstore
@@ -97,6 +92,8 @@ except Exception as e:
     st.error("Failed to initialize document store. Please try again later.")
     st.stop()
 # System prompt for the assistant
 system_prompt = """You are Max, a friendly and professional chatbot designed to
 assist visitors to Nivakaran's portfolio website. Your primary goal

 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_chroma import Chroma
+import torch
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Set up proper cache directories
 def setup_environment():
+    cache_dir = Path("/tmp/cache")
     cache_dir.mkdir(exist_ok=True)
     os.environ['HF_HOME'] = str(cache_dir / "huggingface")
+    os.environ['STREAMLIT_HOME'] = str(cache_dir / "streamlit")
 setup_environment()
 # Load environment variables
 load_dotenv()
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+PDF_PATH = os.getenv("PDF_PATH", "nivakaran.pdf")  # Changed to direct filename
 # Validate environment variables
+if not all([GROQ_API_KEY]):
     st.error("Missing required environment variables")
     st.stop()
+# Verify PDF exists
+if not Path(PDF_PATH).exists():
+    st.error(f"PDF file not found at: {PDF_PATH}")
+    st.stop()
+# Initialize RAG components with proper device handling
 try:
+    # CPU is forced via model_kwargs below; these MPS variables only enable CPU fallback and lift the MPS allocator limit (they do not disable Metal)
+    os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
+    os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
     embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2",
         model_kwargs={'device': 'cpu'},
+        encode_kwargs={'normalize_embeddings': True}
     )
 except Exception as e:
     logger.error(f"Failed to initialize embeddings: {str(e)}")
     st.stop()
 llm = ChatGroq(model_name="Deepseek-R1-Distill-Llama-70b", temperature=0.1)
 # Process PDF into vectorstore
 def process_pdf(file_path: str):
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
         splits = text_splitter.split_documents(documents)
         vectorstore = Chroma.from_documents(
             documents=splits,
             embedding=embeddings,
+            persist_directory="/tmp/chroma_db"
         )
         logger.info(f"PDF {file_path} processed successfully")
         return vectorstore
     st.error("Failed to initialize document store. Please try again later.")
     st.stop()
+# [Rest of your existing Streamlit UI code remains the same...]
 # System prompt for the assistant
 system_prompt = """You are Max, a friendly and professional chatbot designed to
 assist visitors to Nivakaran's portfolio website. Your primary goal