mariaanwer committed on
Commit
82d3171
·
verified ·
1 Parent(s): cc1f05b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +33 -18
src/streamlit_app.py CHANGED
@@ -10,6 +10,7 @@ import re
10
  import os
11
  import shutil
12
  import streamlit as st
 
13
  from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
14
  from langchain_text_splitters import RecursiveCharacterTextSplitter
15
  from langchain_community.vectorstores import Chroma
@@ -29,31 +30,45 @@ token = os.environ.get("HUGGINGFACEHUB_API_TOKEN2")
29
  # 2. RAG Logic
30
  # -----------------------------
31
def process_lecture_pdf(uploaded_file):
    """Load an uploaded lecture PDF, embed its chunks, and build a retriever.

    Args:
        uploaded_file: Streamlit UploadedFile holding the PDF bytes.

    Returns:
        tuple: (retriever configured for top-3 similarity search,
                list of loaded Document pages).
    """
    # basename strips any directory components from the client-supplied
    # filename, preventing path traversal outside /tmp
    temp_path = os.path.join("/tmp", os.path.basename(uploaded_file.name))
    with open(temp_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    try:
        # Load and split the PDF into overlapping chunks for embedding
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # Rebuild the on-disk index from scratch for each upload so stale
        # chunks from a previous PDF never leak into answers
        db_path = "/tmp/chroma_db"
        if os.path.exists(db_path):
            shutil.rmtree(db_path)

        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory=db_path,
        )
        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs
    finally:
        # FIX: the temp PDF was previously never removed — clean it up even
        # when loading/embedding raises
        if os.path.exists(temp_path):
            os.remove(temp_path)
 
 
 
 
 
 
53
 
54
  # -----------------------------
55
  # 3. Model Setup
56
  # -----------------------------
 
 
 
 
 
57
  llm_endpoint = HuggingFaceEndpoint(
58
  repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
59
  task="conversational",
@@ -73,7 +88,8 @@ with col1:
73
  uploaded_file = st.file_uploader("Upload Lecture PDF", type="pdf")
74
 
75
  if uploaded_file:
76
- if 'retriever' not in st.session_state or st.session_state.get('last_file') != uploaded_file.name:
 
77
  with st.spinner("Analyzing PDF with Llama 3..."):
78
  retriever, full_docs = process_lecture_pdf(uploaded_file)
79
  st.session_state.retriever = retriever
@@ -97,7 +113,6 @@ with col1:
97
  with col2:
98
  st.header("💬 Ask Questions")
99
 
100
- # UI Update: Using a form for the Q&A section
101
  with st.form("qa_form"):
102
  user_query = st.text_input("What would you like to know about your lecture?")
103
  submit_button = st.form_submit_button("Ask Question")
 
10
  import os
11
  import shutil
12
  import streamlit as st
13
+ import chromadb # Added for EphemeralClient
14
  from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
15
  from langchain_text_splitters import RecursiveCharacterTextSplitter
16
  from langchain_community.vectorstores import Chroma
 
30
  # 2. RAG Logic
31
  # -----------------------------
32
def process_lecture_pdf(uploaded_file):
    """Load an uploaded lecture PDF, embed its chunks, and return a retriever.

    Args:
        uploaded_file: Streamlit UploadedFile holding the PDF bytes.

    Returns:
        tuple: (retriever configured for top-3 similarity search,
                list of loaded Document pages).
    """
    # FIX: basename strips any directory components from the client-supplied
    # filename — a name like "../../etc/foo.pdf" would otherwise escape /tmp
    temp_path = os.path.join("/tmp", os.path.basename(uploaded_file.name))
    with open(temp_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    try:
        # Load and split PDF
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # In-memory client avoids the "readonly database" error (Code 1032)
        # seen with the on-disk Chroma store in this environment
        client = chromadb.EphemeralClient()

        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            client=client,
        )

        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs

    finally:
        # Cleanup: remove the temp PDF file after processing, even on failure
        if os.path.exists(temp_path):
            os.remove(temp_path)
63
 
64
  # -----------------------------
65
  # 3. Model Setup
66
  # -----------------------------
67
+ # Ensure the token exists before initializing
68
+ if not token:
69
+ st.error("HUGGINGFACEHUB_API_TOKEN2 is not set in environment variables.")
70
+ st.stop()
71
+
72
  llm_endpoint = HuggingFaceEndpoint(
73
  repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
74
  task="conversational",
 
88
  uploaded_file = st.file_uploader("Upload Lecture PDF", type="pdf")
89
 
90
  if uploaded_file:
91
+ # Only process if it's a new file
92
+ if 'last_file' not in st.session_state or st.session_state.last_file != uploaded_file.name:
93
  with st.spinner("Analyzing PDF with Llama 3..."):
94
  retriever, full_docs = process_lecture_pdf(uploaded_file)
95
  st.session_state.retriever = retriever
 
113
  with col2:
114
  st.header("💬 Ask Questions")
115
 
 
116
  with st.form("qa_form"):
117
  user_query = st.text_input("What would you like to know about your lecture?")
118
  submit_button = st.form_submit_button("Ask Question")