mariaanwer committed on
Commit
82d3171
·
verified ·
1 Parent(s): cc1f05b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +33 -18
src/streamlit_app.py CHANGED
@@ -10,6 +10,7 @@ import re
10
  import os
11
  import shutil
12
  import streamlit as st
 
13
  from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
14
  from langchain_text_splitters import RecursiveCharacterTextSplitter
15
  from langchain_community.vectorstores import Chroma
@@ -29,31 +30,45 @@ token = os.environ.get("HUGGINGFACEHUB_API_TOKEN2")
29
  # 2. RAG Logic
30
  # -----------------------------
31
def process_lecture_pdf(uploaded_file):
    """Load an uploaded lecture PDF, embed its chunks, and build a retriever.

    Args:
        uploaded_file: Streamlit UploadedFile holding the PDF bytes.

    Returns:
        tuple: (retriever configured for top-3 similarity search,
                list of loaded Document pages).
    """
    # basename strips any directory components from the client-supplied
    # filename, preventing path traversal outside /tmp
    temp_path = os.path.join("/tmp", os.path.basename(uploaded_file.name))
    with open(temp_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    try:
        # Load and split the PDF into overlapping chunks for embedding
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # Rebuild the on-disk index from scratch for each upload so stale
        # chunks from a previous PDF never leak into answers
        db_path = "/tmp/chroma_db"
        if os.path.exists(db_path):
            shutil.rmtree(db_path)

        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory=db_path,
        )
        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs
    finally:
        # FIX: the temp PDF was previously never removed — clean it up even
        # when loading/embedding raises
        if os.path.exists(temp_path):
            os.remove(temp_path)
 
 
 
 
 
 
53
 
54
  # -----------------------------
55
  # 3. Model Setup
56
  # -----------------------------
 
 
 
 
 
57
  llm_endpoint = HuggingFaceEndpoint(
58
  repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
59
  task="conversational",
@@ -73,7 +88,8 @@ with col1:
73
  uploaded_file = st.file_uploader("Upload Lecture PDF", type="pdf")
74
 
75
  if uploaded_file:
76
- if 'retriever' not in st.session_state or st.session_state.get('last_file') != uploaded_file.name:
 
77
  with st.spinner("Analyzing PDF with Llama 3..."):
78
  retriever, full_docs = process_lecture_pdf(uploaded_file)
79
  st.session_state.retriever = retriever
@@ -97,7 +113,6 @@ with col1:
97
  with col2:
98
  st.header("💬 Ask Questions")
99
 
100
- # UI Update: Using a form for the Q&A section
101
  with st.form("qa_form"):
102
  user_query = st.text_input("What would you like to know about your lecture?")
103
  submit_button = st.form_submit_button("Ask Question")
 
10
  import os
11
  import shutil
12
  import streamlit as st
13
+ import chromadb # Added for EphemeralClient
14
  from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
15
  from langchain_text_splitters import RecursiveCharacterTextSplitter
16
  from langchain_community.vectorstores import Chroma
 
30
  # 2. RAG Logic
31
  # -----------------------------
32
def process_lecture_pdf(uploaded_file):
    """Load an uploaded lecture PDF, embed its chunks, and return a retriever.

    Args:
        uploaded_file: Streamlit UploadedFile holding the PDF bytes.

    Returns:
        tuple: (retriever configured for top-3 similarity search,
                list of loaded Document pages).
    """
    # FIX: basename strips any directory components from the client-supplied
    # filename — a name like "../../etc/foo.pdf" would otherwise escape /tmp
    temp_path = os.path.join("/tmp", os.path.basename(uploaded_file.name))
    with open(temp_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    try:
        # Load and split PDF
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # In-memory client avoids the "readonly database" error (Code 1032)
        # seen with the on-disk Chroma store in this environment
        client = chromadb.EphemeralClient()

        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            client=client,
        )

        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs

    finally:
        # Cleanup: remove the temp PDF file after processing, even on failure
        if os.path.exists(temp_path):
            os.remove(temp_path)
63
 
64
  # -----------------------------
65
  # 3. Model Setup
66
  # -----------------------------
67
+ # Ensure the token exists before initializing
68
+ if not token:
69
+ st.error("HUGGINGFACEHUB_API_TOKEN2 is not set in environment variables.")
70
+ st.stop()
71
+
72
  llm_endpoint = HuggingFaceEndpoint(
73
  repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
74
  task="conversational",
 
88
  uploaded_file = st.file_uploader("Upload Lecture PDF", type="pdf")
89
 
90
  if uploaded_file:
91
+ # Only process if it's a new file
92
+ if 'last_file' not in st.session_state or st.session_state.last_file != uploaded_file.name:
93
  with st.spinner("Analyzing PDF with Llama 3..."):
94
  retriever, full_docs = process_lecture_pdf(uploaded_file)
95
  st.session_state.retriever = retriever
 
113
  with col2:
114
  st.header("💬 Ask Questions")
115
 
 
116
  with st.form("qa_form"):
117
  user_query = st.text_input("What would you like to know about your lecture?")
118
  submit_button = st.form_submit_button("Ask Question")