# 0. Mandatory SQLite fix for ChromaDB in Docker (MUST BE AT THE VERY TOP).
# ChromaDB needs a newer sqlite3 than many base images ship; swap in the
# bundled pysqlite3 build when it is available, otherwise fall back to the
# system sqlite3.
try:
    __import__('pysqlite3')
    import sys
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    pass

import os
import re
import shutil
import tempfile

import streamlit as st
import chromadb  # Added for EphemeralClient
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import HumanMessage, SystemMessage

# -----------------------------
# 1. Page Configuration + UI Styling
# -----------------------------
st.set_page_config(
    page_title="AI Study Assistant for University Lecture Notes",
    page_icon="🎓",
    layout="wide",
)

# FIX: page title previously read "Lecture Notest" (typo).
st.markdown(
    "🎓 AI Study Assistant for University Lecture Notes",
    unsafe_allow_html=True,
)
st.markdown("---")

# NOTE(review): the token is read from HUGGINGFACEHUB_API_TOKEN2 (with a
# trailing "2") — confirm this matches the deployment's secret name.
token = os.environ.get("HUGGINGFACEHUB_API_TOKEN2")


# -----------------------------
# 2. RAG Logic
# -----------------------------
def process_lecture_pdf(uploaded_file):
    """Build an in-memory retriever over an uploaded lecture PDF.

    Saves the upload to a temporary file (PyPDFLoader requires a
    filesystem path), splits the pages into overlapping chunks, embeds
    them with a MiniLM sentence-transformer, and indexes them in an
    in-memory Chroma collection.

    Args:
        uploaded_file: a Streamlit UploadedFile holding the PDF bytes.

    Returns:
        (retriever, docs): a similarity retriever returning the top 3
        chunks, and the full list of loaded page documents.
    """
    # FIX: use tempfile instead of the predictable /tmp/<original name>
    # path (collision-prone, and the name is attacker-controllable).
    # delete=False so PyPDFLoader can reopen the file by path.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        temp_path = tmp.name

    try:
        # Load and split the PDF into retrieval-sized chunks.
        docs = PyPDFLoader(temp_path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # In-memory client: prevents the "readonly database" error
        # (code 1032) seen with the on-disk Chroma client in Docker.
        client = chromadb.EphemeralClient()
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            client=client,
        )
        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs
    finally:
        # Always remove the temp PDF, even if loading/embedding fails.
        if os.path.exists(temp_path):
            os.remove(temp_path)


# -----------------------------
# 3. Model Setup
# -----------------------------
# Fail fast with a visible error if the API token is missing.
if not token:
    st.error("HUGGINGFACEHUB_API_TOKEN2 is not set in environment variables.")
    st.stop()

llm_endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="conversational",
    huggingfacehub_api_token=token,
    max_new_tokens=1024,
    temperature=0.6,
)
chat_llm = ChatHuggingFace(llm=llm_endpoint)

# -----------------------------
# 4. User Interface
# -----------------------------
left_col, right_col = st.columns([1, 2])

with left_col:
    st.header("📂 Upload Notes")
    pdf_file = st.file_uploader("Upload Lecture PDF", type="pdf")

    if pdf_file:
        # Re-index only when a different file arrives; reruns with the
        # same upload reuse the cached retriever in session state.
        if st.session_state.get("last_file") != pdf_file.name:
            with st.spinner("Analyzing PDF with Llama 3..."):
                retriever, pages = process_lecture_pdf(pdf_file)
                st.session_state.retriever = retriever
                st.session_state.full_text = "\n".join(
                    page.page_content for page in pages
                )
                st.session_state.last_file = pdf_file.name
            st.success("Ready to study!")

    st.header("📝 Summarize")
    if st.button("Generate Summary"):
        if 'full_text' not in st.session_state:
            st.warning("Please upload a PDF first.")
        else:
            with st.spinner("Llama 3 is summarizing..."):
                # Cap the prompt at the first 4000 characters of the notes.
                prompt = [
                    SystemMessage(content="You are a helpful university teaching assistant. Summarize the following text clearly."),
                    HumanMessage(content=f"Notes: {st.session_state.full_text[:4000]}"),
                ]
                reply = chat_llm.invoke(prompt)
                st.write(reply.content)

with right_col:
    st.header("💬 Ask Questions")
    with st.form("qa_form"):
        question = st.text_input("What would you like to know about your lecture?")
        asked = st.form_submit_button("Ask Question")

    if asked:
        if not question:
            st.error("Please enter a question.")
        elif 'retriever' not in st.session_state:
            st.warning("Upload a PDF to start.")
        else:
            with st.spinner("Llama 3 is searching for the answer..."):
                # Retrieve the top chunks and ground the answer in them.
                hits = st.session_state.retriever.invoke(question)
                context = "\n\n".join(doc.page_content for doc in hits)
                prompt = [
                    SystemMessage(content="Use the provided context to answer the student's question accurately."),
                    HumanMessage(content=f"Context: {context}\n\nQuestion: {question}"),
                ]
                reply = chat_llm.invoke(prompt)
                st.markdown("### Answer")
                st.info(reply.content)
                with st.expander("View Source Context"):
                    st.write(context)