# NOTE(review): removed web-scrape artifacts (the "Spaces / Sleeping" page
# banner and the Markdown-table pipes wrapping every line) — they made this
# file invalid Python.

# 1. Mandatory SQLite fix for ChromaDB in Docker (MUST BE AT THE VERY TOP).
# ChromaDB needs a newer SQLite than many Docker base images ship; when the
# pysqlite3 binary wheel is installed, alias it in as the stdlib sqlite3.
# If it is absent, fall back silently to the system sqlite3.
try:
    __import__('pysqlite3')
    import sys
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    pass

import os
import re
import shutil

import chromadb  # needed for the in-memory EphemeralClient
import streamlit as st
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import HumanMessage, SystemMessage
# -----------------------------
# 1. Page Configuration + UI Styling
# -----------------------------
st.set_page_config(
    page_title="AI Study Assistant for University Lecture Notes",
    page_icon="π",  # NOTE(review): looks like a mojibake'd emoji — confirm intended glyph
    layout="wide",
)

# Global CSS: centered title, subtle card styling, full-width rounded buttons.
st.markdown("""
<style>
.block-container {
    padding-top: 2rem;
    padding-bottom: 2rem;
}
.main-title {
    text-align: center;
    font-size: 42px;
    font-weight: 700;
}
.subtitle {
    text-align: center;
    font-size: 18px;
    color: #555;
    margin-bottom: 30px;
}
.stButton>button {
    width: 100%;
    border-radius: 12px;
    height: 3em;
    font-weight: 600;
}
.section-card {
    padding: 20px;
    border-radius: 15px;
    background-color: #f8f9fb;
    box-shadow: 0 4px 10px rgba(0,0,0,0.05);
    margin-bottom: 20px;
}
</style>
""", unsafe_allow_html=True)

# FIX: visible page title read "Lecture Notest" — corrected to "Lecture Notes".
st.markdown("<div class='main-title'>π AI Study Assistant for University Lecture Notes</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'></div>", unsafe_allow_html=True)
st.markdown("---")

# Hugging Face API token; existence is validated in the Model Setup section
# before any endpoint is constructed.
token = os.environ.get("HUGGINGFACEHUB_API_TOKEN2")
# -----------------------------
# 2. RAG Logic
# -----------------------------
def process_lecture_pdf(uploaded_file):
    """Index an uploaded lecture PDF for retrieval-augmented QA.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` containing the PDF bytes.

    Returns:
        tuple: ``(retriever, docs)`` where ``retriever`` returns the top-3
        most similar chunks and ``docs`` is the list of loaded page
        Documents (used elsewhere for summarization).
    """
    import tempfile  # local import keeps this fix self-contained

    # FIX: write to a unique temp file instead of os.path.join("/tmp",
    # uploaded_file.name). The old form let concurrent sessions clobber each
    # other's files and allowed path traversal via a crafted filename.
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Load the PDF and split it into overlapping chunks for embedding.
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # In-memory Chroma client: avoids the "readonly database" error
        # (code 1032) caused by an unwritable on-disk store in Docker.
        client = chromadb.EphemeralClient()
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            client=client
        )
        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs
    finally:
        # Cleanup: remove the temp PDF after processing, even on failure.
        if os.path.exists(temp_path):
            os.remove(temp_path)
# -----------------------------
# 3. Model Setup
# -----------------------------
# Fail fast when the token is missing so endpoint construction below cannot
# silently proceed and 401 at first use.
if not token:
    st.error("HUGGINGFACEHUB_API_TOKEN2 is not set in environment variables.")
    st.stop()

# Llama 3 8B Instruct served via the Hugging Face Inference endpoint,
# wrapped in a chat interface so it accepts System/Human message lists.
llm_endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="conversational",
    huggingfacehub_api_token=token,
    max_new_tokens=1024,
    temperature=0.6
)
chat_llm = ChatHuggingFace(llm=llm_endpoint)
# -----------------------------
# 4. User Interface
# -----------------------------
# Left column: upload + summarize. Right (wider) column: question answering.
col1, col2 = st.columns([1, 2])

with col1:
    st.header("π Upload Notes")
    uploaded_file = st.file_uploader("Upload Lecture PDF", type="pdf")
    if uploaded_file:
        # Only re-index when a *new* file name arrives; otherwise reuse the
        # retriever/full_text cached in session state across reruns.
        if 'last_file' not in st.session_state or st.session_state.last_file != uploaded_file.name:
            with st.spinner("Analyzing PDF with Llama 3..."):
                retriever, full_docs = process_lecture_pdf(uploaded_file)
                st.session_state.retriever = retriever
                st.session_state.full_text = "\n".join([d.page_content for d in full_docs])
                st.session_state.last_file = uploaded_file.name
            st.success("Ready to study!")

    st.header("π Summarize")
    if st.button("Generate Summary"):
        if 'full_text' in st.session_state:
            with st.spinner("Llama 3 is summarizing..."):
                # Truncate to 4000 chars to stay within the prompt budget.
                messages = [
                    SystemMessage(content="You are a helpful university teaching assistant. Summarize the following text clearly."),
                    HumanMessage(content=f"Notes: {st.session_state.full_text[:4000]}")
                ]
                response = chat_llm.invoke(messages)
                st.write(response.content)
        else:
            st.warning("Please upload a PDF first.")

with col2:
    st.header("π¬ Ask Questions")
    # Form groups the text input and submit button into one rerun.
    with st.form("qa_form"):
        user_query = st.text_input("What would you like to know about your lecture?")
        submit_button = st.form_submit_button("Ask Question")

    if submit_button:
        if not user_query:
            st.error("Please enter a question.")
        elif 'retriever' in st.session_state:
            with st.spinner("Llama 3 is searching for the answer..."):
                # Retrieve top-k chunks and feed them as grounding context.
                context_docs = st.session_state.retriever.invoke(user_query)
                context_text = "\n\n".join([doc.page_content for doc in context_docs])
                messages = [
                    SystemMessage(content="Use the provided context to answer the student's question accurately."),
                    HumanMessage(content=f"Context: {context_text}\n\nQuestion: {user_query}")
                ]
                response = chat_llm.invoke(messages)
                st.markdown("### Answer")
                st.info(response.content)
                with st.expander("View Source Context"):
                    st.write(context_text)
        else:
            st.warning("Upload a PDF to start.")