# 1. Mandatory SQLite fix for ChromaDB in Docker (MUST BE AT THE VERY TOP)
# Swap the stdlib sqlite3 module for pysqlite3 (ships a newer SQLite build
# that ChromaDB requires) BEFORE anything else imports sqlite3.
# Silently skipped when pysqlite3 isn't installed (e.g. local development).
try:
    import pysqlite3
    import sys
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    pass
import os
import re
import shutil
import tempfile

import chromadb  # Added for EphemeralClient
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
from langchain_text_splitters import RecursiveCharacterTextSplitter
# -----------------------------
# 1. Page Configuration + UI Styling
# -----------------------------
st.set_page_config(
    page_title="AI Study Assistant for University Lecture Notes",
    page_icon="📚",  # FIX: restored mis-encoded emoji (was mojibake "๐")
    layout="wide",
)
# Global CSS: centered titles, full-width rounded buttons, card-style sections.
st.markdown("""
<style>
.block-container {
    padding-top: 2rem;
    padding-bottom: 2rem;
}
.main-title {
    text-align: center;
    font-size: 42px;
    font-weight: 700;
}
.subtitle {
    text-align: center;
    font-size: 18px;
    color: #555;
    margin-bottom: 30px;
}
.stButton>button {
    width: 100%;
    border-radius: 12px;
    height: 3em;
    font-weight: 600;
}
.section-card {
    padding: 20px;
    border-radius: 15px;
    background-color: #f8f9fb;
    box-shadow: 0 4px 10px rgba(0,0,0,0.05);
    margin-bottom: 20px;
}
</style>
""", unsafe_allow_html=True)
# FIX: user-facing typo "Notest" -> "Notes"; mojibake emoji restored.
st.markdown("<div class='main-title'>📚 AI Study Assistant for University Lecture Notes</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'></div>", unsafe_allow_html=True)
st.markdown("---")
# Hugging Face API token. NOTE(review): intentionally reads the "2"-suffixed
# env var name used by this deployment — confirm against the Space settings.
token = os.environ.get("HUGGINGFACEHUB_API_TOKEN2")
# -----------------------------
# 2. RAG Logic
# -----------------------------
def process_lecture_pdf(uploaded_file):
    """Index an uploaded lecture PDF for retrieval-augmented QA.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` — only ``.name`` and
            ``.getbuffer()`` are used.

    Returns:
        tuple: ``(retriever, docs)`` where ``retriever`` serves the top-3
        matching chunks from an in-memory Chroma index and ``docs`` is the
        list of page documents loaded from the PDF.
    """
    # FIX: write to a unique temp file instead of /tmp/<client-supplied name>.
    # Reusing the uploaded filename risked collisions between concurrent
    # users and path tricks embedded in the name.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".pdf"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        temp_path = tmp.name
    try:
        # Load and split PDF into overlapping chunks for embedding.
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # In-memory client prevents the "readonly database" error (code 1032)
        # caused by writing Chroma's store to disk inside the container.
        client = chromadb.EphemeralClient()
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            client=client,
        )
        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs
    finally:
        # Remove the temp PDF whether or not indexing succeeded.
        if os.path.exists(temp_path):
            os.remove(temp_path)
# -----------------------------
# 3. Model Setup
# -----------------------------
# Abort the script early when no HF token is available: the remote endpoint
# cannot authenticate without it, so nothing below would work.
if not token:
    st.error("HUGGINGFACEHUB_API_TOKEN2 is not set in environment variables.")
    st.stop()

# Remote Llama 3 inference endpoint, wrapped as a LangChain chat model.
_endpoint_config = dict(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="conversational",
    huggingfacehub_api_token=token,
    max_new_tokens=1024,
    temperature=0.6,
)
llm_endpoint = HuggingFaceEndpoint(**_endpoint_config)
chat_llm = ChatHuggingFace(llm=llm_endpoint)
# -----------------------------
# 4. User Interface
# -----------------------------
col1, col2 = st.columns([1, 2])

with col1:
    st.header("๐ Upload Notes")
    uploaded_file = st.file_uploader("Upload Lecture PDF", type="pdf")
    if uploaded_file:
        # Re-index only when a different file arrives; reruns with the same
        # file reuse the retriever cached in session_state.
        if st.session_state.get('last_file') != uploaded_file.name:
            with st.spinner("Analyzing PDF with Llama 3..."):
                retriever, full_docs = process_lecture_pdf(uploaded_file)
                st.session_state.retriever = retriever
                st.session_state.full_text = "\n".join(page.page_content for page in full_docs)
                st.session_state.last_file = uploaded_file.name
        st.success("Ready to study!")

    st.header("๐ Summarize")
    if st.button("Generate Summary"):
        if 'full_text' not in st.session_state:
            st.warning("Please upload a PDF first.")
        else:
            with st.spinner("Llama 3 is summarizing..."):
                # Cap the prompt at 4000 chars to stay within context limits.
                prompt = [
                    SystemMessage(content="You are a helpful university teaching assistant. Summarize the following text clearly."),
                    HumanMessage(content=f"Notes: {st.session_state.full_text[:4000]}"),
                ]
                reply = chat_llm.invoke(prompt)
                st.write(reply.content)
with col2:
    st.header("๐ฌ Ask Questions")
    with st.form("qa_form"):
        user_query = st.text_input("What would you like to know about your lecture?")
        submit_button = st.form_submit_button("Ask Question")
    if submit_button:
        if not user_query:
            st.error("Please enter a question.")
        elif 'retriever' not in st.session_state:
            st.warning("Upload a PDF to start.")
        else:
            with st.spinner("Llama 3 is searching for the answer..."):
                # Retrieve the top chunks for the query and ground the answer in them.
                hits = st.session_state.retriever.invoke(user_query)
                context_text = "\n\n".join(hit.page_content for hit in hits)
                qa_prompt = [
                    SystemMessage(content="Use the provided context to answer the student's question accurately."),
                    HumanMessage(content=f"Context: {context_text}\n\nQuestion: {user_query}"),
                ]
                answer = chat_llm.invoke(qa_prompt)
                st.markdown("### Answer")
                st.info(answer.content)
                # Let the student inspect the retrieved passages.
                with st.expander("View Source Context"):
                    st.write(context_text)