# Source: ai-assistent-project / src/streamlit_app.py
# (Hugging Face Space file page header: uploaded by Manar11,
#  "Update src/streamlit_app.py", commit 45b170d, verified)
# 1. Mandatory SQLite fix for ChromaDB in Docker (MUST BE AT THE VERY TOP)
# ChromaDB requires a newer sqlite3 than many Docker base images ship.
# If the pysqlite3 binary wheel is installed, alias it as `sqlite3` in
# sys.modules BEFORE any other module imports sqlite3, so chromadb picks
# up the modern version transparently.
try:
    __import__('pysqlite3')
    import sys
    # pop() removes the pysqlite3 entry and re-registers it under 'sqlite3'.
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    # pysqlite3 not installed (e.g. local dev) — fall back to system sqlite3.
    pass
import os
import re
import shutil
import tempfile

import chromadb  # Added for EphemeralClient
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
from langchain_text_splitters import RecursiveCharacterTextSplitter
# -----------------------------
# 1. Page Configuration + UI Styling
# -----------------------------
st.set_page_config(
    page_title="AI Study Assistant for University Lecture Notes",
    page_icon="🎓",
    layout="wide",
)

# Global CSS tweaks: tighter page padding, centered title/subtitle,
# full-width rounded buttons, and a card style for sections.
st.markdown("""
<style>
.block-container {
    padding-top: 2rem;
    padding-bottom: 2rem;
}
.main-title {
    text-align: center;
    font-size: 42px;
    font-weight: 700;
}
.subtitle {
    text-align: center;
    font-size: 18px;
    color: #555;
    margin-bottom: 30px;
}
.stButton>button {
    width: 100%;
    border-radius: 12px;
    height: 3em;
    font-weight: 600;
}
.section-card {
    padding: 20px;
    border-radius: 15px;
    background-color: #f8f9fb;
    box-shadow: 0 4px 10px rgba(0,0,0,0.05);
    margin-bottom: 20px;
}
</style>
""", unsafe_allow_html=True)

# FIX: header previously read "...Lecture Notest" — corrected to "Notes"
# so it matches the browser-tab page_title above.
st.markdown("<div class='main-title'>🎓 AI Study Assistant for University Lecture Notes</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'></div>", unsafe_allow_html=True)
st.markdown("---")

# Hugging Face Inference API token; this Space stores it under the
# secondary secret name HUGGINGFACEHUB_API_TOKEN2 (validated in section 3).
token = os.environ.get("HUGGINGFACEHUB_API_TOKEN2")
# -----------------------------
# 2. RAG Logic
# -----------------------------
def process_lecture_pdf(uploaded_file):
    """Load an uploaded lecture PDF and build an in-memory retriever.

    Args:
        uploaded_file: Streamlit UploadedFile holding the PDF bytes.

    Returns:
        Tuple of (retriever, docs) where retriever performs top-3
        similarity search over 700-char chunks (100-char overlap) and
        docs is the full list of loaded page documents (used by the
        summarizer for the raw text).
    """
    # PyPDFLoader needs a real filesystem path. FIX: write to a unique
    # NamedTemporaryFile instead of /tmp/<user-supplied name>, which could
    # collide across concurrent sessions and trusts an attacker-controlled
    # filename. delete=False because the loader reopens the path after close.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        temp_path = tmp.name
    try:
        # Load pages and split into overlapping chunks for retrieval.
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # In-memory Chroma client: avoids the "readonly database" error
        # (code 1032) caused by writing to disk in a locked-down container.
        client = chromadb.EphemeralClient()
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            client=client
        )
        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs
    finally:
        # Cleanup: remove the temp PDF even if loading/embedding raised.
        if os.path.exists(temp_path):
            os.remove(temp_path)
# -----------------------------
# 3. Model Setup
# -----------------------------
# Ensure the token exists before initializing; without it the endpoint
# constructor below would fail with a less helpful error.
if not token:
    st.error("HUGGINGFACEHUB_API_TOKEN2 is not set in environment variables.")
    st.stop()
# Remote Llama 3 8B Instruct served via the Hugging Face Inference API.
llm_endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="conversational",
    huggingfacehub_api_token=token,
    max_new_tokens=1024,  # upper bound on generated answer length
    temperature=0.6       # mildly creative, mostly grounded output
)
# Chat wrapper that applies the model's chat template to message lists.
chat_llm = ChatHuggingFace(llm=llm_endpoint)
# -----------------------------
# 4. User Interface
# -----------------------------
# Narrow left column for upload/summary, wide right column for Q&A.
col1, col2 = st.columns([1, 2])
with col1:
    st.header("πŸ“‚ Upload Notes")
    uploaded_file = st.file_uploader("Upload Lecture PDF", type="pdf")
    if uploaded_file:
        # Only process if it's a new file: Streamlit reruns the whole
        # script on every interaction, so the last filename is cached in
        # session_state to skip re-chunking/re-embedding the same PDF.
        if 'last_file' not in st.session_state or st.session_state.last_file != uploaded_file.name:
            with st.spinner("Analyzing PDF with Llama 3..."):
                retriever, full_docs = process_lecture_pdf(uploaded_file)
                st.session_state.retriever = retriever
                # Full concatenated text kept for the summary feature below.
                st.session_state.full_text = "\n".join([d.page_content for d in full_docs])
                st.session_state.last_file = uploaded_file.name
                st.success("Ready to study!")
    st.header("πŸ“ Summarize")
    if st.button("Generate Summary"):
        if 'full_text' in st.session_state:
            with st.spinner("Llama 3 is summarizing..."):
                messages = [
                    SystemMessage(content="You are a helpful university teaching assistant. Summarize the following text clearly."),
                    # Truncated to the first 4000 chars to stay within the
                    # model's context window; later pages are not summarized.
                    HumanMessage(content=f"Notes: {st.session_state.full_text[:4000]}")
                ]
                response = chat_llm.invoke(messages)
                st.write(response.content)
        else:
            st.warning("Please upload a PDF first.")
with col2:
    st.header("πŸ’¬ Ask Questions")
    # A form batches the text input + submit button into a single rerun,
    # so typing doesn't trigger retrieval until "Ask Question" is clicked.
    with st.form("qa_form"):
        user_query = st.text_input("What would you like to know about your lecture?")
        submit_button = st.form_submit_button("Ask Question")
    if submit_button:
        if not user_query:
            st.error("Please enter a question.")
        elif 'retriever' in st.session_state:
            with st.spinner("Llama 3 is searching for the answer..."):
                # Retrieve the top-3 most similar chunks to ground the answer.
                context_docs = st.session_state.retriever.invoke(user_query)
                context_text = "\n\n".join([doc.page_content for doc in context_docs])
                messages = [
                    SystemMessage(content="Use the provided context to answer the student's question accurately."),
                    HumanMessage(content=f"Context: {context_text}\n\nQuestion: {user_query}")
                ]
                response = chat_llm.invoke(messages)
                st.markdown("### Answer")
                st.info(response.content)
                # Let the student audit which chunks the answer came from.
                with st.expander("View Source Context"):
                    st.write(context_text)
        else:
            st.warning("Upload a PDF to start.")