# NOTE(review): removed web-scrape artifacts (the "Spaces / Sleeping" page
# banner and the Markdown-table pipes wrapping every line) — they made this
# file invalid Python.

# 1. Mandatory SQLite fix for ChromaDB in Docker (MUST BE AT THE VERY TOP).
# ChromaDB needs a newer SQLite than many Docker base images ship; when the
# pysqlite3 binary wheel is installed, alias it in as the stdlib sqlite3.
# If it is absent, fall back silently to the system sqlite3.
try:
    __import__('pysqlite3')
    import sys
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    pass

import os
import re
import shutil

import chromadb  # needed for the in-memory EphemeralClient
import streamlit as st
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import HumanMessage, SystemMessage
# -----------------------------
# 1. Page Configuration + UI Styling
# -----------------------------
st.set_page_config(
    page_title="AI Study Assistant for University Lecture Notes",
    page_icon="π",  # NOTE(review): looks like a mojibake'd emoji — confirm intended glyph
    layout="wide",
)

# Global CSS: centered title, subtle card styling, full-width rounded buttons.
st.markdown("""
<style>
.block-container {
    padding-top: 2rem;
    padding-bottom: 2rem;
}
.main-title {
    text-align: center;
    font-size: 42px;
    font-weight: 700;
}
.subtitle {
    text-align: center;
    font-size: 18px;
    color: #555;
    margin-bottom: 30px;
}
.stButton>button {
    width: 100%;
    border-radius: 12px;
    height: 3em;
    font-weight: 600;
}
.section-card {
    padding: 20px;
    border-radius: 15px;
    background-color: #f8f9fb;
    box-shadow: 0 4px 10px rgba(0,0,0,0.05);
    margin-bottom: 20px;
}
</style>
""", unsafe_allow_html=True)

# FIX: visible page title read "Lecture Notest" — corrected to "Lecture Notes".
st.markdown("<div class='main-title'>π AI Study Assistant for University Lecture Notes</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'></div>", unsafe_allow_html=True)
st.markdown("---")

# Hugging Face API token; existence is validated in the Model Setup section
# before any endpoint is constructed.
token = os.environ.get("HUGGINGFACEHUB_API_TOKEN2")
# -----------------------------
# 2. RAG Logic
# -----------------------------
def process_lecture_pdf(uploaded_file):
    """Index an uploaded lecture PDF for retrieval-augmented QA.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` containing the PDF bytes.

    Returns:
        tuple: ``(retriever, docs)`` where ``retriever`` returns the top-3
        most similar chunks and ``docs`` is the list of loaded page
        Documents (used elsewhere for summarization).
    """
    import tempfile  # local import keeps this fix self-contained

    # FIX: write to a unique temp file instead of os.path.join("/tmp",
    # uploaded_file.name). The old form let concurrent sessions clobber each
    # other's files and allowed path traversal via a crafted filename.
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Load the PDF and split it into overlapping chunks for embedding.
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # In-memory Chroma client: avoids the "readonly database" error
        # (code 1032) caused by an unwritable on-disk store in Docker.
        client = chromadb.EphemeralClient()
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            client=client
        )
        return vectorstore.as_retriever(search_kwargs={"k": 3}), docs
    finally:
        # Cleanup: remove the temp PDF after processing, even on failure.
        if os.path.exists(temp_path):
            os.remove(temp_path)
# -----------------------------
# 3. Model Setup
# -----------------------------
# Fail fast when the token is missing so endpoint construction below cannot
# silently proceed and 401 at first use.
if not token:
    st.error("HUGGINGFACEHUB_API_TOKEN2 is not set in environment variables.")
    st.stop()

# Llama 3 8B Instruct served via the Hugging Face Inference endpoint,
# wrapped in a chat interface so it accepts System/Human message lists.
llm_endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="conversational",
    huggingfacehub_api_token=token,
    max_new_tokens=1024,
    temperature=0.6
)
chat_llm = ChatHuggingFace(llm=llm_endpoint)
# -----------------------------
# 4. User Interface
# -----------------------------
# Left column: upload + summarize. Right (wider) column: question answering.
col1, col2 = st.columns([1, 2])

with col1:
    st.header("π Upload Notes")
    uploaded_file = st.file_uploader("Upload Lecture PDF", type="pdf")
    if uploaded_file:
        # Only re-index when a *new* file name arrives; otherwise reuse the
        # retriever/full_text cached in session state across reruns.
        if 'last_file' not in st.session_state or st.session_state.last_file != uploaded_file.name:
            with st.spinner("Analyzing PDF with Llama 3..."):
                retriever, full_docs = process_lecture_pdf(uploaded_file)
                st.session_state.retriever = retriever
                st.session_state.full_text = "\n".join([d.page_content for d in full_docs])
                st.session_state.last_file = uploaded_file.name
            st.success("Ready to study!")

    st.header("π Summarize")
    if st.button("Generate Summary"):
        if 'full_text' in st.session_state:
            with st.spinner("Llama 3 is summarizing..."):
                # Truncate to 4000 chars to stay within the prompt budget.
                messages = [
                    SystemMessage(content="You are a helpful university teaching assistant. Summarize the following text clearly."),
                    HumanMessage(content=f"Notes: {st.session_state.full_text[:4000]}")
                ]
                response = chat_llm.invoke(messages)
                st.write(response.content)
        else:
            st.warning("Please upload a PDF first.")

with col2:
    st.header("π¬ Ask Questions")
    # Form groups the text input and submit button into one rerun.
    with st.form("qa_form"):
        user_query = st.text_input("What would you like to know about your lecture?")
        submit_button = st.form_submit_button("Ask Question")

    if submit_button:
        if not user_query:
            st.error("Please enter a question.")
        elif 'retriever' in st.session_state:
            with st.spinner("Llama 3 is searching for the answer..."):
                # Retrieve top-k chunks and feed them as grounding context.
                context_docs = st.session_state.retriever.invoke(user_query)
                context_text = "\n\n".join([doc.page_content for doc in context_docs])
                messages = [
                    SystemMessage(content="Use the provided context to answer the student's question accurately."),
                    HumanMessage(content=f"Context: {context_text}\n\nQuestion: {user_query}")
                ]
                response = chat_llm.invoke(messages)
                st.markdown("### Answer")
                st.info(response.content)
                with st.expander("View Source Context"):
                    st.write(context_text)
        else:
            st.warning("Upload a PDF to start.")