Spaces:

Dinesh310
/

Multi_Pdf_Rag_chat

Sleeping

App Files Files Community

Multi_Pdf_Rag_chat / streamlit_app.py

Dinesh310

Update streamlit_app.py

3a8d617 verified 24 days ago

raw

history blame contribute delete

4.99 kB

	import streamlit as st
	import os
	import tempfile
	# from src.RAG_builder import ProjectRAGGraph # Ensure your graph class is in your_filename.py

	from src.rag_graph import ProjectRAGGraph

	# from src.graph.rag_graph import ProjectRAGGraph
	# --- Page Config ---
	# The same application name is used for the page title passed to
	# set_page_config and for the heading rendered at the top of the page.
	_APP_NAME = "Project Report Analyzer"
	st.set_page_config(page_title=_APP_NAME, layout="wide")
	st.title(f"📄 {_APP_NAME}")

	# --- Initialize Session State ---
	# Each entry maps a session key to a zero-argument factory. A factory is only
	# called when its key is still missing, so the RAG graph is constructed once
	# per session rather than every time this script runs.
	_session_defaults = {
	    "rag_graph": ProjectRAGGraph,
	    "messages": list,
	    # Hardcoded for demo, could be unique per session
	    "thread_id": lambda: "default_user_1",
	}
	for _state_key, _make_default in _session_defaults.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _make_default()

	# --- Sidebar: File Upload ---
	# Lets the user upload PDFs and hands them to the session's RAG graph for
	# indexing. Uploaded bytes are spooled to temporary .pdf files because
	# process_documents is called with file paths, not in-memory buffers.
	with st.sidebar:
	    st.header("Upload Documents")
	    uploaded_files = st.file_uploader(
	        "Upload Project PDFs",
	        type="pdf",
	        accept_multiple_files=True,
	    )

	    process_button = st.button("Process Documents")

	    if process_button and not uploaded_files:
	        # Previously this click was silently ignored; tell the user why
	        # nothing is happening.
	        st.warning("Please upload at least one PDF before processing.")
	    elif process_button:
	        pdf_paths = []
	        original_names = []
	        try:
	            with st.spinner("Processing PDFs..."):
	                for uploaded_file in uploaded_files:
	                    # Temp files get random names, so keep the name the
	                    # user actually uploaded alongside each path.
	                    original_names.append(uploaded_file.name)
	                    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	                        # Record the path before writing so a failed write
	                        # still gets cleaned up below.
	                        pdf_paths.append(tmp.name)
	                        tmp.write(uploaded_file.getvalue())

	                # Pass BOTH the paths and the original names
	                st.session_state.rag_graph.process_documents(
	                    pdf_paths,
	                    original_names=original_names,
	                )
	        finally:
	            # delete=False means nothing removes these files for us; clean up
	            # even when indexing raises so failed runs do not leak temp files.
	            for path in pdf_paths:
	                try:
	                    os.remove(path)
	                except OSError:
	                    # Best-effort: raising here would mask the original
	                    # processing error (if any).
	                    pass

	        # Only reached when indexing finished without raising.
	        st.success("Documents Indexed Successfully!")

	# if process_button and uploaded_files:
	# with st.spinner("Processing PDFs..."):
	# # Create temporary file paths to pass to your PDF Loader
	# pdf_paths = []
	# for uploaded_file in uploaded_files:
	# with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	# tmp.write(uploaded_file.getvalue())
	# pdf_paths.append(tmp.name)

	# # Use your existing process_documents method
	# st.session_state.rag_graph.process_documents(pdf_paths)

	# # Clean up temp files
	# for path in pdf_paths:
	# os.remove(path)

	# st.success("Documents Indexed Successfully!")

	# --- Chat Interface ---
	def _citation_caption(doc):
	    """Return the one-line caption shown for a retrieved document.

	    ``doc`` must expose a ``metadata`` mapping, as the retrieved documents
	    used throughout this script do. The ``page`` value is treated as
	    0-based when it is an int (it is shown as ``page + 1``); a missing page
	    is shown as ``N/A`` and any other value is shown unchanged.
	    """
	    metadata = doc.metadata
	    # ``or "Unknown"`` also covers an explicit None, which basename rejects.
	    source_name = os.path.basename(str(metadata.get("source") or "Unknown"))
	    page = metadata.get("page")
	    if page is None:
	        page_label = "N/A"
	    elif isinstance(page, int):
	        page_label = page + 1
	    else:
	        page_label = page
	    return f"📄 {source_name} (Page {page_label})"


	def _render_sources(docs, preview_chars=300):
	    """Render a "View Sources" expander for ``docs``; do nothing if empty.

	    Used for both the live answer and replayed history so a citation looks
	    the same before and after the script reruns.
	    """
	    if not docs:
	        return
	    with st.expander("View Sources"):
	        for doc in docs:
	            st.caption(_citation_caption(doc))
	            st.write(f"_{doc.page_content[:preview_chars]}..._")


	# Display existing messages
	for message in st.session_state.messages:
	    with st.chat_message(message["role"]):
	        st.markdown(message["content"])
	        # User messages carry no "citations" key; .get keeps that a no-op.
	        _render_sources(message.get("citations"))

	# User Input
	if prompt := st.chat_input("Ask a question about your projects..."):
	    # Check if vector store is ready
	    if st.session_state.rag_graph.vector_store is None:
	        st.error("Please upload and process documents first!")
	    else:
	        # Add user message to state
	        st.session_state.messages.append({"role": "user", "content": prompt})
	        with st.chat_message("user"):
	            st.markdown(prompt)

	        # Generate Response using the Graph
	        with st.chat_message("assistant"):
	            with st.spinner("Analyzing..."):
	                config = {"configurable": {"thread_id": st.session_state.thread_id}}
	                inputs = {"question": prompt}

	                # Execute graph
	                result = st.session_state.rag_graph.workflow.invoke(inputs, config=config)

	            answer = result["answer"]
	            # Retrieved documents backing the answer. Assumes ``result`` is a
	            # mapping (TODO confirm against the graph); a missing or None
	            # "context" is treated as "no sources" instead of raising.
	            context = result.get("context") or []

	            st.markdown(answer)
	            _render_sources(context)

	        # Add assistant response to state
	        st.session_state.messages.append({
	            "role": "assistant",
	            "content": answer,
	            "citations": context,
	        })