File size: 4,991 Bytes
3a8d617
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fe6232
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import streamlit as st
import os
import tempfile
# from src.RAG_builder import ProjectRAGGraph # Ensure your graph class is in your_filename.py

from src.rag_graph import ProjectRAGGraph

# from src.graph.rag_graph import ProjectRAGGraph
# --- Page Config ---
st.set_page_config(page_title="Project Report Analyzer", layout="wide")
st.title("๐Ÿ“„ Project Report Analyzer")

# --- Initialize Session State ---
# Map each session key to a zero-arg factory so values (including the
# ProjectRAGGraph instance) are only constructed when the key is absent,
# exactly like the per-key "if not in" guards would.
_SESSION_DEFAULTS = {
    "rag_graph": ProjectRAGGraph,
    "messages": list,
    "thread_id": lambda: "default_user_1",  # Hardcoded for demo, could be unique per session
}
for _key, _factory in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _factory()

# --- Sidebar: File Upload ---
with st.sidebar:
    st.header("Upload Documents")
    uploaded_files = st.file_uploader(
        "Upload Project PDFs", 
        type="pdf", 
        accept_multiple_files=True
    )
    
    process_button = st.button("Process Documents")

    if process_button and uploaded_files:
        with st.spinner("Processing PDFs..."):
            pdf_paths = []
            original_names = [] # <--- Add this
            for uploaded_file in uploaded_files:
                original_names.append(uploaded_file.name) # <--- Capture real name
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                    tmp.write(uploaded_file.getvalue())
                    pdf_paths.append(tmp.name)
            
            # Pass BOTH the paths and the original names
            st.session_state.rag_graph.process_documents(
                pdf_paths, 
                original_names=original_names
            )
            
            for path in pdf_paths:
                os.remove(path)
            st.success("Documents Indexed Successfully!")

    # if process_button and uploaded_files:
    #     with st.spinner("Processing PDFs..."):
    #         # Create temporary file paths to pass to your PDF Loader
    #         pdf_paths = []
    #         for uploaded_file in uploaded_files:
    #             with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
    #                 tmp.write(uploaded_file.getvalue())
    #                 pdf_paths.append(tmp.name)
            
    #         # Use your existing process_documents method
    #         st.session_state.rag_graph.process_documents(pdf_paths)
            
    #         # Clean up temp files
    #         for path in pdf_paths:
    #             os.remove(path)
                
    #         st.success("Documents Indexed Successfully!")

# --- Chat Interface ---
# Replay the conversation so far. Stored assistant messages carry their
# retrieved Document objects under "citations".
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
        if "citations" in message and message["citations"]:
            with st.expander("View Sources"):
                for doc in message["citations"]:
                    # Use the same citation format as the live-answer block:
                    # base filename only, and the 0-based page index shifted
                    # to a human 1-based page number — so a message renders
                    # identically on rerun as when it was first produced.
                    source_name = os.path.basename(doc.metadata.get('source', 'Unknown'))
                    page_num = doc.metadata.get('page', 0) + 1
                    st.caption(f"๐Ÿ“„ {source_name} (Page {page_num})")
                    st.write(f"_{doc.page_content[:300]}..._")

# User Input
if prompt := st.chat_input("Ask a question about your projects..."):
    if st.session_state.rag_graph.vector_store is None:
        # Nothing has been indexed yet, so there is nothing to retrieve from.
        st.error("Please upload and process documents first!")
    else:
        # Record and echo the user's turn.
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Run the RAG graph and render the assistant's turn.
        with st.chat_message("assistant"):
            with st.spinner("Analyzing..."):
                # Invoke the compiled workflow; the thread_id keys any
                # conversation state kept by the graph.
                result = st.session_state.rag_graph.workflow.invoke(
                    {"question": prompt},
                    config={"configurable": {"thread_id": st.session_state.thread_id}},
                )

                answer = result["answer"]
                # Retrieved Document objects that back the answer.
                retrieved_docs = result["context"]

                st.markdown(answer)

                # Citations section
                if retrieved_docs:
                    with st.expander("View Sources"):
                        for doc in retrieved_docs:
                            meta = doc.metadata
                            source_name = os.path.basename(meta.get('source', 'Unknown'))
                            page_num = meta.get('page', 0) + 1
                            st.caption(f"๐Ÿ“„ {source_name} (Page {page_num})")
                            st.write(f"_{doc.page_content[:300]}..._")

                # Persist the assistant turn (with its sources) for replay.
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": answer,
                    "citations": retrieved_docs,
                })