File size: 6,079 Bytes
c0f74f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import streamlit as st
from agents.graph import app
from langchain_core.messages import HumanMessage
import os
import sys
import tempfile
from typing import List

# --- PATH SETUP ---
# The project root must be on sys.path BEFORE the local-package import below;
# in the original order the `models` import could fail even though the
# package exists next to this file.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)

# Ensure you have implemented this function in FinalProject/models/retriever.py
# It should accept a list of PDF file paths and return a LangChain Retriever object.
try:
    from models.retriever import get_rag_retriever_from_paths
except ImportError:
    st.error("Could not import get_rag_retriever_from_paths. Please check your models/retriever.py file.")
    # st.stop() halts this script run cleanly; sys.exit() would raise
    # SystemExit into the Streamlit runtime instead.
    st.stop()

# --- PAGE CONFIGURATION ---
# Browser-tab title/icon and a full-width layout for the chat interface.
PAGE_SETTINGS = {
    "page_title": "GraphQuery RAG Agent",
    "page_icon": "πŸ€–",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

# --- CACHED FUNCTION TO BUILD RAG RETRIEVER ---
# st.cache_resource keys on the argument (the list of path strings), so the
# expensive indexing runs only when that list changes between reruns.
@st.cache_resource
def load_and_index_documents(file_paths: List[str]):
    """Build and return a RAG retriever for the given PDF paths.

    Returns None when no paths are supplied or when indexing fails; failures
    are reported to the UI via st.error rather than raised.
    """
    if not file_paths:
        return None

    spinner_msg = f"Indexing {len(file_paths)} PDF file(s)... This may take a moment."
    with st.spinner(spinner_msg):
        try:
            # Delegates the actual loading/embedding to models/retriever.py.
            retriever = get_rag_retriever_from_paths(file_paths)
            st.success(f"Indexed {len(file_paths)} PDF file(s) successfully!")
            return retriever
        except Exception as exc:
            # UI boundary: surface the problem and degrade to "no retriever".
            st.error(f"Failed to index documents: {exc}")
            return None

# --- SIDEBAR (Settings, Key, and Upload) ---
# Collects the API key, handles PDF upload, and builds the RAG retriever.
# Defines `api_key` and `rag_retriever` used by the main page below.
with st.sidebar:
    st.header("βš™οΈ Agent Settings")
    st.caption("Configure your LLM and Access Key.")
    
    # API Key Input (password-masked; held only for this script run)
    api_key = st.text_input(
        "**Groq API Key (Required):**", 
        type="password",  
        help="Paste your private Groq API Key here. It is used only for this session.",
    )
    
    st.divider()
    
    # 1. FILE UPLOAD SECTION
    st.subheader("πŸ“š Document Upload")
    uploaded_files = st.file_uploader(
        "Upload your own PDFs for RAG context:", 
        type=["pdf"], 
        accept_multiple_files=True
    )
    
    # 2. FILE SAVING & INDEXING LOGIC
    file_paths = []
    rag_retriever = None
    
    if uploaded_files:
        # Streamlit files are in memory; we must write them to a temporary file 
        # so LangChain's PyPDFLoader (which needs a file path) can read them.
        # NOTE(review): TemporaryDirectory gets a fresh random name on every
        # Streamlit rerun, so the cached function below sees new paths each
        # time and presumably re-indexes on every interaction. The directory
        # (and the PDFs in it) is also deleted when this `with` exits, which
        # assumes the retriever has fully loaded the files by then — confirm.
        with tempfile.TemporaryDirectory() as temp_dir:
            for uploaded_file in uploaded_files:
                file_path = os.path.join(temp_dir, uploaded_file.name)
                # Write the file bytes to the temporary path
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                file_paths.append(file_path)

            # 3. Build the retriever and cache it based on the list of paths
            # NOTE: We pass the list of temporary paths to the cached function.
            rag_retriever = load_and_index_documents(file_paths)

    else:
        # Clear the cache if no files are uploaded to ensure a clean state
        st.info("No documents uploaded. Only Wikipedia lookup is enabled.")
        load_and_index_documents.clear() # Clears the cache for this function
    

    st.divider()
    st.subheader("πŸ› οΈ Features")
    # rag_retriever is truthy only if the indexing above succeeded.
    st.info(f"RAG (Document Context) status: {'**ENABLED**' if rag_retriever else 'DISABLED'}")
    st.info("Wikipedia Routing is always active.")
    st.text("MORE COMING SOON ⏱️")

# --- MAIN INTERFACE (Header) ---
# Static page header rendered as markdown.
HEADER_MD = """

    # 🧠 LangGraph Query Model 

    ### Multi-Source RAG Agent

    Ask a question related to your uploaded documents or general knowledge.

    """
st.markdown(HEADER_MD)
st.divider()

# --- STATE INITIALIZATION ---
# Base graph state shared by every query; the per-query "messages" field is
# attached in the submit handler below.
initial_state_base = dict(
    documents=[],
    source="",
    api_key=api_key,
    # Pass the dynamically created retriever to the graph state
    rag_retriever=rag_retriever,
)

# --- CHAT INPUT AND LOGIC ---
# clear_on_submit wipes the text box after each question; the submitted
# value is still available in user_query for this run.
with st.form(key='query_form', clear_on_submit=True):
    user_query = st.text_input(
        "**Your Question:**", 
        placeholder="e.g., What is the significance of the military-industrial complex in Russia?",
        label_visibility="collapsed"
    )
    submit_button = st.form_submit_button(label='Ask the Agent πŸš€')


# --- EXECUTION LOGIC ---
# Runs only when the form was submitted with a non-empty question.

if submit_button and user_query:
    # Key is validated lazily here so the user gets an explicit error
    # instead of a silently disabled form.
    if not api_key:
        st.error("πŸ”‘ **Error:** Please enter your Groq API Key in the sidebar to run the query.")
        st.stop()
        
    st.info("πŸ”„ **Querying the Agent...** Please wait.")
    
    # Prepare state: shallow copy of the base dict, then attach the
    # single-turn conversation for this query.
    initial_state = initial_state_base.copy()
    initial_state["messages"] = [HumanMessage(content=user_query)]
    
    with st.spinner('Thinking... Routing and Retrieving Context...'):
        try:
            # Invoke the compiled LangGraph; the answer is expected as the
            # last entry of the returned "messages" list.
            response = app.invoke(initial_state)
            
            # --- Output Display ---
            final_message = response["messages"][-1].content
            
            st.success("βœ… **Agent Response:**")
            st.markdown(final_message)
            st.divider()
            
            # Optional: Show debug info
            with st.expander("πŸ” **Debug Info (Agent Flow)**"):
                st.write(f"**Final Source:** {response.get('source', 'Unknown')}")
                if 'documents' in response and response['documents']:
                    st.write(f"**Retrieved Documents:** {len(response['documents'])} chunks used.")
                
        except Exception as e:
            # UI boundary: show any graph failure instead of crashing the app.
            st.error("❌ **Agent Failure:** An error occurred during execution.")
            st.exception(e)
            
# First-visit hint: shown only before the user supplied either input.
elif not user_query and not api_key:
    st.markdown("πŸ‘‹ Start by entering your **Groq API Key** in the sidebar and asking a question above!")