zidankhan committed on
Commit
c0f74f5
Β·
verified Β·
1 Parent(s): 014d53f

Upload 43 files

Browse files
Files changed (44) hide show
  1. .gitattributes +1 -0
  2. FinalProject/__init__.py +0 -0
  3. FinalProject/__pycache__/__init__.cpython-311.pyc +0 -0
  4. FinalProject/agents/__pycache__/graph.cpython-311.pyc +0 -0
  5. FinalProject/agents/__pycache__/graph.cpython-313.pyc +0 -0
  6. FinalProject/agents/__pycache__/state.cpython-311.pyc +0 -0
  7. FinalProject/agents/graph.py +40 -0
  8. FinalProject/agents/nodes/__init__.py +0 -0
  9. FinalProject/agents/nodes/__pycache__/__init__.cpython-311.pyc +0 -0
  10. FinalProject/agents/nodes/__pycache__/__init__.cpython-313.pyc +0 -0
  11. FinalProject/agents/nodes/__pycache__/answer_node.cpython-311.pyc +0 -0
  12. FinalProject/agents/nodes/__pycache__/rag_node.cpython-311.pyc +0 -0
  13. FinalProject/agents/nodes/__pycache__/rag_node.cpython-313.pyc +0 -0
  14. FinalProject/agents/nodes/__pycache__/router_noder.cpython-311.pyc +0 -0
  15. FinalProject/agents/nodes/__pycache__/wiki_node.cpython-311.pyc +0 -0
  16. FinalProject/agents/nodes/__pycache__/wiki_node.cpython-313.pyc +0 -0
  17. FinalProject/agents/nodes/answer_node.py +32 -0
  18. FinalProject/agents/nodes/jskdnvcoa.ipynb +61 -0
  19. FinalProject/agents/nodes/rag_node.py +13 -0
  20. FinalProject/agents/nodes/router_noder.py +47 -0
  21. FinalProject/agents/nodes/wiki_node.py +17 -0
  22. FinalProject/agents/state.py +12 -0
  23. FinalProject/agents/test.ipynb +51 -0
  24. FinalProject/app.py +165 -0
  25. FinalProject/data/__init__.py +0 -0
  26. FinalProject/data/__pycache__/__init__.cpython-311.pyc +0 -0
  27. FinalProject/data/__pycache__/dataingestion.cpython-311.pyc +0 -0
  28. FinalProject/data/dataingestion.py +33 -0
  29. FinalProject/data/pdfs/RU-MILITARY.pdf +3 -0
  30. FinalProject/data/pdfs/__init__.py +0 -0
  31. FinalProject/models/__init__.py +0 -0
  32. FinalProject/models/__pycache__/__init__.cpython-311.pyc +0 -0
  33. FinalProject/models/__pycache__/__init__.cpython-313.pyc +0 -0
  34. FinalProject/models/__pycache__/embedding.cpython-311.pyc +0 -0
  35. FinalProject/models/__pycache__/embedding.cpython-313.pyc +0 -0
  36. FinalProject/models/__pycache__/llm.cpython-311.pyc +0 -0
  37. FinalProject/models/__pycache__/retriever.cpython-311.pyc +0 -0
  38. FinalProject/models/__pycache__/retriever.cpython-313.pyc +0 -0
  39. FinalProject/models/embedding.py +6 -0
  40. FinalProject/models/llm.py +7 -0
  41. FinalProject/models/retriever.py +31 -0
  42. FinalProject/requirements.txt +10 -0
  43. FinalProject/test.ipynb +79 -0
  44. app.py +165 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ FinalProject/data/pdfs/RU-MILITARY.pdf filter=lfs diff=lfs merge=lfs -text
FinalProject/__init__.py ADDED
File without changes
FinalProject/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (166 Bytes). View file
 
FinalProject/agents/__pycache__/graph.cpython-311.pyc ADDED
Binary file (1.95 kB). View file
 
FinalProject/agents/__pycache__/graph.cpython-313.pyc ADDED
Binary file (1.66 kB). View file
 
FinalProject/agents/__pycache__/state.cpython-311.pyc ADDED
Binary file (1.18 kB). View file
 
FinalProject/agents/graph.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Wire up the agent LangGraph: router -> (document | wiki) -> answer."""
import os
import sys

# Make the FinalProject package root importable when this module is loaded
# directly (it lives one level below the package root).
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.join(current_dir, os.pardir)
sys.path.insert(0, project_root)

from agents.nodes.rag_node import rag_node
from agents.nodes.wiki_node import wiki_node
from agents.nodes.answer_node import answer_node
from agents.nodes.router_noder import route_node, route_decision
from agents.state import AgentGraph
from langgraph.graph import StateGraph

graph = StateGraph(state_schema=AgentGraph)

# One node per responsibility: classify, retrieve (two sources), answer.
for node_name, node_fn in (
    ("router", route_node),
    ("document", rag_node),
    ("wiki", wiki_node),
    ("answer", answer_node),
):
    graph.add_node(node_name, node_fn)

graph.set_entry_point("router")

# route_decision returns "rag" or "wiki"; map those onto the node names.
graph.add_conditional_edges(
    source="router",
    path=route_decision,
    path_map={"rag": "document", "wiki": "wiki"},
)

# Both retrieval paths converge on the answer node.
graph.add_edge("document", "answer")
graph.add_edge("wiki", "answer")

graph.set_finish_point("answer")

# Compiled, runnable graph consumed by the Streamlit front-end.
app = graph.compile()
FinalProject/agents/nodes/__init__.py ADDED
File without changes
FinalProject/agents/nodes/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (179 Bytes). View file
 
FinalProject/agents/nodes/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (177 Bytes). View file
 
FinalProject/agents/nodes/__pycache__/answer_node.cpython-311.pyc ADDED
Binary file (1.91 kB). View file
 
FinalProject/agents/nodes/__pycache__/rag_node.cpython-311.pyc ADDED
Binary file (708 Bytes). View file
 
FinalProject/agents/nodes/__pycache__/rag_node.cpython-313.pyc ADDED
Binary file (653 Bytes). View file
 
FinalProject/agents/nodes/__pycache__/router_noder.cpython-311.pyc ADDED
Binary file (2.48 kB). View file
 
FinalProject/agents/nodes/__pycache__/wiki_node.cpython-311.pyc ADDED
Binary file (1.04 kB). View file
 
FinalProject/agents/nodes/__pycache__/wiki_node.cpython-313.pyc ADDED
Binary file (989 Bytes). View file
 
FinalProject/agents/nodes/answer_node.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

# Resolve the project root from this file's location, NOT from os.getcwd():
# the original getcwd()-based path only worked when the process happened to
# be started from FinalProject/agents/nodes (e.g. it broke under
# `streamlit run FinalProject/app.py` from the repository root).
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(_THIS_DIR, os.pardir, os.pardir))
sys.path.insert(0, project_root)

from models.llm import get_llm
from langchain_core.prompts import ChatPromptTemplate


def answer_node(state):
    """Generate the final answer from the retrieved context.

    Expects in ``state``:
      * ``messages``  – chat history; the last entry carries the user query.
      * ``documents`` – documents retrieved by the rag/wiki node.
      * ``api_key``   – Groq API key used to construct the LLM.

    Returns a partial state update ``{"messages": [ai_response]}``.
    Raises ``ValueError`` when no API key is present in the state.
    """
    question = state["messages"][-1].content
    documents = state["documents"]
    # Separate chunks visibly so the model can tell them apart.
    context = "\n\n----\n\n".join(doc.page_content for doc in documents)

    api_key = state.get("api_key")
    if not api_key:
        raise ValueError("API Key not found in state.")

    model = get_llm(api=api_key)

    prompt = ChatPromptTemplate.from_messages([
        ("system", "Your job is to provide a concise answer to the user query from the provided context: {context}"),
        ("user", "{query}"),
    ])

    answer_chain = prompt | model
    response = answer_chain.invoke({"query": question, "context": context})

    # The messages channel uses an additive reducer, so returning a one-item
    # list appends the response to the history.
    return {"messages": [response]}
FinalProject/agents/nodes/jskdnvcoa.ipynb ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "2be3c102",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os\n",
11
+ "import sys\n",
12
+ "\n",
13
+ "\n",
14
+ "project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))\n",
15
+ "\n",
16
+ "sys.path.insert(0, project_root)\n",
17
+ "\n",
18
+ "print(f\"Added {project_root} to system path.\")\n"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "id": "d31913ce",
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "from models.llm import get_llm"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": null,
34
+ "id": "4edc211f",
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": []
38
+ }
39
+ ],
40
+ "metadata": {
41
+ "kernelspec": {
42
+ "display_name": "myenv",
43
+ "language": "python",
44
+ "name": "python3"
45
+ },
46
+ "language_info": {
47
+ "codemirror_mode": {
48
+ "name": "ipython",
49
+ "version": 3
50
+ },
51
+ "file_extension": ".py",
52
+ "mimetype": "text/x-python",
53
+ "name": "python",
54
+ "nbconvert_exporter": "python",
55
+ "pygments_lexer": "ipython3",
56
+ "version": "3.11.8"
57
+ }
58
+ },
59
+ "nbformat": 4,
60
+ "nbformat_minor": 5
61
+ }
FinalProject/agents/nodes/rag_node.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def rag_node(state):
    """Retrieve document chunks relevant to the latest user question.

    Reads the question from the last entry of ``state["messages"]`` and the
    retriever from ``state["rag_retriever"]``.  When no retriever is present
    (no PDFs were uploaded), retrieval is skipped and an empty document list
    is returned so the graph can still proceed to the answer node.
    """
    question = state["messages"][-1].content
    retriever = state.get("rag_retriever")

    if retriever is None:
        print("RAG source is not available. Skipping retrieval.")
        return {"documents": []}

    return {"documents": retriever.invoke(question)}
FinalProject/agents/nodes/router_noder.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

# Resolve the project root from this file's location, NOT from os.getcwd():
# the getcwd()-based path only worked when the process was started from
# FinalProject/agents/nodes.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(_THIS_DIR, os.pardir, os.pardir))
sys.path.insert(0, project_root)

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

from data.dataingestion import load_all_pdfs
from models.llm import get_llm  # the duplicate import of get_llm was removed

# Loaded once at import time and reused for every routing decision.
document = load_all_pdfs()


def route_node(state):
    """Classify the user's question as document-specific or general knowledge.

    Asks the LLM whether the question relates to the bundled PDFs ('rag') or
    to general knowledge ('wikipedia'), and stores the decision under the
    ``source`` key ("rag" or "wiki") for the conditional edge to read.

    Raises ``ValueError`` when no API key is present in the state.
    """
    question = state["messages"][-1].content

    api_key = state.get("api_key")
    if not api_key:
        raise ValueError("API Key not found in state.")

    model = get_llm(api=api_key)

    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are an expert router.
    Your task is to classify the user's question based on its content:
    1. 'rag': If the question is related to the topics provided in these documents : {documents}
    2. 'wikipedia': If the question is about general knowledge, history, people, or events.
    Return ONLY a single word string: 'rag' or 'wikipedia'.
    """),
        ("user", "{question}"),
    ])

    route_chain = prompt | model | StrOutputParser()
    route = route_chain.invoke({"question": question, "documents": document})

    # Normalise before matching: models sometimes reply with extra casing or
    # whitespace (e.g. "RAG.", " Rag"), which the raw substring test missed.
    if "rag" in route.strip().lower():
        decision = "rag"
        print("routing to rag")
    else:
        decision = "wiki"
        print("routing to wikipedia")

    return {"source": decision}


def route_decision(state):
    """Conditional-edge selector: return the source chosen by route_node."""
    return state["source"]
FinalProject/agents/nodes/wiki_node.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

# Resolve the project root from this file's location, NOT from os.getcwd():
# the getcwd()-based path only worked when the process was started from
# FinalProject/agents/nodes.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(_THIS_DIR, os.pardir, os.pardir))
sys.path.insert(0, project_root)

from models.retriever import get_wiki_retriever


def wiki_node(state):
    """Fetch Wikipedia articles relevant to the latest user question.

    Returns a partial state update with the retrieved documents, and marks
    ``source`` as "final" so downstream consumers know retrieval is done.
    """
    retriever = get_wiki_retriever()
    question = state["messages"][-1].content
    documents = retriever.invoke(question)

    return {"documents": documents, "source": "final"}
FinalProject/agents/state.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# TypedDict/Annotated/Literal belong to the standard typing module; the
# original pulled them from langgraph.graph.message, which only works (if at
# all) by accident of that module's own imports.
from typing import Annotated, Any, List, Literal, Optional, TypedDict
from operator import add

from langchain_core.messages import BaseMessage
from langchain_core.documents import Document


class AgentGraph(TypedDict):
    """Shared state flowing through the agent graph.

    ``messages`` accumulates across nodes via the ``operator.add`` reducer;
    every other channel is replaced by the most recent node update.
    """

    # Chat history; each node's returned list is concatenated onto this.
    messages: Annotated[List[BaseMessage], add]
    # Chunks retrieved by the rag or wiki node for the current question.
    documents: List[Document]
    # Routing decision / progress marker written by the router and wiki nodes.
    source: Literal["rag", "wiki", "final"]
    # Groq API key supplied by the UI.
    api_key: Optional[str]
    # Retriever built from user-uploaded PDFs (None when nothing uploaded).
    # Declared here so LangGraph keeps the key instead of dropping it: app.py
    # writes it into the initial state and rag_node reads it back.
    rag_retriever: Optional[Any]
FinalProject/agents/test.ipynb ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "id": "41112477",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from graph import app"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "19a2b36e",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": []
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "id": "49732fe8",
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": []
28
+ }
29
+ ],
30
+ "metadata": {
31
+ "kernelspec": {
32
+ "display_name": "myenv",
33
+ "language": "python",
34
+ "name": "python3"
35
+ },
36
+ "language_info": {
37
+ "codemirror_mode": {
38
+ "name": "ipython",
39
+ "version": 3
40
+ },
41
+ "file_extension": ".py",
42
+ "mimetype": "text/x-python",
43
+ "name": "python",
44
+ "nbconvert_exporter": "python",
45
+ "pygments_lexer": "ipython3",
46
+ "version": "3.11.8"
47
+ }
48
+ },
49
+ "nbformat": 4,
50
+ "nbformat_minor": 5
51
+ }
FinalProject/app.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit front-end for the LangGraph multi-source RAG agent."""
import hashlib
import os
import sys
import tempfile
from typing import Tuple

import streamlit as st

# --- PATH SETUP ---
# Must run BEFORE any project-local import so the app also works when the
# process is not started from this directory (the original inserted the path
# only after `from agents.graph import app` had already executed).
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)

# --- PAGE CONFIGURATION ---
# set_page_config must be the first Streamlit command of a script run, so it
# comes before any code path that could call st.error().
st.set_page_config(
    page_title="GraphQuery RAG Agent",
    page_icon="🤖",
    layout="wide"
)

from langchain_core.messages import HumanMessage
from agents.graph import app

# models/retriever.py must provide this function: it accepts a list of PDF
# file paths and returns a LangChain retriever object.
try:
    from models.retriever import get_rag_retriever_from_paths
except ImportError:
    st.error("Could not import get_rag_retriever_from_paths. Please check your models/retriever.py file.")
    st.stop()  # Streamlit-safe halt; sys.exit() surfaces as a SystemExit traceback


# --- CACHED FUNCTION TO BUILD RAG RETRIEVER ---
# The cache is keyed on (filename, content-hash) pairs instead of temp-file
# paths: tempfile paths change on every script rerun, so the original cache
# missed (and re-indexed) on each page refresh.  The `_payloads` parameter
# has a leading underscore, which tells Streamlit to exclude it from hashing.
@st.cache_resource
def load_and_index_documents(file_keys: Tuple[Tuple[str, str], ...],
                             _payloads: Tuple[Tuple[str, bytes], ...]):
    """Build (and cache) a RAG retriever for the uploaded PDFs.

    ``file_keys`` are (name, sha256-hex) pairs forming the cache key;
    ``_payloads`` are (name, raw-bytes) pairs carrying the actual content.
    Returns None when there is nothing to index or indexing fails.
    """
    if not file_keys:
        return None

    with st.spinner(f"Indexing {len(file_keys)} PDF file(s)... This may take a moment."):
        try:
            # PyPDFLoader needs real file paths, so write the uploaded bytes
            # to a temporary directory for the duration of indexing.
            with tempfile.TemporaryDirectory() as temp_dir:
                paths = []
                for name, data in _payloads:
                    path = os.path.join(temp_dir, name)
                    with open(path, "wb") as fh:
                        fh.write(data)
                    paths.append(path)
                # Indexing completes before the temp dir is removed.
                retriever = get_rag_retriever_from_paths(paths)
            st.success(f"Indexed {len(file_keys)} PDF file(s) successfully!")
            return retriever
        except Exception as e:
            st.error(f"Failed to index documents: {e}")
            return None


# --- SIDEBAR (Settings, Key, and Upload) ---
with st.sidebar:
    st.header("⚙️ Agent Settings")
    st.caption("Configure your LLM and Access Key.")

    # API key is kept only in this session's state.
    api_key = st.text_input(
        "**Groq API Key (Required):**",
        type="password",
        help="Paste your private Groq API Key here. It is used only for this session.",
    )

    st.divider()

    # 1. FILE UPLOAD SECTION
    st.subheader("📚 Document Upload")
    uploaded_files = st.file_uploader(
        "Upload your own PDFs for RAG context:",
        type=["pdf"],
        accept_multiple_files=True
    )

    # 2. FILE HASHING & INDEXING LOGIC
    rag_retriever = None
    if uploaded_files:
        payloads = tuple((f.name, bytes(f.getbuffer())) for f in uploaded_files)
        file_keys = tuple(
            (name, hashlib.sha256(data).hexdigest()) for name, data in payloads
        )
        # 3. Build the retriever; cached until the set of uploads changes.
        rag_retriever = load_and_index_documents(file_keys, payloads)
    else:
        # Clear the cache when no files are uploaded to ensure a clean state.
        st.info("No documents uploaded. Only Wikipedia lookup is enabled.")
        load_and_index_documents.clear()

    st.divider()
    st.subheader("🛠️ Features")
    st.info(f"RAG (Document Context) status: {'**ENABLED**' if rag_retriever else 'DISABLED'}")
    st.info("Wikipedia Routing is always active.")
    st.text("MORE COMING SOON ⏱️")

# --- MAIN INTERFACE (Header) ---
st.markdown(
    """
    # 🧠 LangGraph Query Model
    ### Multi-Source RAG Agent
    Ask a question related to your uploaded documents or general knowledge.
    """
)
st.divider()

# --- STATE INITIALIZATION ---
initial_state_base = {
    "documents": [],
    "source": "",
    "api_key": api_key,
    # Pass the dynamically created retriever to the graph state.
    "rag_retriever": rag_retriever,
}

# --- CHAT INPUT AND LOGIC ---
with st.form(key='query_form', clear_on_submit=True):
    user_query = st.text_input(
        "**Your Question:**",
        placeholder="e.g., What is the significance of the military-industrial complex in Russia?",
        label_visibility="collapsed"
    )
    submit_button = st.form_submit_button(label='Ask the Agent 🚀')


# --- EXECUTION LOGIC ---
if submit_button and user_query:
    if not api_key:
        st.error("🔑 **Error:** Please enter your Groq API Key in the sidebar to run the query.")
        st.stop()

    st.info("🔄 **Querying the Agent...** Please wait.")

    # Prepare the per-run state: base settings plus the user's message.
    initial_state = initial_state_base.copy()
    initial_state["messages"] = [HumanMessage(content=user_query)]

    with st.spinner('Thinking... Routing and Retrieving Context...'):
        try:
            response = app.invoke(initial_state)

            # --- Output Display ---
            final_message = response["messages"][-1].content

            st.success("✅ **Agent Response:**")
            st.markdown(final_message)
            st.divider()

            # Optional: show which path the graph took and how much context.
            with st.expander("🔍 **Debug Info (Agent Flow)**"):
                st.write(f"**Final Source:** {response.get('source', 'Unknown')}")
                if 'documents' in response and response['documents']:
                    st.write(f"**Retrieved Documents:** {len(response['documents'])} chunks used.")

        except Exception as e:
            st.error("❌ **Agent Failure:** An error occurred during execution.")
            st.exception(e)

elif not user_query and not api_key:
    st.markdown("👋 Start by entering your **Groq API Key** in the sidebar and asking a question above!")
FinalProject/data/__init__.py ADDED
File without changes
FinalProject/data/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (171 Bytes). View file
 
FinalProject/data/__pycache__/dataingestion.cpython-311.pyc ADDED
Binary file (2.09 kB). View file
 
FinalProject/data/dataingestion.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_core.documents import Document
from typing import List

# Folder of bundled PDFs, resolved relative to this module so loading works
# regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
PDF_FOLDER = os.path.join(BASE_DIR, "pdfs")


def load_all_pdfs() -> List[Document]:
    """Load every PDF page found under ``data/pdfs``.

    Creates the folder (and returns an empty list) when it does not exist
    yet.  Each loaded page gets a ``source_short`` metadata entry carrying
    just the file name.  Loading errors are reported and swallowed, also
    yielding an empty list.
    """
    if not os.path.exists(PDF_FOLDER):
        os.makedirs(PDF_FOLDER, exist_ok=True)
        print(f"Created PDF ingestion directory: {PDF_FOLDER}")
        return []

    try:
        all_docs = PyPDFDirectoryLoader(PDF_FOLDER).load()
        print(f"Successfully loaded {len(all_docs)} document pages from the 'pdfs' folder.")

        # Attach a short, path-free source name for display purposes.
        for doc in all_docs:
            if 'source' in doc.metadata:
                doc.metadata['source_short'] = os.path.basename(doc.metadata['source'])

        return all_docs
    except Exception as e:
        print(f"Error loading PDFs: {e}")
        return []
FinalProject/data/pdfs/RU-MILITARY.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a435f3f6f06baf8883cab3f2c84cbe0be0a1268df3a3da74f2e3f17161dd6a7
3
+ size 1527223
FinalProject/data/pdfs/__init__.py ADDED
File without changes
FinalProject/models/__init__.py ADDED
File without changes
FinalProject/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes). View file
 
FinalProject/models/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (171 Bytes). View file
 
FinalProject/models/__pycache__/embedding.cpython-311.pyc ADDED
Binary file (458 Bytes). View file
 
FinalProject/models/__pycache__/embedding.cpython-313.pyc ADDED
Binary file (407 Bytes). View file
 
FinalProject/models/__pycache__/llm.cpython-311.pyc ADDED
Binary file (449 Bytes). View file
 
FinalProject/models/__pycache__/retriever.cpython-311.pyc ADDED
Binary file (1.85 kB). View file
 
FinalProject/models/__pycache__/retriever.cpython-313.pyc ADDED
Binary file (1.6 kB). View file
 
FinalProject/models/embedding.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
from langchain_huggingface import HuggingFaceEmbeddings


def get_embeddings():
    """Return the sentence-transformer embedding model used for indexing.

    HuggingFaceEmbeddings declares the checkpoint field as ``model_name``;
    the original ``model=`` keyword is not a declared field and fails the
    model's pydantic validation.
    """
    return HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2"
    )
FinalProject/models/llm.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from langchain_groq import ChatGroq


def get_llm(api):
    """Build a Groq chat model (llama-3.3-70b-versatile) for the given API key."""
    config = {
        "model": "llama-3.3-70b-versatile",
        "api_key": api,
    }
    return ChatGroq(**config)
FinalProject/models/retriever.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from functools import lru_cache
from typing import List

from langchain_community.retrievers import WikipediaRetriever
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from .embedding import get_embeddings


@lru_cache(maxsize=1)
def _get_embedder():
    """Create the embedding model on first use.

    The original built the model at import time, so merely importing this
    module paid the full model download/initialisation cost; lazy creation
    defers that until a retriever is actually requested (still created only
    once thanks to the cache).
    """
    return get_embeddings()


def get_rag_retriever_from_paths(pdf_paths: List[str]):
    """Load PDFs from the given paths, split them, and build a Chroma retriever.

    Raises ``ValueError`` when no pages could be loaded (Chroma would
    otherwise fail later with an opaque empty-collection error).
    """
    all_docs = []
    for path in pdf_paths:
        all_docs.extend(PyPDFLoader(path).load())

    if not all_docs:
        raise ValueError("No pages could be loaded from the given PDF paths.")

    # chunk_overlap of 270 keeps generous context continuity between chunks.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=270)
    splits = text_splitter.split_documents(all_docs)

    # In-memory vector store; rebuilt per upload set by the caller's cache.
    vectorstore = Chroma.from_documents(documents=splits, embedding=_get_embedder())
    return vectorstore.as_retriever()


def get_wiki_retriever():
    """Return a Wikipedia retriever limited to the top two results."""
    return WikipediaRetriever(top_k_results=2)
FinalProject/requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ langchain-core
3
+ langchain-community
4
+ langchain-text-splitters
5
+ langgraph
6
+ langchain-groq
7
+ langchain-huggingface
8
+ pypdf
9
+ sentence-transformers
10
+ chromadb
FinalProject/test.ipynb ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "id": "f6960caa",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "[Document(metadata={'source': 'c:\\\\Users\\\\xyzai\\\\Documents\\\\GraphDB\\\\FinalProject\\\\data\\\\pdfs\\\\RU-MILITARY.pdf', 'creationdate': '2014-04-22T10:23:16+02:00', 'creator': 'Adobe InDesign CS6 (Macintosh)', 'trapped': '/False', 'page_label': '3', 'total_pages': 4, 'moddate': '2014-04-22T10:23:18+02:00', 'page': 2, 'producer': 'Adobe PDF Library 10.0.1'}, page_content='developing the military-industrial complex.\\nThe Kremlin’s massive armaments program \\nand its reform of the military-industrial \\ncomplex also has significance in terms of \\nindustrial and social policy. The military in-\\ndustry employs two million workers; five \\nper cent of the Russian population depend \\non it for their livelihood. In this way, the \\nKremlin is β€œsolving” an issue in Soviet style: \\nFunding for the military is once more tak -\\ning on a central role in society. It is hoped \\nthat this will boost innovation and global \\nRussian Demographics\\nRussia has a security apparatus \\nproportionally more than twice \\nthe size of that of the US.'),\n",
13
+ " Document(metadata={'creator': 'Adobe InDesign CS6 (Macintosh)', 'creationdate': '2014-04-22T10:23:16+02:00', 'source': 'c:\\\\Users\\\\xyzai\\\\Documents\\\\GraphDB\\\\FinalProject\\\\data\\\\pdfs\\\\RU-MILITARY.pdf', 'producer': 'Adobe PDF Library 10.0.1', 'moddate': '2014-04-22T10:23:18+02:00', 'total_pages': 4, 'page_label': '3', 'page': 2, 'trapped': '/False'}, page_content='developing the military-industrial complex.\\nThe Kremlin’s massive armaments program \\nand its reform of the military-industrial \\ncomplex also has significance in terms of \\nindustrial and social policy. The military in-\\ndustry employs two million workers; five \\nper cent of the Russian population depend \\non it for their livelihood. In this way, the \\nKremlin is β€œsolving” an issue in Soviet style: \\nFunding for the military is once more tak -\\ning on a central role in society. It is hoped \\nthat this will boost innovation and global \\nRussian Demographics\\nRussia has a security apparatus \\nproportionally more than twice \\nthe size of that of the US.'),\n",
14
+ " Document(metadata={'producer': 'Adobe PDF Library 10.0.1', 'source': 'c:\\\\Users\\\\xyzai\\\\Documents\\\\GraphDB\\\\FinalProject\\\\data\\\\pdfs\\\\RU-MILITARY.pdf', 'total_pages': 4, 'trapped': '/False', 'creationdate': '2014-04-22T10:23:16+02:00', 'creator': 'Adobe InDesign CS6 (Macintosh)', 'page_label': '4', 'moddate': '2014-04-22T10:23:18+02:00', 'page': 3}, page_content='there are bottlenecks in production capaci-\\nty, for example in aircraft production and \\nshipbuilding. The plans to enhance military \\ntransport aviation can only be realized if \\ncapacity is expanded rapidly. Russia also \\ndepends on cooperation with Ukraine: So \\nfar, many motors for helicopters and air -\\ncraft as well as rockets have been produced \\nin Ukraine. Russia lacks the know-how for \\nproducing many of the parts required. The \\ncurrent conflict is putting a strain on this \\ncooperation and necessitates import substi-\\ntutes, which entail great cost and delays.\\nThe Effects of Remilitarization\\nUnder Vladimir Putin, the modernization \\nof Russia’s armed forces has become a pri -\\nority for the first time since 1991. For sev-\\neral years, considerable sums have been ex-\\npended on this reform. However, challenges \\nremain when it comes to technology and \\norganizational culture; and demographic \\nproblems are also still an issue. Moreover, \\nthe lagging economic output will exacer -'),\n",
15
+ " Document(metadata={'creationdate': '2014-04-22T10:23:16+02:00', 'total_pages': 4, 'moddate': '2014-04-22T10:23:18+02:00', 'trapped': '/False', 'page_label': '4', 'source': 'c:\\\\Users\\\\xyzai\\\\Documents\\\\GraphDB\\\\FinalProject\\\\data\\\\pdfs\\\\RU-MILITARY.pdf', 'creator': 'Adobe InDesign CS6 (Macintosh)', 'page': 3, 'producer': 'Adobe PDF Library 10.0.1'}, page_content='there are bottlenecks in production capaci-\\nty, for example in aircraft production and \\nshipbuilding. The plans to enhance military \\ntransport aviation can only be realized if \\ncapacity is expanded rapidly. Russia also \\ndepends on cooperation with Ukraine: So \\nfar, many motors for helicopters and air -\\ncraft as well as rockets have been produced \\nin Ukraine. Russia lacks the know-how for \\nproducing many of the parts required. The \\ncurrent conflict is putting a strain on this \\ncooperation and necessitates import substi-\\ntutes, which entail great cost and delays.\\nThe Effects of Remilitarization\\nUnder Vladimir Putin, the modernization \\nof Russia’s armed forces has become a pri -\\nority for the first time since 1991. For sev-\\neral years, considerable sums have been ex-\\npended on this reform. However, challenges \\nremain when it comes to technology and \\norganizational culture; and demographic \\nproblems are also still an issue. Moreover, \\nthe lagging economic output will exacer -')]"
16
+ ]
17
+ },
18
+ "execution_count": 4,
19
+ "metadata": {},
20
+ "output_type": "execute_result"
21
+ }
22
+ ],
23
+ "source": [
24
+ "from langchain_community.retrievers import WikipediaRetriever\n",
25
+ "from langchain_community.vectorstores import Chroma\n",
26
+ "from data.dataingestion import load_all_pdfs\n",
27
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
28
+ "from langchain_community.vectorstores import Chroma\n",
29
+ "from models.embedding import get_embeddings\n",
30
+ "embeddings = get_embeddings()\n",
31
+ "pdf_data = load_all_pdfs()\n",
32
+ "\n",
33
+ "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=270)\n",
34
+ "splits = text_splitter.split_documents(pdf_data)\n",
35
+ "vectorstore = Chroma.from_documents(documents=splits,embedding=embeddings)\n",
36
+ "rag_retriever = vectorstore.as_retriever()\n",
37
+ "resp=rag_retriever.invoke(\"russian military\")\n",
38
+ "resp \n"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": null,
44
+ "id": "261ab304",
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": []
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": null,
52
+ "id": "c7e948d4",
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": []
56
+ }
57
+ ],
58
+ "metadata": {
59
+ "kernelspec": {
60
+ "display_name": "myenv",
61
+ "language": "python",
62
+ "name": "python3"
63
+ },
64
+ "language_info": {
65
+ "codemirror_mode": {
66
+ "name": "ipython",
67
+ "version": 3
68
+ },
69
+ "file_extension": ".py",
70
+ "mimetype": "text/x-python",
71
+ "name": "python",
72
+ "nbconvert_exporter": "python",
73
+ "pygments_lexer": "ipython3",
74
+ "version": "3.11.8"
75
+ }
76
+ },
77
+ "nbformat": 4,
78
+ "nbformat_minor": 5
79
+ }
app.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- PATH SETUP ---
# The project root (where agents/ and models/ live) must be on sys.path
# BEFORE any local import. The original imported agents.graph and
# models.retriever first, which raises ImportError whenever the app is
# launched from a different working directory.
import os
import sys
import tempfile
from typing import List

current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)

import streamlit as st
from langchain_core.messages import HumanMessage

from agents.graph import app

# Ensure you have implemented this function in FinalProject/models/retriever.py
# It should accept a list of PDF file paths and return a LangChain Retriever object.
try:
    from models.retriever import get_rag_retriever_from_paths
except ImportError:
    st.error("Could not import get_rag_retriever_from_paths. Please check your models/retriever.py file.")
    sys.exit()


# --- PAGE CONFIGURATION ---
st.set_page_config(
    page_title="GraphQuery RAG Agent",
    page_icon="πŸ€–",
    layout="wide"
)
28
+
29
# --- CACHED FUNCTION TO BUILD RAG RETRIEVER ---
# Streamlit hashes the file_paths list, so the expensive indexing step only
# reruns when the list of paths changes (files added/removed).
@st.cache_resource
def load_and_index_documents(file_paths: List[str]):
    """Build (and cache) a RAG retriever over the given PDF paths.

    Returns the retriever object, or None when no paths are supplied or
    when indexing fails (the error is surfaced in the UI).
    """
    if not file_paths:
        return None

    count = len(file_paths)
    with st.spinner(f"Indexing {count} PDF file(s)... This may take a moment."):
        try:
            # Delegates the actual loading/embedding to models/retriever.py.
            retriever = get_rag_retriever_from_paths(file_paths)
        except Exception as exc:
            st.error(f"Failed to index documents: {exc}")
            return None
        st.success(f"Indexed {count} PDF file(s) successfully!")
        return retriever
47
+
48
# --- SIDEBAR (Settings, Key, and Upload) ---
with st.sidebar:
    st.header("βš™οΈ Agent Settings")
    st.caption("Configure your LLM and Access Key.")

    # API Key Input (used only for this session; passed into the graph state).
    api_key = st.text_input(
        "**Groq API Key (Required):**",
        type="password",
        help="Paste your private Groq API Key here. It is used only for this session.",
    )

    st.divider()

    # 1. FILE UPLOAD SECTION
    st.subheader("πŸ“š Document Upload")
    uploaded_files = st.file_uploader(
        "Upload your own PDFs for RAG context:",
        type=["pdf"],
        accept_multiple_files=True
    )

    # 2. FILE SAVING & INDEXING LOGIC
    file_paths = []
    rag_retriever = None

    if uploaded_files:
        import hashlib

        # Streamlit uploads live in memory; LangChain's PyPDFLoader needs real
        # file paths, so each upload is written to disk. The paths are derived
        # from the file's content hash so they are STABLE across Streamlit
        # reruns: a fresh TemporaryDirectory per rerun (as before) changed the
        # paths on every interaction, which defeated st.cache_resource
        # (re-indexing the same PDFs each time) and deleted the files when the
        # context manager exited.
        upload_dir = os.path.join(tempfile.gettempdir(), "graphquery_uploads")
        os.makedirs(upload_dir, exist_ok=True)
        for uploaded_file in uploaded_files:
            data = uploaded_file.getbuffer()
            digest = hashlib.sha256(data).hexdigest()[:16]
            file_path = os.path.join(upload_dir, f"{digest}_{uploaded_file.name}")
            # Content-addressed name: skip the write if this exact file
            # was already saved on a previous rerun.
            if not os.path.exists(file_path):
                with open(file_path, "wb") as f:
                    f.write(data)
            file_paths.append(file_path)

        # 3. Build the retriever; cached on the (now stable) list of paths.
        rag_retriever = load_and_index_documents(file_paths)

    else:
        # Clear the cache if no files are uploaded to ensure a clean state.
        st.info("No documents uploaded. Only Wikipedia lookup is enabled.")
        load_and_index_documents.clear()  # Clears the cache for this function

    st.divider()
    st.subheader("πŸ› οΈ Features")
    st.info(f"RAG (Document Context) status: {'**ENABLED**' if rag_retriever else 'DISABLED'}")
    st.info("Wikipedia Routing is always active.")
    st.text("MORE COMING SOON ⏱️")
100
+
101
# --- MAIN INTERFACE (Header) ---
st.markdown(
    """
    # 🧠 LangGraph Query Model
    ### Multi-Source RAG Agent
    Ask a question related to your uploaded documents or general knowledge.
    """
)
st.divider()

# --- STATE INITIALIZATION ---
# Base graph state shared by every query; the retriever built in the sidebar
# (or None when nothing was uploaded) rides along into the LangGraph app.
initial_state_base = dict(
    documents=[],
    source="",
    api_key=api_key,
    rag_retriever=rag_retriever,
)

# --- CHAT INPUT AND LOGIC ---
with st.form(key="query_form", clear_on_submit=True):
    user_query = st.text_input(
        "**Your Question:**",
        placeholder="e.g., What is the significance of the military-industrial complex in Russia?",
        label_visibility="collapsed",
    )
    submit_button = st.form_submit_button(label="Ask the Agent πŸš€")
128
+
129
+
130
# --- EXECUTION LOGIC ---

if submit_button and user_query:
    # The agent cannot run without a Groq key; bail out before invoking.
    if not api_key:
        st.error("πŸ”‘ **Error:** Please enter your Groq API Key in the sidebar to run the query.")
        st.stop()

    st.info("πŸ”„ **Querying the Agent...** Please wait.")

    # Fresh per-query state: the shared base settings plus the user's message.
    initial_state = {**initial_state_base,
                     "messages": [HumanMessage(content=user_query)]}

    with st.spinner('Thinking... Routing and Retrieving Context...'):
        try:
            response = app.invoke(initial_state)

            # --- Output Display ---
            final_message = response["messages"][-1].content
            st.success("βœ… **Agent Response:**")
            st.markdown(final_message)
            st.divider()

            # Optional: Show debug info about the route taken and RAG usage.
            with st.expander("πŸ” **Debug Info (Agent Flow)**"):
                st.write(f"**Final Source:** {response.get('source', 'Unknown')}")
                retrieved = response.get('documents')
                if retrieved:
                    st.write(f"**Retrieved Documents:** {len(retrieved)} chunks used.")

        except Exception as err:
            st.error("❌ **Agent Failure:** An error occurred during execution.")
            st.exception(err)

elif not user_query and not api_key:
    st.markdown("πŸ‘‹ Start by entering your **Groq API Key** in the sidebar and asking a question above!")