# kg-rag-bot / app.py
# Source: Hugging Face Space by Rupesx007 ("Upload 8 files", commit 73ad28e, verified)
import streamlit as st
import os, sys, tempfile
from ingest import load_document, chunk_documents, store_in_chromadb, get_vectorstore
from kg_builder import build_kg
from chain import RAGChain
# ── Page configuration ────────────────────────────────────────────────────
# Must run before any other st.* call in the script.
st.set_page_config(
    page_title="KG-RAG Chatbot",
    page_icon="🧠",
    layout="wide",
)

st.title("🧠 KG-RAG Chatbot")
# NOTE: the arrows were mojibake ("β†’") in the scraped source; restored to "→".
st.caption("Upload a PDF → Build Knowledge Graph → Chat with your document")
# ── Sidebar: upload, processing pipeline, display settings ────────────────
with st.sidebar:
    st.header("Upload Document")
    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

    if uploaded_file:
        # Persist the upload under data/ so the ingest/KG steps can read it.
        os.makedirs("data", exist_ok=True)
        save_path = os.path.join("data", uploaded_file.name)
        with open(save_path, "wb") as f:
            f.write(uploaded_file.read())
        st.success(f"Saved: {uploaded_file.name}")

        if st.button("Process PDF", type="primary", use_container_width=True):
            # Reset any previous conversation and cached chain before
            # re-processing, so stale answers never mix with the new doc.
            st.session_state.messages = []
            st.session_state.chain_ready = False
            st.session_state.pop("chain", None)

            with st.status("Ingesting document...", expanded=True) as status:
                try:
                    # Step 1: PDF -> pages -> chunks -> ChromaDB embeddings.
                    st.write("Loading PDF pages...")
                    # NOTE(review): load_document/build_kg receive only the
                    # bare filename, not save_path — presumably they prepend
                    # "data/" internally; verify in ingest.py / kg_builder.py.
                    docs = load_document(uploaded_file.name)
                    st.write(f"{len(docs)} pages loaded")

                    st.write("Chunking text...")
                    chunks = chunk_documents(docs)
                    st.write(f"{len(chunks)} chunks created")

                    st.write("Embedding + saving to ChromaDB...")
                    store_in_chromadb(chunks)
                    st.write("ChromaDB ready")

                    # Step 2: entity/relation triples -> Neo4j knowledge graph.
                    # (Arrow below was mojibake "β†’" in the scraped source.)
                    st.write("Extracting triples → Neo4j Knowledge Graph...")
                    build_kg(uploaded_file.name)
                    st.write("Knowledge Graph built")

                    status.update(label="PDF processed! You can now chat.", state="complete")
                    st.session_state.chain_ready = True
                    st.session_state.current_doc = uploaded_file.name
                except Exception as e:
                    # Surface the failure but keep the app running.
                    status.update(label="Processing failed", state="error")
                    st.error(str(e))

    st.divider()

    # Display settings read by the main chat area below.
    st.header("Settings")
    show_kg = st.toggle("Show KG facts", value=True)
    show_chunks = st.toggle("Show source chunks", value=False)

    st.divider()
    st.markdown("""
**How it works:**
1. Upload + Process your PDF
2. **ChromaDB** stores semantic chunks
3. **Neo4j** stores entity relationships
4. **LLaMA via Groq** answers your questions
""")

    # (Button label was mojibake "πŸ—‘οΈ" in the scraped source; restored to 🗑️.)
    if st.button("🗑️ Clear chat history", use_container_width=True):
        st.session_state.messages = []
        st.rerun()
# RAG chain factory — cached so the chain is built once per process and
# reused across Streamlit reruns.
@st.cache_resource
def load_chain():
    """Construct and return the shared RAGChain instance (cached)."""
    return RAGChain()
# ── Main area: landing state or chat UI ───────────────────────────────────
if not st.session_state.get("chain_ready"):
    # Friendly landing state shown until a PDF has been processed.
    st.info("Upload a PDF from the sidebar and click **Process PDF** to get started.")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("### Step 1\nUpload any PDF document from the sidebar")
    with col2:
        st.markdown("### Step 2\nClick **Process PDF** to build the knowledge graph")
    with col3:
        # (Dash/arrow were mojibake "β€”" in the scraped source.)
        st.markdown("### Step 3\nAsk questions — get answers with KG + RAG context")
else:
    # Show which document is currently loaded.
    st.success(f"Active document: **{st.session_state.get('current_doc', 'Unknown')}**")

    # Load the RAG chain (cached); halt the script run if construction fails
    # so the chat widgets below never reference an undefined chain.
    try:
        if "chain" not in st.session_state:
            with st.spinner("Loading RAG chain..."):
                st.session_state.chain = load_chain()
        chain = st.session_state.chain
    except Exception as e:
        st.error(f"Failed to load chain: {e}")
        st.stop()

    # ── Chat history ──────────────────────────────────────────────────────
    if "messages" not in st.session_state:
        st.session_state.messages = []

    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])
            if msg.get("kg_facts") and show_kg:
                with st.expander("Knowledge Graph Facts"):
                    st.code(msg["kg_facts"])
            if msg.get("sources") and show_chunks:
                with st.expander("Source Chunks"):
                    for s in msg["sources"]:
                        st.markdown(f"**Page {s['page']}:** {s['snippet']}...")

    # ── Chat input ────────────────────────────────────────────────────────
    if question := st.chat_input("Ask anything about your document..."):
        st.session_state.messages.append({"role": "user", "content": question})
        with st.chat_message("user"):
            st.markdown(question)

        with st.chat_message("assistant"):
            # (Arrow below was mojibake "β†’" in the scraped source.)
            with st.spinner("Retrieving from ChromaDB + Neo4j → asking LLM..."):
                try:
                    result = chain.ask(question)
                    answer = result["answer"]
                    st.markdown(answer)

                    # Optional context panels. Use .get so a missing key in
                    # the chain's result can't crash the whole chat turn.
                    kg_facts = result.get("kg_facts")
                    sources = result.get("sources") or []
                    if kg_facts and show_kg:
                        with st.expander("Knowledge Graph Facts used"):
                            st.code(kg_facts)
                    # Guard on non-empty sources, consistent with how the
                    # history loop above renders stored messages.
                    if sources and show_chunks:
                        with st.expander("Source Chunks"):
                            for s in sources:
                                st.markdown(f"**Page {s['page']}:** {s['snippet']}...")

                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": answer,
                        "kg_facts": kg_facts,
                        "sources": sources,
                    })
                except Exception as e:
                    st.error(f"Error: {e}")