# NOTE(review): the three lines that were here ("Spaces:" / "Runtime error" /
# "Runtime error") are Hugging Face Spaces page chrome captured by the scrape,
# not application code; kept as a comment so the module stays importable.
import os
import sys
import tempfile

import streamlit as st

from ingest import load_document, chunk_documents, store_in_chromadb, get_vectorstore
from kg_builder import build_kg
from chain import RAGChain

# Page config — must be the first Streamlit call in the script.
st.set_page_config(
    page_title="KG-RAG Chatbot",
    # Fixed mojibake: the scrape showed "π§ ", which is the UTF-8 bytes of 🧠
    # decoded as cp1253.
    page_icon="🧠",
    layout="wide",
)

st.title("🧠 KG-RAG Chatbot")
# "β" in the scrape is the cp1253 rendering of the UTF-8 arrow "→".
st.caption("Upload a PDF → Build Knowledge Graph → Chat with your document")
| # Sidebar | |
| with st.sidebar: | |
| st.header("Upload Document") | |
| uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"]) | |
| if uploaded_file: | |
| # Save uploaded file to /data directory | |
| os.makedirs("data", exist_ok=True) | |
| save_path = os.path.join("data", uploaded_file.name) | |
| with open(save_path, "wb") as f: | |
| f.write(uploaded_file.read()) | |
| st.success(f"Saved: {uploaded_file.name}") | |
| # Process button | |
| if st.button("Process PDF", type="primary", use_container_width=True): | |
| # Clear old chat + chain | |
| st.session_state.messages = [] | |
| st.session_state.chain_ready = False | |
| st.session_state.pop("chain", None) | |
| # Step 1: Ingest β ChromaDB | |
| with st.status("Ingesting document...", expanded=True) as status: | |
| try: | |
| st.write("Loading PDF pages...") | |
| docs = load_document(uploaded_file.name) | |
| st.write(f"{len(docs)} pages loaded") | |
| st.write("Chunking text...") | |
| chunks = chunk_documents(docs) | |
| st.write(f"{len(chunks)} chunks created") | |
| st.write("Embedding + saving to ChromaDB...") | |
| store_in_chromadb(chunks) | |
| st.write("ChromaDB ready") | |
| # Step 2: Build KG β Neo4j | |
| st.write("Extracting triples β Neo4j Knowledge Graph...") | |
| build_kg(uploaded_file.name) | |
| st.write("Knowledge Graph built") | |
| status.update(label="PDF processed! You can now chat.", state="complete") | |
| st.session_state.chain_ready = True | |
| st.session_state.current_doc = uploaded_file.name | |
| except Exception as e: | |
| status.update(label="Processing failed", state="error") | |
| st.error(str(e)) | |
| st.divider() | |
| # Settings | |
| st.header("Settings") | |
| show_kg = st.toggle("Show KG facts", value=True) | |
| show_chunks = st.toggle("Show source chunks", value=False) | |
| st.divider() | |
| st.markdown(""" | |
| **How it works:** | |
| 1. Upload + Process your PDF | |
| 2. **ChromaDB** stores semantic chunks | |
| 3. **Neo4j** stores entity relationships | |
| 4. **LLaMA via Groq** answers your questions | |
| """) | |
| if st.button("ποΈ Clear chat history", use_container_width=True): | |
| st.session_state.messages = [] | |
| st.rerun() | |
def load_chain():
    """Construct the RAG chain (ChromaDB retriever + Neo4j KG + LLM).

    Invoked only after a PDF has been processed; the caller memoizes the
    returned object in ``st.session_state["chain"]`` so it is built at most
    once per session.
    """
    return RAGChain()
# ── Main area: landing state, chain loading, chat transcript + input ────────
if not st.session_state.get("chain_ready"):
    # Friendly landing state until a PDF has been processed.
    st.info("Upload a PDF from the sidebar and click **Process PDF** to get started.")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("### Step 1\nUpload any PDF document from the sidebar")
    with col2:
        st.markdown("### Step 2\nClick **Process PDF** to build the knowledge graph")
    with col3:
        # "→" restores the arrow that was mojibake "β" in the scraped source.
        st.markdown("### Step 3\nAsk questions → get answers with KG + RAG context")
else:
    # Show which doc is loaded.
    st.success(f"Active document: **{st.session_state.get('current_doc', 'Unknown')}**")

    # Build the RAG chain once per session and cache it in session_state.
    try:
        if "chain" not in st.session_state:
            with st.spinner("Loading RAG chain..."):
                st.session_state.chain = load_chain()
        chain = st.session_state.chain
    except Exception as e:
        # Chain construction touches external services; fail loudly and halt
        # the script rather than letting the chat UI render without a chain.
        st.error(f"Failed to load chain: {e}")
        st.stop()

    # ── Chat history ────────────────────────────────────────────────────────
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the transcript; context panels are gated by the sidebar toggles.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])
            if msg.get("kg_facts") and show_kg:
                with st.expander("Knowledge Graph Facts"):
                    st.code(msg["kg_facts"])
            if msg.get("sources") and show_chunks:
                with st.expander("Source Chunks"):
                    for s in msg["sources"]:
                        st.markdown(f"**Page {s['page']}:** {s['snippet']}...")

    # Chat input
    if question := st.chat_input("Ask anything about your document..."):
        st.session_state.messages.append({"role": "user", "content": question})
        with st.chat_message("user"):
            st.markdown(question)

        with st.chat_message("assistant"):
            # "→" restores the arrow that was mojibake "β" in the scrape.
            with st.spinner("Retrieving from ChromaDB + Neo4j → asking LLM..."):
                try:
                    result = chain.ask(question)
                    answer = result["answer"]
                    st.markdown(answer)
                    if result["kg_facts"] and show_kg:
                        with st.expander("Knowledge Graph Facts used"):
                            st.code(result["kg_facts"])
                    if show_chunks:
                        with st.expander("Source Chunks"):
                            for s in result["sources"]:
                                st.markdown(f"**Page {s['page']}:** {s['snippet']}...")
                    # Persist the answer with its context so the panels can be
                    # re-rendered from history on later reruns.
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": answer,
                        "kg_facts": result["kg_facts"],
                        "sources": result["sources"],
                    })
                except Exception as e:
                    st.error(f"Error: {e}")