Spaces:

sofzcc
/

Full_RAG_Assistant

Sleeping

App Files Files Community

sofzcc commited on Dec 2, 2025

Commit

456f6e2

verified ·

1 Parent(s): 2acce8f

Update app.py

Browse files

Files changed (1) hide show

app.py +141 -22

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import glob
 import yaml
 from typing import List, Tuple
 import faiss
@@ -37,7 +38,7 @@ def get_default_config():
             "index_directory": "./index",
         },
         "models": {
-            "embedding": "all-MiniLM-L6-v2",
             "qa": "deepset/roberta-base-squad2",
         },
         "chunking": {
@@ -237,6 +238,8 @@ class RAGIndex:
             print("⚠️ No documents found in knowledge base")
             print(f"   Please add .txt, .md, .pdf, or .docx files to: {KB_DIR}")
             self.index = None
             return
         all_chunks: List[str] = []
@@ -251,6 +254,8 @@ class RAGIndex:
         if not all_chunks:
             print("⚠️ No valid chunks created from documents")
             self.index = None
             return
         print(f"Created {len(all_chunks)} chunks from {len(docs)} documents")
@@ -299,7 +304,10 @@ class RAGIndex:
         try:
             q_emb = self.embedder.encode([query], convert_to_numpy=True)
             faiss.normalize_L2(q_emb)
-            scores, idxs = self.index.search(q_emb, min(top_k, len(self.chunks)))
             results: List[Tuple[str, str, float]] = []
             for score, idx in zip(scores[0], idxs[0]):
@@ -325,7 +333,7 @@ class RAGIndex:
         if not question or not question.strip():
             return "Please ask a question."
-        if self.index is None:
             return (
                 f"📚 Knowledge base is empty.\n\n"
                 f"Please add documents to: `{KB_DIR}`\n"
@@ -390,24 +398,77 @@ print("=" * 50)
 # -----------------------------
-# GRADIO CHAT
 # -----------------------------
 def rag_respond(message, history):
-    """Handle chat messages"""
     if not message or not str(message).strip():
-        return "Please enter a question."
-    return rag_index.answer(str(message))
-# Build interface
 description = WELCOME_MSG
-if not rag_index.initialized or rag_index.index is None:
     description += (
-        f"\n\n⚠️ **Note:** Knowledge base is empty. "
-        f"Add documents to `{KB_DIR}` and restart."
     )
 examples = [
@@ -415,27 +476,85 @@ examples = [
     for qa in CONFIG.get("quick_actions", [])
     if qa.get("query")
 ]
-if not examples and rag_index.initialized and rag_index.index is not None:
     examples = [
         "What is this document about?",
         "Can you summarize the main points?",
         "What are the key findings?",
     ]
-chat = gr.ChatInterface(
-    fn=rag_respond,
-    title=CONFIG["client"]["name"],
-    description=description,
-    examples=examples if examples else None,
-)
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    chat.launch(
         server_name="0.0.0.0",
         server_port=port,
         share=False,
-    )

 import os
 import glob
 import yaml
+import shutil
 from typing import List, Tuple
 import faiss
             "index_directory": "./index",
         },
         "models": {
+            "embedding": "sentence-transformers/all-MiniLM-L6-v2",
             "qa": "deepset/roberta-base-squad2",
         },
         "chunking": {
             print("⚠️ No documents found in knowledge base")
             print(f"   Please add .txt, .md, .pdf, or .docx files to: {KB_DIR}")
             self.index = None
+            self.chunks = []
+            self.chunk_sources = []
             return
         all_chunks: List[str] = []
         if not all_chunks:
             print("⚠️ No valid chunks created from documents")
             self.index = None
+            self.chunks = []
+            self.chunk_sources = []
             return
         print(f"Created {len(all_chunks)} chunks from {len(docs)} documents")
         try:
             q_emb = self.embedder.encode([query], convert_to_numpy=True)
             faiss.normalize_L2(q_emb)
+            k = min(top_k, len(self.chunks)) if self.chunks else 0
+            if k == 0:
+                return []
+            scores, idxs = self.index.search(q_emb, k)
             results: List[Tuple[str, str, float]] = []
             for score, idx in zip(scores[0], idxs[0]):
         if not question or not question.strip():
             return "Please ask a question."
+        if self.index is None or not self.chunks:
             return (
                 f"📚 Knowledge base is empty.\n\n"
                 f"Please add documents to: `{KB_DIR}`\n"
 # -----------------------------
+# GRADIO APP (BLOCKS)
 # -----------------------------
 def rag_respond(message, history):
+    """Handle chat messages for chatbot UI"""
     if not message or not str(message).strip():
+        return "", history
+    user_msg = str(message)
+    bot_reply = rag_index.answer(user_msg)
+    history = history + [[user_msg, bot_reply]]
+    return "", history
+def upload_to_kb(files):
+    """Save uploaded files into the KB directory"""
+    if not files:
+        return "No files uploaded."
+    if not isinstance(files, list):
+        files = [files]
+    os.makedirs(KB_DIR, exist_ok=True)
+    saved_files = []
+    for f in files:
+        # Gradio File object or temp file path
+        # In older Gradio, `f.name` is the temp file path
+        src_path = getattr(f, "name", None) or str(f)
+        if not os.path.exists(src_path):
+            continue
+        filename = os.path.basename(src_path)
+        dest_path = os.path.join(KB_DIR, filename)
+        try:
+            shutil.copy(src_path, dest_path)
+            saved_files.append(filename)
+        except Exception as e:
+            print(f"Error saving file {filename}: {e}")
+    if not saved_files:
+        return "No files could be saved. Check logs."
+    return (
+        f"✅ Saved {len(saved_files)} file(s) to knowledge base:\n- "
+        + "\n- ".join(saved_files)
+        + "\n\nClick **Rebuild index** to include them in search."
+    )
+def rebuild_index():
+    """Trigger index rebuild from UI"""
+    rag_index._build_or_load_index()
+    if rag_index.index is None or not rag_index.chunks:
+        return (
+            "⚠️ Index rebuild finished, but no documents or chunks were found.\n"
+            f"Add files to `{KB_DIR}` and try again."
+        )
+    return (
+        f"✅ Index rebuilt successfully.\n"
+        f"Chunks in index: {len(rag_index.chunks)}"
+    )
+# Description + examples
 description = WELCOME_MSG
+if not rag_index.initialized or rag_index.index is None or not rag_index.chunks:
     description += (
+        f"\n\n⚠️ **Note:** Knowledge base is currently empty or index is not built.\n"
+        f"Upload documents in the **Knowledge Base** tab and click **Rebuild index**."
     )
 examples = [
     for qa in CONFIG.get("quick_actions", [])
     if qa.get("query")
 ]
+if not examples and rag_index.initialized and rag_index.index is not None and rag_index.chunks:
     examples = [
         "What is this document about?",
         "Can you summarize the main points?",
         "What are the key findings?",
     ]
+with gr.Blocks(title=CONFIG["client"]["name"]) as demo:
+    gr.Markdown(f"# {CONFIG['client']['name']}")
+    gr.Markdown(description)
+    with gr.Tab("Chat"):
+        chatbot = gr.Chatbot(label="RAG Chat")
+        with gr.Row():
+            txt = gr.Textbox(
+                show_label=False,
+                placeholder="Ask a question about your documents...",
+                lines=2,
+            )
+        with gr.Row():
+            send_btn = gr.Button("Send")
+            clear_btn = gr.Button("Clear")
+        # Pre-fill example buttons if available
+        if examples:
+            gr.Markdown("### Example questions")
+            example_btns = []
+            with gr.Row():
+                for ex in examples:
+                    example_btns.append(gr.Button(ex))
+            def use_example(example, history):
+                """When clicking an example, send it as a message"""
+                bot_reply = rag_index.answer(example)
+                history = history + [[example, bot_reply]]
+                return history
+            for btn, ex in zip(example_btns, examples):
+                btn.click(
+                    use_example,
+                    inputs=[gr.State(ex), chatbot],
+                    outputs=chatbot,
+                )
+        # Chat logic wiring
+        txt.submit(rag_respond, [txt, chatbot], [txt, chatbot])
+        send_btn.click(rag_respond, [txt, chatbot], [txt, chatbot])
+        clear_btn.click(lambda: ([], ""), None, [chatbot, txt])
+    with gr.Tab("Knowledge Base"):
+        gr.Markdown(
+            f"""
+### Manage Knowledge Base
+- Supported formats: `.txt`, `.md`, `.pdf`, `.docx`, `.doc`
+- Files are stored in: `{KB_DIR}`
+- After uploading, click **Rebuild index** so the assistant can use the new content.
+"""
+        )
+        kb_upload = gr.File(
+            label="Upload documents",
+            file_count="multiple",
+        )
+        kb_status = gr.Textbox(
+            label="Status",
+            lines=6,
+            interactive=False,
+        )
+        rebuild_btn = gr.Button("Rebuild index")
+        kb_upload.change(upload_to_kb, inputs=kb_upload, outputs=kb_status)
+        rebuild_btn.click(rebuild_index, inputs=None, outputs=kb_status)
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
+    demo.launch(
         server_name="0.0.0.0",
         server_port=port,
         share=False,
+    )