Spaces:

sofzcc
/

Full_RAG_Assistant

Sleeping

App Files Community

sofzcc committed on Dec 2, 2025

Commit

7494e47

verified ·

1 Parent(s): 2d28f5c

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -102

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
-import re
 import os
 import glob
 import yaml
 import shutil
 from typing import List, Tuple
 import faiss
@@ -35,22 +35,18 @@ def get_default_config():
     """Provide default configuration"""
     return {
         "kb": {
-            "directory": "./knowledge_base",
             "index_directory": "./index",
         },
         "models": {
-            # Embedding model for FAISS
             "embedding": "sentence-transformers/all-MiniLM-L6-v2",
-            # Abstractive generation model
             "qa": "google/flan-t5-small",
         },
         "chunking": {
-            # Larger chunks -> better conceptual coverage
             "chunk_size": 1200,
             "overlap": 200,
         },
         "thresholds": {
-            # More permissive to not miss relevant chunks
             "similarity": 0.1,
         },
         "messages": {
@@ -69,7 +65,7 @@ CONFIG = load_config()
 KB_DIR = CONFIG["kb"]["directory"]
 INDEX_DIR = CONFIG["kb"]["index_directory"]
 EMBEDDING_MODEL_NAME = CONFIG["models"]["embedding"]
-QA_MODEL_NAME = CONFIG["models"]["qa"]
 CHUNK_SIZE = CONFIG["chunking"]["chunk_size"]
 CHUNK_OVERLAP = CONFIG["chunking"]["overlap"]
 SIM_THRESHOLD = CONFIG["thresholds"]["similarity"]
@@ -103,45 +99,7 @@ def chunk_text(text: str, chunk_size: int, overlap: int) -> List[str]:
         start += chunk_size - overlap
     return chunks
-def clean_context_text(text: str) -> str:
-    """
-    Clean raw document context before sending to the generator:
-    - Remove markdown headings (#, ##, ###)
-    - Remove list markers (1., 2), -, *)
-    - Remove duplicate lines
-    """
-    lines = text.splitlines()
-    cleaned = []
-    seen = set()
-    for line in lines:
-        l = line.strip()
-        if not l:
-            continue
-        # Remove markdown headings like "# 1. Title", "## Section"
-        l = re.sub(r"^#+\s*", "", l)
-        # Remove ordered list prefixes like "1. ", "2) "
-        l = re.sub(r"^\d+[\.\)]\s*", "", l)
-        # Remove bullet markers like "- ", "* "
-        l = re.sub(r"^[-*]\s*", "", l)
-        # Skip very short "noise" lines
-        if len(l) < 5:
-            continue
-        # Avoid exact duplicates
-        if l in seen:
-            continue
-        seen.add(l)
-        cleaned.append(l)
-    return "\n".join(cleaned)
 def load_file_text(path: str) -> str:
     """Load text from various file formats with error handling"""
@@ -213,6 +171,45 @@ def load_kb_documents(kb_dir: str) -> List[Tuple[str, str]]:
     return docs
 # -----------------------------
 # KB INDEX (FAISS)
 # -----------------------------
@@ -365,10 +362,10 @@ class RAGIndex:
             print(f"Retrieval error: {e}")
             return []
-    def _generate_from_context(self, prompt: str) -> str:
         """Run Flan-T5 on the given prompt and return the decoded answer."""
         if self.qa_model is None or self.qa_tokenizer is None:
-            return "Model not loaded."
         inputs = self.qa_tokenizer(
             prompt,
@@ -377,23 +374,21 @@ class RAGIndex:
             max_length=768,
         )
-        output_ids = self.qa_model.generate(
             **inputs,
-            max_new_tokens=128,
             do_sample=False,
-            top_p=0.9,
-            temperature=0.7,
         )
         answer = self.qa_tokenizer.decode(
-            output_ids[0],
             skip_special_tokens=True,
         ).strip()
         return answer
     def answer(self, question: str) -> str:
-        """Answer a question using RAG + abstractive generation"""
         if not self.initialized:
             return "❌ Assistant not properly initialized. Please check the logs."
@@ -407,7 +402,7 @@ class RAGIndex:
                 f"Supported formats: .txt, .md, .pdf, .docx"
             )
-        # Retrieve relevant contexts
         contexts = self.retrieve(question, top_k=3)
         if not contexts:
@@ -416,62 +411,55 @@ class RAGIndex:
                 f"💡 Try rephrasing your question or check if relevant documents exist in the knowledge base."
             )
-        # Combine contexts into a single block and track sources
-        combined_context = []
         used_sources = set()
         for ctx, source, score in contexts:
             used_sources.add(source)
-            # Only include the pure text as context
-            combined_context.append(ctx)
-        combined_text = "\n\n".join(combined_context)
-        # STEP 1 — Summarize each chunk individually
-        summaries = []
-        for ctx in combined_context:
-            prompt_summary = (
-                "Summarize the following text in one concise sentence, keeping only the core idea:\n\n"
-                f"{ctx}\n\nSummary:"
             )
-            inputs = self.qa_tokenizer(prompt_summary, return_tensors="pt", truncation=True).to(self.qa_model.device)
-            output = self.qa_model.generate(
-                **inputs,
-                max_new_tokens=64,
-                do_sample=False
             )
-            summary_text = self.qa_tokenizer.decode(output[0], skip_special_tokens=True).strip()
-            summaries.append(summary_text)
-        # STEP 2 — Combine all summaries into a clean evidence pool
         evidence = " ".join(summaries)
-        # STEP 3 — Ask model to answer based on summaries only
-        prompt_answer = (
             "You are an AI assistant that answers questions using only the summarized evidence below.\n"
-            "Write a clear and complete answer in 1–3 sentences.\n"
-            "Do NOT repeat numbers, headings, markdown, or irrelevant text.\n"
-            "Do NOT say where the information came from.\n"
-            "If the answer cannot be found in the evidence, reply:\n"
             "\"I don't know based on the provided documents.\"\n\n"
             f"Evidence:\n{evidence}\n\n"
             f"Question: {question}\n\n"
             "Answer:"
         )
-        inputs = self.qa_tokenizer(prompt_answer, return_tensors="pt", truncation=True).to(self.qa_model.device)
-        output = self.qa_model.generate(
-            **inputs,
-            max_new_tokens=128,
-            do_sample=False
-        )
-        answer_text = self.qa_tokenizer.decode(output[0], skip_special_tokens=True).strip()
         try:
-            answer_text = self._generate_from_context(prompt)
         except Exception as e:
             print(f"Generation error: {e}")
             return (
@@ -503,27 +491,22 @@ def rag_respond(message, history):
         history = []
     if not message or not str(message).strip():
-        # Keep history unchanged, just clear input
         return "", history
     user_msg = str(message)
-    # Append user message
     history.append({
         "role": "user",
         "content": user_msg,
     })
-    # Get bot reply
     bot_reply = rag_index.answer(user_msg)
-    # Append assistant message
     history.append({
         "role": "assistant",
         "content": bot_reply,
     })
-    # Clear textbox, return updated history
     return "", history
@@ -539,7 +522,6 @@ def upload_to_kb(files):
     saved_files = []
     for f in files:
-        # Gradio File object or temp file path
         src_path = getattr(f, "name", None) or str(f)
         if not os.path.exists(src_path):
             continue
@@ -603,20 +585,19 @@ with gr.Blocks(title=CONFIG["client"]["name"]) as demo:
     gr.Markdown(description)
     with gr.Tab("Chat"):
-        chatbot = gr.Chatbot(label="RAG Chat")  # messages-format by default
         with gr.Row():
             txt = gr.Textbox(
                 show_label=False,
                 placeholder="Ask a question about your documents and press Enter to send...",
-                lines=1,
             )
         with gr.Row():
             send_btn = gr.Button("Send")
             clear_btn = gr.Button("Clear")
-        # Enter submits, Send button also submits
         txt.submit(rag_respond, [txt, chatbot], [txt, chatbot])
         send_btn.click(rag_respond, [txt, chatbot], [txt, chatbot])
         clear_btn.click(lambda: ([], ""), None, [chatbot, txt])

 import os
 import glob
 import yaml
 import shutil
+import re
 from typing import List, Tuple
 import faiss
     """Provide default configuration"""
     return {
         "kb": {
+            "directory": "./knowledge_base",   # can be overridden in config.yaml (e.g., ./kb)
             "index_directory": "./index",
         },
         "models": {
             "embedding": "sentence-transformers/all-MiniLM-L6-v2",
             "qa": "google/flan-t5-small",
         },
         "chunking": {
             "chunk_size": 1200,
             "overlap": 200,
         },
         "thresholds": {
             "similarity": 0.1,
         },
         "messages": {
 KB_DIR = CONFIG["kb"]["directory"]
 INDEX_DIR = CONFIG["kb"]["index_directory"]
 EMBEDDING_MODEL_NAME = CONFIG["models"]["embedding"]
+QA_MODEL_NAME = CONFIG["models"].get("qa", "google/flan-t5-small")
 CHUNK_SIZE = CONFIG["chunking"]["chunk_size"]
 CHUNK_OVERLAP = CONFIG["chunking"]["overlap"]
 SIM_THRESHOLD = CONFIG["thresholds"]["similarity"]
         start += chunk_size - overlap
     return chunks
 def load_file_text(path: str) -> str:
     """Load text from various file formats with error handling"""
     return docs
+def clean_context_text(text: str) -> str:
+    """
+    Clean raw document context before sending to the generator:
+    - Remove markdown headings (#, ##, ###)
+    - Remove list markers (1., 2), -, *)
+    - Remove duplicate lines
+    """
+    lines = text.splitlines()
+    cleaned = []
+    seen = set()
+    for line in lines:
+        l = line.strip()
+        if not l:
+            continue
+        # Remove markdown headings like "# 1. Title", "## Section"
+        l = re.sub(r"^#+\s*", "", l)
+        # Remove ordered list prefixes like "1. ", "2) "
+        l = re.sub(r"^\d+[\.\)]\s*", "", l)
+        # Remove bullet markers like "- ", "* "
+        l = re.sub(r"^[-*]\s*", "", l)
+        # Skip very short "noise" lines
+        if len(l) < 5:
+            continue
+        # Avoid exact duplicates
+        if l in seen:
+            continue
+        seen.add(l)
+        cleaned.append(l)
+    return "\n".join(cleaned)
 # -----------------------------
 # KB INDEX (FAISS)
 # -----------------------------
             print(f"Retrieval error: {e}")
             return []
+    def _generate_from_context(self, prompt: str, max_new_tokens: int = 128) -> str:
         """Run Flan-T5 on the given prompt and return the decoded answer."""
         if self.qa_model is None or self.qa_tokenizer is None:
+            raise RuntimeError("QA model not loaded.")
         inputs = self.qa_tokenizer(
             prompt,
             max_length=768,
         )
+        outputs = self.qa_model.generate(
             **inputs,
+            max_new_tokens=max_new_tokens,
             do_sample=False,
         )
         answer = self.qa_tokenizer.decode(
+            outputs[0],
             skip_special_tokens=True,
         ).strip()
         return answer
     def answer(self, question: str) -> str:
+        """Answer a question using RAG + two-step summarization + generation."""
         if not self.initialized:
             return "❌ Assistant not properly initialized. Please check the logs."
                 f"Supported formats: .txt, .md, .pdf, .docx"
             )
+        # 1) Retrieve relevant contexts
         contexts = self.retrieve(question, top_k=3)
         if not contexts:
                 f"💡 Try rephrasing your question or check if relevant documents exist in the knowledge base."
             )
         used_sources = set()
+        # 2) Summarize each retrieved chunk into 1 sentence
+        summaries = []
         for ctx, source, score in contexts:
             used_sources.add(source)
+            cleaned_ctx = clean_context_text(ctx)
+            if not cleaned_ctx.strip():
+                continue
+            summary_prompt = (
+                "Summarize the following text in ONE concise sentence, keeping only the main idea. "
+                "Do not include headings, numbers, or bullet markers.\n\n"
+                f"{cleaned_ctx}\n\n"
+                "Summary:"
             )
+            try:
+                summary = self._generate_from_context(summary_prompt, max_new_tokens=64)
+                summaries.append(summary)
+            except Exception as e:
+                print(f"Summary generation error: {e}")
+                continue
+        if not summaries:
+            return (
+                f"{NO_ANSWER_MSG}\n\n"
+                f"💡 Try rephrasing your question or adding more detailed documents to the knowledge base."
             )
+        # 3) Combine summaries into an evidence pool
         evidence = " ".join(summaries)
+        # 4) Ask the model to answer using only the summaries
+        answer_prompt = (
             "You are an AI assistant that answers questions using only the summarized evidence below.\n"
+            "Write a clear, helpful answer in 1–3 sentences, in your own words.\n"
+            "- Do NOT include headings, section numbers, markdown, or bullet symbols.\n"
+            "- Do NOT mention file names or sources in the answer.\n"
+            "- If the answer cannot be found in the evidence, reply exactly: "
             "\"I don't know based on the provided documents.\"\n\n"
             f"Evidence:\n{evidence}\n\n"
             f"Question: {question}\n\n"
             "Answer:"
         )
         try:
+            answer_text = self._generate_from_context(answer_prompt, max_new_tokens=128)
         except Exception as e:
             print(f"Generation error: {e}")
             return (
         history = []
     if not message or not str(message).strip():
         return "", history
     user_msg = str(message)
     history.append({
         "role": "user",
         "content": user_msg,
     })
     bot_reply = rag_index.answer(user_msg)
     history.append({
         "role": "assistant",
         "content": bot_reply,
     })
     return "", history
     saved_files = []
     for f in files:
         src_path = getattr(f, "name", None) or str(f)
         if not os.path.exists(src_path):
             continue
     gr.Markdown(description)
     with gr.Tab("Chat"):
+        chatbot = gr.Chatbot(label="RAG Chat")
         with gr.Row():
             txt = gr.Textbox(
                 show_label=False,
                 placeholder="Ask a question about your documents and press Enter to send...",
+                lines=1,  # single line so Enter submits
             )
         with gr.Row():
             send_btn = gr.Button("Send")
             clear_btn = gr.Button("Clear")
         txt.submit(rag_respond, [txt, chatbot], [txt, chatbot])
         send_btn.click(rag_respond, [txt, chatbot], [txt, chatbot])
         clear_btn.click(lambda: ([], ""), None, [chatbot, txt])