Spaces:

sofzcc
/

Full_RAG_Assistant

Sleeping

App Files Community

sofzcc committed on Dec 2, 2025

Commit

27759ba

verified ·

1 Parent(s): 93b82c2

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -48

app.py CHANGED Viewed

@@ -38,16 +38,19 @@ def get_default_config():
             "index_directory": "./index",
         },
         "models": {
-            # You can also use "all-MiniLM-L6-v2" here, but this path works well on HF
             "embedding": "sentence-transformers/all-MiniLM-L6-v2",
-            "qa": "deepset/roberta-base-squad2",
         },
         "chunking": {
-            "chunk_size": 500,
-            "overlap": 50,
         },
         "thresholds": {
-            "similarity": 0.3,
         },
         "messages": {
             "welcome": "Ask me anything about the documents in the knowledge base!",
@@ -178,7 +181,7 @@ def load_kb_documents(kb_dir: str) -> List[Tuple[str, str]]:
 class RAGIndex:
     def __init__(self):
         self.embedder = None
-        self.qa_pipeline = None
         self.chunks: List[str] = []
         self.chunk_sources: List[str] = []
         self.index = None
@@ -200,12 +203,12 @@ class RAGIndex:
             print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}")
             self.embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)
-            print(f"Loading QA model: {QA_MODEL_NAME}")
             self.qa_pipeline = pipeline(
-                "question-answering",
-                model=AutoModelForQuestionAnswering.from_pretrained(QA_MODEL_NAME),
-                tokenizer=AutoTokenizer.from_pretrained(QA_MODEL_NAME),
-                handle_impossible_answer=True,
             )
         except Exception as e:
             print(f"Error loading models: {e}")
@@ -327,7 +330,7 @@ class RAGIndex:
             return []
     def answer(self, question: str) -> str:
-        """Answer a question using RAG"""
         if not self.initialized:
             return "❌ Assistant not properly initialized. Please check the logs."
@@ -350,45 +353,51 @@ class RAGIndex:
                 f"💡 Try rephrasing your question or check if relevant documents exist in the knowledge base."
             )
-        # Try to extract answer from each context
-        answers = []
-        for ctx, source, score in contexts:
-            # Truncate context if too long (max 512 tokens for most QA models)
-            max_context_length = 2000  # characters, roughly 512 tokens
-            truncated_ctx = ctx[:max_context_length]
-            qa_input = {"question": question, "context": truncated_ctx}
-            try:
-                result = self.qa_pipeline(qa_input)
-                answer_text = result.get("answer", "").strip()
-                answer_score = result.get("score", 0.0)
-                if answer_text and answer_score > 0.01:  # Minimum confidence threshold
-                    answers.append((answer_text, source, answer_score, score))
-            except Exception as e:
-                print(f"QA error on context from {source}: {e}")
-                continue
-        if not answers:
-            # Provide context even if no specific answer found
-            best_ctx, best_src, best_score = contexts[0]
-            preview = best_ctx[:300] + "..." if len(best_ctx) > 300 else best_ctx
             return (
-                f"I found relevant information but couldn't extract a specific answer.\n\n"
-                f"**Relevant context from {best_src}:**\n{preview}\n\n"
-                f"💡 Try asking a more specific question."
             )
-        # Pick best answer (weighted by both retrieval and QA scores)
-        answers.sort(key=lambda x: x[2] * x[3], reverse=True)
-        best_answer, best_source, qa_score, retrieval_score = answers[0]
         return (
-            f"**Answer:** {best_answer}\n\n"
-            f"**Source:** {best_source}\n"
-            f"**Confidence:** {qa_score:.2%}"
         )
@@ -482,7 +491,7 @@ def rebuild_index():
     )
-# Description + (optional) examples
 description = WELCOME_MSG
 if not rag_index.initialized or rag_index.index is None or not rag_index.chunks:
     description += (
@@ -497,9 +506,9 @@ examples = [
 ]
 if not examples and rag_index.initialized and rag_index.index is not None and rag_index.chunks:
     examples = [
-        "What is this document about?",
-        "Can you summarize the main points?",
-        "What are the key findings?",
     ]

             "index_directory": "./index",
         },
         "models": {
+            # Embedding model for FAISS
             "embedding": "sentence-transformers/all-MiniLM-L6-v2",
+            # Abstractive generation model (can upgrade to flan-t5-base if resources allow)
+            "qa": "google/flan-t5-small",
         },
         "chunking": {
+            # Larger chunks -> better conceptual coverage
+            "chunk_size": 1200,
+            "overlap": 200,
         },
         "thresholds": {
+            # More permissive to not miss relevant chunks
+            "similarity": 0.1,
         },
         "messages": {
             "welcome": "Ask me anything about the documents in the knowledge base!",
 class RAGIndex:
     def __init__(self):
         self.embedder = None
+        self.qa_pipeline = None  # now a generative pipeline
         self.chunks: List[str] = []
         self.chunk_sources: List[str] = []
         self.index = None
             print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}")
             self.embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)
+            print(f"Loading QA (generation) model: {QA_MODEL_NAME}")
+            # Abstractive generation pipeline (Flan-T5)
             self.qa_pipeline = pipeline(
+                "text2text-generation",
+                model=QA_MODEL_NAME,
+                tokenizer=QA_MODEL_NAME,
             )
         except Exception as e:
             print(f"Error loading models: {e}")
             return []
     def answer(self, question: str) -> str:
+        """Answer a question using RAG + abstractive generation"""
         if not self.initialized:
             return "❌ Assistant not properly initialized. Please check the logs."
                 f"💡 Try rephrasing your question or check if relevant documents exist in the knowledge base."
             )
+        # Combine contexts into a single block and track sources
+        combined_context = []
+        used_sources = set()
+        for ctx, source, score in contexts:
+            used_sources.add(source)
+            combined_context.append(f"[Source: {source}]\n{ctx}")
+        combined_text = "\n\n".join(combined_context)
+        # Limit context length to keep it manageable for the model
+        max_context_chars = 4000
+        if len(combined_text) > max_context_chars:
+            combined_text = combined_text[:max_context_chars]
+        # Prompt for the generative model
+        prompt = (
+            "You are an AI assistant that answers questions using only the provided context. "
+            "If the answer cannot be found in the context, reply exactly with: "
+            "\"I don't know based on the provided documents.\"\n\n"
+            f"Context:\n{combined_text}\n\n"
+            f"Question: {question}\n\n"
+            "Answer:"
+        )
+        try:
+            result = self.qa_pipeline(
+                prompt,
+                max_new_tokens=256,
+                do_sample=False,
+            )
+            # text2text-generation returns list of dicts with 'generated_text'
+            answer_text = result[0]["generated_text"].strip()
+        except Exception as e:
+            print(f"Generation error: {e}")
             return (
+                "There was an error while generating the answer. "
+                "Please try again with a shorter question or different wording."
             )
+        sources_str = ", ".join(sorted(used_sources)) if used_sources else "N/A"
         return (
+            f"**Answer:** {answer_text}\n\n"
+            f"**Sources:** {sources_str}"
         )
     )
+# Description + optional examples
 description = WELCOME_MSG
 if not rag_index.initialized or rag_index.index is None or not rag_index.chunks:
     description += (
 ]
 if not examples and rag_index.initialized and rag_index.index is not None and rag_index.chunks:
     examples = [
+        "What is a knowledge base?",
+        "What are best practices for maintaining a KB?",
+        "How should I structure knowledge base articles?",
     ]