sofzcc committed on
Commit
4a4bfce
·
verified ·
1 Parent(s): dd1add7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -69
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import glob
3
  import yaml
4
- from typing import List, Tuple, Optional
5
 
6
  import faiss
7
  import numpy as np
@@ -34,27 +34,27 @@ def get_default_config():
34
  return {
35
  "kb": {
36
  "directory": "./knowledge_base",
37
- "index_directory": "./index"
38
  },
39
  "models": {
40
  "embedding": "all-MiniLM-L6-v2",
41
- "qa": "deepset/roberta-base-squad2"
42
  },
43
  "chunking": {
44
  "chunk_size": 500,
45
- "overlap": 50
46
  },
47
  "thresholds": {
48
- "similarity": 0.3
49
  },
50
  "messages": {
51
  "welcome": "Ask me anything about the documents in the knowledge base!",
52
- "no_answer": "I couldn't find a relevant answer in the knowledge base."
53
  },
54
  "client": {
55
- "name": "RAG AI Assistant"
56
  },
57
- "quick_actions": []
58
  }
59
 
60
 
@@ -79,23 +79,23 @@ def chunk_text(text: str, chunk_size: int, overlap: int) -> List[str]:
79
  """Split text into overlapping chunks"""
80
  if not text or not text.strip():
81
  return []
82
-
83
  chunks = []
84
  start = 0
85
  text_len = len(text)
86
-
87
  while start < text_len:
88
  end = min(start + chunk_size, text_len)
89
  chunk = text[start:end].strip()
90
-
91
  if chunk and len(chunk) > 20: # Avoid tiny chunks
92
  chunks.append(chunk)
93
-
94
  if end >= text_len:
95
  break
96
-
97
  start += chunk_size - overlap
98
-
99
  return chunks
100
 
101
 
@@ -103,9 +103,9 @@ def load_file_text(path: str) -> str:
103
  """Load text from various file formats with error handling"""
104
  if not os.path.exists(path):
105
  raise FileNotFoundError(f"File not found: {path}")
106
-
107
  ext = os.path.splitext(path)[1].lower()
108
-
109
  try:
110
  if ext == ".pdf":
111
  reader = PdfReader(path)
@@ -115,15 +115,15 @@ def load_file_text(path: str) -> str:
115
  if page_text:
116
  text_parts.append(page_text)
117
  return "\n".join(text_parts)
118
-
119
  elif ext in [".docx", ".doc"]:
120
  doc = docx.Document(path)
121
  return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
122
-
123
  else: # .txt, .md, etc.
124
  with open(path, "r", encoding="utf-8", errors="ignore") as f:
125
  return f.read()
126
-
127
  except Exception as e:
128
  print(f"Error reading {path}: {e}")
129
  raise
@@ -131,30 +131,30 @@ def load_file_text(path: str) -> str:
131
 
132
  def load_kb_documents(kb_dir: str) -> List[Tuple[str, str]]:
133
  """Load all documents from knowledge base directory"""
134
- docs = []
135
-
136
  if not os.path.exists(kb_dir):
137
  print(f"⚠️ Knowledge base directory not found: {kb_dir}")
138
  print(f"Creating directory: {kb_dir}")
139
  os.makedirs(kb_dir, exist_ok=True)
140
  return docs
141
-
142
  if not os.path.isdir(kb_dir):
143
  print(f"⚠️ {kb_dir} is not a directory")
144
  return docs
145
-
146
  # Support multiple file formats
147
  patterns = ["*.txt", "*.md", "*.pdf", "*.docx", "*.doc"]
148
  paths = []
149
  for pattern in patterns:
150
  paths.extend(glob.glob(os.path.join(kb_dir, pattern)))
151
-
152
  if not paths:
153
  print(f"⚠️ No documents found in {kb_dir}")
154
  return docs
155
-
156
  print(f"Found {len(paths)} documents in knowledge base")
157
-
158
  for path in paths:
159
  try:
160
  text = load_file_text(path)
@@ -165,7 +165,7 @@ def load_kb_documents(kb_dir: str) -> List[Tuple[str, str]]:
165
  print(f"⚠️ Empty file: {os.path.basename(path)}")
166
  except Exception as e:
167
  print(f"✗ Could not read {path}: {e}")
168
-
169
  return docs
170
 
171
 
@@ -181,7 +181,7 @@ class RAGIndex:
181
  self.chunk_sources: List[str] = []
182
  self.index = None
183
  self.initialized = False
184
-
185
  try:
186
  print("🔄 Initializing RAG Assistant...")
187
  self._initialize_models()
@@ -197,7 +197,7 @@ class RAGIndex:
197
  try:
198
  print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}")
199
  self.embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)
200
-
201
  print(f"Loading QA model: {QA_MODEL_NAME}")
202
  self.qa_pipeline = pipeline(
203
  "question-answering",
@@ -232,16 +232,16 @@ class RAGIndex:
232
  # Build new index
233
  print("Building new FAISS index from knowledge base...")
234
  docs = load_kb_documents(KB_DIR)
235
-
236
  if not docs:
237
  print("⚠️ No documents found in knowledge base")
238
  print(f" Please add .txt, .md, .pdf, or .docx files to: {KB_DIR}")
239
  self.index = None
240
  return
241
 
242
- all_chunks = []
243
- all_sources = []
244
-
245
  for source, text in docs:
246
  chunks = chunk_text(text, CHUNK_SIZE, CHUNK_OVERLAP)
247
  for chunk in chunks:
@@ -255,14 +255,14 @@ class RAGIndex:
255
 
256
  print(f"Created {len(all_chunks)} chunks from {len(docs)} documents")
257
  print("Generating embeddings...")
258
-
259
  embeddings = self.embedder.encode(
260
- all_chunks,
261
- show_progress_bar=True,
262
  convert_to_numpy=True,
263
- batch_size=32
264
  )
265
-
266
  dimension = embeddings.shape[1]
267
  index = faiss.IndexFlatIP(dimension)
268
 
@@ -273,10 +273,13 @@ class RAGIndex:
273
  # Save index
274
  try:
275
  faiss.write_index(index, idx_path)
276
- np.save(meta_path, {
277
- "chunks": np.array(all_chunks, dtype=object),
278
- "sources": np.array(all_sources, dtype=object)
279
- })
 
 
 
280
  print("✓ Index saved successfully")
281
  except Exception as e:
282
  print(f"⚠️ Could not save index: {e}")
@@ -289,25 +292,27 @@ class RAGIndex:
289
  """Retrieve relevant chunks for a query"""
290
  if not query or not query.strip():
291
  return []
292
-
293
  if self.index is None or not self.initialized:
294
  return []
295
-
296
  try:
297
  q_emb = self.embedder.encode([query], convert_to_numpy=True)
298
  faiss.normalize_L2(q_emb)
299
  scores, idxs = self.index.search(q_emb, min(top_k, len(self.chunks)))
300
-
301
- results = []
302
  for score, idx in zip(scores[0], idxs[0]):
303
  if idx == -1 or idx >= len(self.chunks):
304
  continue
305
  if score < SIM_THRESHOLD:
306
  continue
307
- results.append((self.chunks[idx], self.chunk_sources[idx], float(score)))
308
-
 
 
309
  return results
310
-
311
  except Exception as e:
312
  print(f"Retrieval error: {e}")
313
  return []
@@ -316,20 +321,20 @@ class RAGIndex:
316
  """Answer a question using RAG"""
317
  if not self.initialized:
318
  return "❌ Assistant not properly initialized. Please check the logs."
319
-
320
  if not question or not question.strip():
321
  return "Please ask a question."
322
-
323
  if self.index is None:
324
  return (
325
  f"📚 Knowledge base is empty.\n\n"
326
  f"Please add documents to: `{KB_DIR}`\n"
327
  f"Supported formats: .txt, .md, .pdf, .docx"
328
  )
329
-
330
  # Retrieve relevant contexts
331
  contexts = self.retrieve(question, top_k=3)
332
-
333
  if not contexts:
334
  return (
335
  f"{NO_ANSWER_MSG}\n\n"
@@ -342,17 +347,17 @@ class RAGIndex:
342
  # Truncate context if too long (max 512 tokens for most QA models)
343
  max_context_length = 2000 # characters, roughly 512 tokens
344
  truncated_ctx = ctx[:max_context_length]
345
-
346
  qa_input = {"question": question, "context": truncated_ctx}
347
-
348
  try:
349
  result = self.qa_pipeline(qa_input)
350
  answer_text = result.get("answer", "").strip()
351
  answer_score = result.get("score", 0.0)
352
-
353
  if answer_text and answer_score > 0.01: # Minimum confidence threshold
354
  answers.append((answer_text, source, answer_score, score))
355
-
356
  except Exception as e:
357
  print(f"QA error on context from {source}: {e}")
358
  continue
@@ -388,32 +393,39 @@ print("=" * 50)
388
  # GRADIO CHAT
389
  # -----------------------------
390
 
391
- def rag_respond(message: str, history):
392
  """Handle chat messages"""
393
- if not message or not message.strip():
394
  return "Please enter a question."
395
-
396
- return rag_index.answer(message)
397
 
398
 
399
  # Build interface
400
  description = WELCOME_MSG
401
  if not rag_index.initialized or rag_index.index is None:
402
- description += f"\n\n⚠️ **Note:** Knowledge base is empty. Add documents to `{KB_DIR}` and restart."
403
-
404
- examples = [qa.get("query") for qa in CONFIG.get("quick_actions", []) if qa.get("query")]
 
 
 
 
 
 
 
405
  if not examples and rag_index.initialized and rag_index.index is not None:
406
  examples = [
407
  "What is this document about?",
408
  "Can you summarize the main points?",
409
- "What are the key findings?"
410
  ]
411
 
412
  chat = gr.ChatInterface(
413
  fn=rag_respond,
414
  title=CONFIG["client"]["name"],
415
  description=description,
416
- type="messages",
417
  examples=examples if examples else None,
418
  cache_examples=False,
419
  retry_btn="🔄 Retry",
@@ -423,8 +435,9 @@ chat = gr.ChatInterface(
423
 
424
  if __name__ == "__main__":
425
  # Launch with better settings for Hugging Face Spaces
 
426
  chat.launch(
427
  server_name="0.0.0.0",
428
- server_port=7860,
429
- share=False
430
- )
 
1
  import os
2
  import glob
3
  import yaml
4
+ from typing import List, Tuple
5
 
6
  import faiss
7
  import numpy as np
 
34
  return {
35
  "kb": {
36
  "directory": "./knowledge_base",
37
+ "index_directory": "./index",
38
  },
39
  "models": {
40
  "embedding": "all-MiniLM-L6-v2",
41
+ "qa": "deepset/roberta-base-squad2",
42
  },
43
  "chunking": {
44
  "chunk_size": 500,
45
+ "overlap": 50,
46
  },
47
  "thresholds": {
48
+ "similarity": 0.3,
49
  },
50
  "messages": {
51
  "welcome": "Ask me anything about the documents in the knowledge base!",
52
+ "no_answer": "I couldn't find a relevant answer in the knowledge base.",
53
  },
54
  "client": {
55
+ "name": "RAG AI Assistant",
56
  },
57
+ "quick_actions": [],
58
  }
59
 
60
 
 
79
  """Split text into overlapping chunks"""
80
  if not text or not text.strip():
81
  return []
82
+
83
  chunks = []
84
  start = 0
85
  text_len = len(text)
86
+
87
  while start < text_len:
88
  end = min(start + chunk_size, text_len)
89
  chunk = text[start:end].strip()
90
+
91
  if chunk and len(chunk) > 20: # Avoid tiny chunks
92
  chunks.append(chunk)
93
+
94
  if end >= text_len:
95
  break
96
+
97
  start += chunk_size - overlap
98
+
99
  return chunks
100
 
101
 
 
103
  """Load text from various file formats with error handling"""
104
  if not os.path.exists(path):
105
  raise FileNotFoundError(f"File not found: {path}")
106
+
107
  ext = os.path.splitext(path)[1].lower()
108
+
109
  try:
110
  if ext == ".pdf":
111
  reader = PdfReader(path)
 
115
  if page_text:
116
  text_parts.append(page_text)
117
  return "\n".join(text_parts)
118
+
119
  elif ext in [".docx", ".doc"]:
120
  doc = docx.Document(path)
121
  return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
122
+
123
  else: # .txt, .md, etc.
124
  with open(path, "r", encoding="utf-8", errors="ignore") as f:
125
  return f.read()
126
+
127
  except Exception as e:
128
  print(f"Error reading {path}: {e}")
129
  raise
 
131
 
132
  def load_kb_documents(kb_dir: str) -> List[Tuple[str, str]]:
133
  """Load all documents from knowledge base directory"""
134
+ docs: List[Tuple[str, str]] = []
135
+
136
  if not os.path.exists(kb_dir):
137
  print(f"⚠️ Knowledge base directory not found: {kb_dir}")
138
  print(f"Creating directory: {kb_dir}")
139
  os.makedirs(kb_dir, exist_ok=True)
140
  return docs
141
+
142
  if not os.path.isdir(kb_dir):
143
  print(f"⚠️ {kb_dir} is not a directory")
144
  return docs
145
+
146
  # Support multiple file formats
147
  patterns = ["*.txt", "*.md", "*.pdf", "*.docx", "*.doc"]
148
  paths = []
149
  for pattern in patterns:
150
  paths.extend(glob.glob(os.path.join(kb_dir, pattern)))
151
+
152
  if not paths:
153
  print(f"⚠️ No documents found in {kb_dir}")
154
  return docs
155
+
156
  print(f"Found {len(paths)} documents in knowledge base")
157
+
158
  for path in paths:
159
  try:
160
  text = load_file_text(path)
 
165
  print(f"⚠️ Empty file: {os.path.basename(path)}")
166
  except Exception as e:
167
  print(f"✗ Could not read {path}: {e}")
168
+
169
  return docs
170
 
171
 
 
181
  self.chunk_sources: List[str] = []
182
  self.index = None
183
  self.initialized = False
184
+
185
  try:
186
  print("🔄 Initializing RAG Assistant...")
187
  self._initialize_models()
 
197
  try:
198
  print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}")
199
  self.embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)
200
+
201
  print(f"Loading QA model: {QA_MODEL_NAME}")
202
  self.qa_pipeline = pipeline(
203
  "question-answering",
 
232
  # Build new index
233
  print("Building new FAISS index from knowledge base...")
234
  docs = load_kb_documents(KB_DIR)
235
+
236
  if not docs:
237
  print("⚠️ No documents found in knowledge base")
238
  print(f" Please add .txt, .md, .pdf, or .docx files to: {KB_DIR}")
239
  self.index = None
240
  return
241
 
242
+ all_chunks: List[str] = []
243
+ all_sources: List[str] = []
244
+
245
  for source, text in docs:
246
  chunks = chunk_text(text, CHUNK_SIZE, CHUNK_OVERLAP)
247
  for chunk in chunks:
 
255
 
256
  print(f"Created {len(all_chunks)} chunks from {len(docs)} documents")
257
  print("Generating embeddings...")
258
+
259
  embeddings = self.embedder.encode(
260
+ all_chunks,
261
+ show_progress_bar=True,
262
  convert_to_numpy=True,
263
+ batch_size=32,
264
  )
265
+
266
  dimension = embeddings.shape[1]
267
  index = faiss.IndexFlatIP(dimension)
268
 
 
273
  # Save index
274
  try:
275
  faiss.write_index(index, idx_path)
276
+ np.save(
277
+ meta_path,
278
+ {
279
+ "chunks": np.array(all_chunks, dtype=object),
280
+ "sources": np.array(all_sources, dtype=object),
281
+ },
282
+ )
283
  print("✓ Index saved successfully")
284
  except Exception as e:
285
  print(f"⚠️ Could not save index: {e}")
 
292
  """Retrieve relevant chunks for a query"""
293
  if not query or not query.strip():
294
  return []
295
+
296
  if self.index is None or not self.initialized:
297
  return []
298
+
299
  try:
300
  q_emb = self.embedder.encode([query], convert_to_numpy=True)
301
  faiss.normalize_L2(q_emb)
302
  scores, idxs = self.index.search(q_emb, min(top_k, len(self.chunks)))
303
+
304
+ results: List[Tuple[str, str, float]] = []
305
  for score, idx in zip(scores[0], idxs[0]):
306
  if idx == -1 or idx >= len(self.chunks):
307
  continue
308
  if score < SIM_THRESHOLD:
309
  continue
310
+ results.append(
311
+ (self.chunks[idx], self.chunk_sources[idx], float(score))
312
+ )
313
+
314
  return results
315
+
316
  except Exception as e:
317
  print(f"Retrieval error: {e}")
318
  return []
 
321
  """Answer a question using RAG"""
322
  if not self.initialized:
323
  return "❌ Assistant not properly initialized. Please check the logs."
324
+
325
  if not question or not question.strip():
326
  return "Please ask a question."
327
+
328
  if self.index is None:
329
  return (
330
  f"📚 Knowledge base is empty.\n\n"
331
  f"Please add documents to: `{KB_DIR}`\n"
332
  f"Supported formats: .txt, .md, .pdf, .docx"
333
  )
334
+
335
  # Retrieve relevant contexts
336
  contexts = self.retrieve(question, top_k=3)
337
+
338
  if not contexts:
339
  return (
340
  f"{NO_ANSWER_MSG}\n\n"
 
347
  # Truncate context if too long (max 512 tokens for most QA models)
348
  max_context_length = 2000 # characters, roughly 512 tokens
349
  truncated_ctx = ctx[:max_context_length]
350
+
351
  qa_input = {"question": question, "context": truncated_ctx}
352
+
353
  try:
354
  result = self.qa_pipeline(qa_input)
355
  answer_text = result.get("answer", "").strip()
356
  answer_score = result.get("score", 0.0)
357
+
358
  if answer_text and answer_score > 0.01: # Minimum confidence threshold
359
  answers.append((answer_text, source, answer_score, score))
360
+
361
  except Exception as e:
362
  print(f"QA error on context from {source}: {e}")
363
  continue
 
393
  # GRADIO CHAT
394
  # -----------------------------
395
 
396
+ def rag_respond(message, history):
397
  """Handle chat messages"""
398
+ if not message or not str(message).strip():
399
  return "Please enter a question."
400
+
401
+ return rag_index.answer(str(message))
402
 
403
 
404
  # Build interface
405
  description = WELCOME_MSG
406
  if not rag_index.initialized or rag_index.index is None:
407
+ description += (
408
+ f"\n\n⚠️ **Note:** Knowledge base is empty. "
409
+ f"Add documents to `{KB_DIR}` and restart."
410
+ )
411
+
412
+ examples = [
413
+ qa.get("query")
414
+ for qa in CONFIG.get("quick_actions", [])
415
+ if qa.get("query")
416
+ ]
417
  if not examples and rag_index.initialized and rag_index.index is not None:
418
  examples = [
419
  "What is this document about?",
420
  "Can you summarize the main points?",
421
+ "What are the key findings?",
422
  ]
423
 
424
  chat = gr.ChatInterface(
425
  fn=rag_respond,
426
  title=CONFIG["client"]["name"],
427
  description=description,
428
+ type="text", # FIX: use text so `message` is a string
429
  examples=examples if examples else None,
430
  cache_examples=False,
431
  retry_btn="🔄 Retry",
 
435
 
436
  if __name__ == "__main__":
437
  # Launch with better settings for Hugging Face Spaces
438
+ port = int(os.environ.get("PORT", 7860)) # FIX: use HF port if provided
439
  chat.launch(
440
  server_name="0.0.0.0",
441
+ server_port=port,
442
+ share=False,
443
+ )