Spaces:

sofzcc
/

Self-Service-KB-Assistant

Sleeping

App Files Files Community

sofzcc committed on Nov 27, 2025

Commit

9670a0e

verified ·

1 Parent(s): ef49b02

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -74

app.py CHANGED Viewed

@@ -4,19 +4,17 @@ from typing import List, Tuple
 import gradio as gr
 import numpy as np
-from sentence_transformers import SentenceTransformer
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import torch
 # -----------------------------
 # CONFIG
 # -----------------------------
 KB_DIR = "./kb"  # folder with .txt or .md files
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
-GEN_MODEL_NAME = "google/flan-t5-base"
 TOP_K = 3
 CHUNK_SIZE = 500  # characters
 CHUNK_OVERLAP = 100  # characters
 # -----------------------------
 # UTILITIES
@@ -151,85 +149,118 @@ class KBIndex:
 # Initialize KB index
 print("Initializing KB index...")
 kb_index = KBIndex()
-# Initialize generation model
-print("Loading generation model...")
-gen_tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL_NAME)
-gen_model = AutoModelForSeq2SeqLM.from_pretrained(GEN_MODEL_NAME)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-gen_model.to(device)
-gen_model.eval()
-print(f"Generation model ready on {device}.")
 # -----------------------------
-# CHAT LOGIC
 # -----------------------------
-def build_context_from_results(results: List[Tuple[str, str, float]]) -> str:
-    """
-    Turn retrieved chunks into a compact context string for the LLM.
     """
-    context_parts = []
-    for chunk, source, score in results:
-        cleaned = chunk.strip()
-        context_parts.append(f"From {source}:\n{cleaned}")
-    return "\n\n".join(context_parts)
-def build_answer(query: str) -> str:
     """
-    Use the KB index to retrieve relevant chunks,
-    then ask FLAN-T5 to write a natural answer based ONLY on that context.
-    """
-    results = kb_index.search(query, top_k=TOP_K)
     if not results:
         return (
-            "I couldn't find anything relevant in the knowledge base for this query yet.\n\n"
-            "If this were connected to your real KB, this would be a good moment to:\n"
-            "- Create a new article, or\n"
-            "- Improve the existing documentation for this topic."
         )
-    # Build context for the model
-    context = build_context_from_results(results)
-    # Short list of sources for citation
-    source_names = list({src for _, src, _ in results})
-    source_line = "Based on: " + ", ".join(source_names)
-    # Prompt for FLAN-T5
-    prompt = (
-        "You are a helpful knowledge base assistant.\n"
-        "Using ONLY the information in the context below, answer the user's question "
-        "in a clear, concise, and natural way. Focus on practical guidance.\n\n"
-        f"Context:\n{context}\n\n"
-        f"Question: {query}\n\n"
-        "Answer in 2–5 short paragraphs. If something is not covered in the context, say that.\n"
-    )
-    inputs = gen_tokenizer(
-        prompt,
-        return_tensors="pt",
-        truncation=True,
-        max_length=2048,
-    ).to(device)
-    with torch.no_grad():
-        output_ids = gen_model.generate(
-            **inputs,
-            max_length=512,
-            temperature=0.7,
-            top_p=0.95,
-            num_beams=4,
         )
-    answer_text = gen_tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
-    # Add source citation at the end
-    final_answer = f"{answer_text}\n\n— {source_line}"
-    return final_answer
 def chat_respond(message: str, history):
@@ -259,10 +290,15 @@ def chat_respond(message: str, history):
 # -----------------------------
 description = """
-Ask questions as if you were talking to a knowledge base assistant.
-In a real scenario, this assistant would be connected to your own
-help center or internal documentation. Here, it's using a small demo
-knowledge base to show how retrieval-based self-service can work.
 """
 # Create ChatInterface (without 'type' parameter for compatibility)

 import gradio as gr
 import numpy as np
+from sentence_transformers import SentenceTransformer
 # -----------------------------
 # CONFIG
 # -----------------------------
 KB_DIR = "./kb"  # folder with .txt or .md files
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 TOP_K = 3
 CHUNK_SIZE = 500  # characters
 CHUNK_OVERLAP = 100  # characters
+MIN_SIMILARITY_THRESHOLD = 0.3  # Minimum similarity score to include results
 # -----------------------------
 # UTILITIES
 # Initialize KB index
 print("Initializing KB index...")
 kb_index = KBIndex()
+print("✅ KB Assistant ready!")
 # -----------------------------
+# CHAT LOGIC (Retrieval-Only, No LLM)
 # -----------------------------
def format_answer_from_results(query: str, results: List[Tuple[str, str, float]]) -> str:
    """
    Format a helpful answer from retrieved chunks without using an LLM.

    Args:
        query: The user's question. Not used when building the text; kept so
            the signature stays compatible with existing callers.
        results: ``(chunk_text, source_name, similarity_score)`` tuples. The
            first entry that passes the similarity threshold is presented as
            the main answer, so callers should pass results best-first.

    Returns:
        A markdown string: either a "nothing found" / "not relevant enough"
        hint, or the cleaned best chunk followed by source attributions.
    """
    if not results:
        return (
            "❌ **I couldn't find anything relevant in the knowledge base for this query.**\n\n"
            "**Suggestions:**\n"
            "- Try rephrasing your question\n"
            "- Use different keywords\n"
            "- Check if the information exists in the knowledge base\n\n"
            "If this information should be available, consider adding it to the KB."
        )

    # Drop weak matches so low-similarity chunks are never shown as answers.
    filtered_results = [
        (chunk, src, score)
        for chunk, src, score in results
        if score >= MIN_SIMILARITY_THRESHOLD
    ]
    if not filtered_results:
        return (
            "⚠️ **I found some related content, but it doesn't seem very relevant to your question.**\n\n"
            "**Try:**\n"
            "- Being more specific in your question\n"
            "- Using different terminology\n"
            "- Breaking down complex questions into simpler parts"
        )

    # The first surviving entry is treated as the best match.
    best_chunk, best_source, best_score = filtered_results[0]
    cleaned_content = clean_markdown(best_chunk)

    # Traffic-light marker for how confident the match is.
    relevance_emoji = "🟢" if best_score > 0.7 else "🟡" if best_score > 0.5 else "🟠"
    answer_parts = [
        f"{relevance_emoji} **Answer from: {best_source}**\n",
        cleaned_content,
    ]

    # De-duplicate with dict.fromkeys rather than set(): it keeps first-seen
    # order, so the listed sources are stable across runs instead of depending
    # on string hash randomisation.
    unique_sources = list(dict.fromkeys(src for _, src, _ in filtered_results[1:]))
    if unique_sources:
        answer_parts.append(
            f"\n\n💡 **Additional information available in:** {', '.join(unique_sources)}"
        )

    # Footer listing every contributing source, in the order results were given.
    answer_parts.append("\n\n---")
    all_sources = list(dict.fromkeys(src for _, src, _ in filtered_results))
    answer_parts.append(f"📚 **Sources:** {', '.join(all_sources)}")

    return "\n".join(answer_parts)
def clean_markdown(text: str) -> str:
    """
    Clean up markdown text for better readability.

    Each line is stripped of surrounding whitespace and blank lines are
    dropped. Lines starting with ``#`` are turned into bold text preceded by
    a blank line; a line made only of ``#`` characters is discarded. Every
    other line (list items, numbered items, plain text) is kept as-is.

    Args:
        text: Raw markdown text, possibly spanning several lines.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    cleaned_lines = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('#'):
            # Convert markdown headers to bold text; the leading newline
            # leaves one blank line above the header once lines are joined.
            header_text = line.lstrip('#').strip()
            if header_text:
                cleaned_lines.append(f"\n**{header_text}**")
            continue

        # List items, numbered items and regular text are all kept unchanged.
        cleaned_lines.append(line)

    # Blank lines were dropped and headers add a single leading newline, so
    # the joined text can never contain three consecutive newlines and no
    # further collapsing is needed.
    return '\n'.join(cleaned_lines).strip()
def build_answer(query: str) -> str:
    """
    Answer a query straight from the knowledge base, with no LLM generation.

    Looks up the TOP_K best-matching chunks in the KB index and hands them
    to the formatter, whose formatted text is returned unchanged.
    """
    return format_answer_from_results(query, kb_index.search(query, top_k=TOP_K))
 def chat_respond(message: str, history):
 # -----------------------------
 description = """
+🚀 **Fast Knowledge Base Search Assistant**
+Ask questions and get instant answers from the knowledge base.
+This assistant uses semantic search to find the most relevant information quickly.
+**Tips for better results:**
+- Be specific in your questions
+- Use keywords related to your topic
+- Ask one question at a time
 """
 # Create ChatInterface (without 'type' parameter for compatibility)