DrDavis committed on
Commit 4b8fca5 · verified · 1 Parent(s): a86c6e0

Update app.py

Files changed (1): app.py (+60 -23)
app.py CHANGED
@@ -1,17 +1,13 @@
  """
- RAG Mini Demo (Presidents Theme) — Strict & Deterministic RAG
+ RAG Mini Demo (Presidents Theme) — Strict, Concise, and Clean
  --------------------------------------------------------------
  This Gradio app compares:
- 1) LLM-Only (sampling) — answers directly from the model (can hallucinate)
- 2) RAG (strict) — retrieves context from a small corpus and answers ONLY from that context
- - Deterministic decoding (no sampling) to reduce “creative” guessing
- - Guardrail: if question asks for female US presidents and context asserts 'none', we answer that directly
-
- Stack (CPU-friendly):
- - sentence-transformers/all-MiniLM-L6-v2 → embeddings
- - faiss-cpu → fast similarity search
- - google/flan-t5-small → generator
- - Gradio → web UI
+ 1) LLM-Only (sampling) — answers directly from the model (can hallucinate)
+ 2) RAG (strict deterministic) — retrieves context and answers ONLY from that context
+ - Deterministic decoding (no sampling)
+ - One-sentence, terse answers (no explanations)
+ - Guardrail for the "female US presidents" query
+ - Post-clean to remove any instruction echoes or meta-talk
  """

  import os, io, re, faiss
@@ -26,11 +22,11 @@ from transformers import pipeline
  # Config (easy knobs)
  # ----------------------------
  EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
- GEN_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
+ GEN_MODEL_ID = "google/flan-t5-small"

- CHUNK_SIZE = 500 # chars per chunk
- CHUNK_OVERLAP = 100 # overlap to avoid boundary misses
- TOP_K = 3 # retrieved chunks
+ CHUNK_SIZE = 500
+ CHUNK_OVERLAP = 100
+ TOP_K = 3

  # ----------------------------
  # Utilities
@@ -98,6 +94,7 @@ class RAGStore:
              "killed Osama bin Laden.",

              "As of 2025, the United States has never had a female president. "
+             "The current president is Donald J. Trump, the 45th and now the 47th, who took office in 2025."
          ]

          chunks = []
@@ -174,13 +171,14 @@ def generate_llm_only(question: str, max_new_tokens: int = 128, temperature: flo
      return out[0]["generated_text"]

  # ----------------------------
- # STRICT deterministic RAG
+ # STRICT deterministic RAG (concise + clean)
  # ----------------------------
  STRICT_RAG_SYSTEM = (
-     "You are a careful assistant. Answer ONLY using the provided context. "
+     "Answer ONLY using the provided context. "
+     "Reply in one short sentence with just the answer. "
      "If the context does not contain the answer, reply exactly: "
      "\"I don't know based on the provided context.\" "
-     "Do not guess. Do not use outside knowledge."
+     "Do not explain your reasoning. Do not include any extra text."
  )

  def _mentions_no_female_president(text: str) -> bool:
@@ -199,7 +197,44 @@ def _female_president_guard(question: str, context_chunks: List[str]) -> Optiona
          return "As of 2025, the United States has never had a female president."
      return None

- def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 96):
+ def _post_clean(answer: str) -> str:
+     """
+     Remove any instruction echoes or meta-justifications.
+     Keep only the first sentence; strip surrounding quotes/spaces.
+     """
+     a = answer.strip()
+     # If the model echoed the prompt/instruction, try to cut to the "Answer:" portion
+     if "Answer:" in a:
+         a = a.split("Answer:", 1)[-1].strip()
+
+     # Remove leading common instruction phrases if present
+     lowers = a.lower()
+     bad_starts = [
+         "answer only using the provided context",
+         "you are a careful assistant",
+         "this answer is correct",
+         "based solely",
+         "therefore,",
+         "therefore "
+     ]
+     for bs in bad_starts:
+         if lowers.startswith(bs):
+             # take the remainder after the first period if it exists
+             a = a.split(".", 1)[-1].strip() or a
+             break
+
+     # Keep only the first sentence
+     if "." in a:
+         a = a.split(".", 1)[0].strip() + "."
+
+     # Strip surrounding quotes
+     a = a.strip(" \"'")
+
+     # Normalize internal whitespace
+     a = normalize_ws(a)
+     return a
+
+ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80):
      if not question.strip():
          return "Please enter a question.", []
@@ -231,9 +266,12 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 96)
          length_penalty=0.9,
          no_repeat_ngram_size=3,
      )
-     answer = out[0]["generated_text"].strip()
+     raw = out[0]["generated_text"]
+
+     # 5) Post-clean the model text (remove echoes/explanations)
+     answer = _post_clean(raw)

-     # 5) Enforce abstention if no context present
+     # 6) Enforce abstention if no context present
      if not context.strip() and "i don't know based on the provided context" not in answer.lower():
          answer = "I don't know based on the provided context."
@@ -281,8 +319,7 @@ with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
          with gr.Column():
              gr.Markdown("#### 📎 RAG-Grounded (Strict Deterministic)")
              topk = gr.Slider(1, 8, value=3, step=1, label="Top-K chunks")
-             max_new_rag = gr.Slider(32, 256, value=96, step=8, label="Max new tokens")
-             # keep temp/top-p sliders visible for symmetry but unused in strict RAG
+             max_new_rag = gr.Slider(32, 256, value=80, step=8, label="Max new tokens")
              temp_rag = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature (unused)", interactive=False)
              topp_rag = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p (unused)", interactive=False)
              rag_btn = gr.Button("Generate (RAG)")
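
For reference, a minimal runnable sketch (not part of the commit) of what the new `_post_clean` step does to a typical echoed generation; `normalize_ws` lives elsewhere in app.py and is stubbed here with a plain whitespace collapse:

```python
import re

def normalize_ws(s: str) -> str:
    # Stand-in for app.py's normalize_ws (defined outside this diff):
    # collapse whitespace runs into single spaces.
    return re.sub(r"\s+", " ", s).strip()

def post_clean(answer: str) -> str:
    # Mirrors the _post_clean added in this commit.
    a = answer.strip()
    if "Answer:" in a:                      # cut to the text after an echoed "Answer:"
        a = a.split("Answer:", 1)[-1].strip()
    lowers = a.lower()
    bad_starts = [
        "answer only using the provided context",
        "you are a careful assistant",
        "this answer is correct",
        "based solely",
        "therefore,",
        "therefore ",
    ]
    for bs in bad_starts:                   # drop a leading instruction echo
        if lowers.startswith(bs):
            a = a.split(".", 1)[-1].strip() or a
            break
    if "." in a:                            # keep only the first sentence
        a = a.split(".", 1)[0].strip() + "."
    a = a.strip(" \"'")                     # strip surrounding quotes
    return normalize_ws(a)

raw = ("You are a careful assistant. Answer: Barack Obama was the 44th "
       "president. He served two terms with Joe Biden as vice president.")
print(post_clean(raw))  # -> Barack Obama was the 44th president.
```

One caveat of the committed heuristic: the first-sentence split breaks on the first period, so an answer containing a middle initial (such as the corpus's "Donald J. Trump") would be truncated at "Donald J."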
 
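
The prompt assembly and generation call sit outside the hunks shown above; the sketch below is one plausible wiring of STRICT_RAG_SYSTEM with the retrieved chunks and the deterministic decoding the docstring describes. The prompt layout and `num_beams` value are assumptions, not app.py's actual code; `length_penalty=0.9` and `no_repeat_ngram_size=3` come from the diff:

```python
from typing import List
from transformers import pipeline

generator = pipeline("text2text-generation", model="google/flan-t5-small")

STRICT_RAG_SYSTEM = (
    "Answer ONLY using the provided context. "
    "Reply in one short sentence with just the answer. "
    "If the context does not contain the answer, reply exactly: "
    "\"I don't know based on the provided context.\" "
    "Do not explain your reasoning. Do not include any extra text."
)

def build_prompt(question: str, chunks: List[str]) -> str:
    # Hypothetical prompt layout; app.py's real formatting may differ.
    context = "\n".join(chunks)
    return f"{STRICT_RAG_SYSTEM}\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:"

out = generator(
    build_prompt("Who was the 44th president?",
                 ["Barack Obama served as the 44th president of the United States."]),
    max_new_tokens=80,       # matches the new slider default
    do_sample=False,         # deterministic decoding (no sampling)
    num_beams=4,             # assumption: length_penalty implies beam search
    length_penalty=0.9,      # from the diff
    no_repeat_ngram_size=3,  # from the diff
)
print(out[0]["generated_text"])
```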