Update app.py
app.py
CHANGED
@@ -1,11 +1,11 @@
 """
-RAG Mini Demo (Presidents Theme) — Strict, Concise, and Clean
-
+RAG Mini Demo (Presidents Theme) — Strict, Concise, and Clean (Refreshed)
+-----------------------------------------------------------------------
 This Gradio app compares:
 1) LLM-Only (sampling) — answers directly from the model (can hallucinate)
 2) RAG (strict deterministic) — retrieves context and answers ONLY from that context
 - Deterministic decoding (no sampling)
-- One-sentence,
+- One-sentence answers, no explanations, no brackets/citations
 - Guardrail for the "female US presidents" query
 - Post-clean to remove any instruction echoes or meta-talk
 """

@@ -93,8 +93,9 @@ class RAGStore:
         "His major achievements include passing the Affordable Care Act and ordering the military operation that "
         "killed Osama bin Laden.",

-        "As of 2025, the United States has never had a female president.
-
+        "As of 2025, the United States has never had a female president.",
+
+        "As of 2025, the current president is Joe Biden, the 46th, who took office in 2021."
     ]

     chunks = []

@@ -171,11 +172,12 @@ def generate_llm_only(question: str, max_new_tokens: int = 128, temperature: flo
     return out[0]["generated_text"]

 # ----------------------------
-# STRICT deterministic RAG (concise + clean)
+# STRICT deterministic RAG (concise + clean, no brackets)
 # ----------------------------
 STRICT_RAG_SYSTEM = (
     "Answer ONLY using the provided context. "
     "Reply in one short sentence with just the answer. "
+    "Do not include citations, brackets, or numbers in your answer. "
     "If the context does not contain the answer, reply exactly: "
     "\"I don't know based on the provided context.\" "
     "Do not explain your reasoning. Do not include any extra text."

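Reviewer note: this block builds `STRICT_RAG_SYSTEM` through Python's implicit concatenation of adjacent string literals, so the added line simply splices one more sentence into a single prompt string. A minimal illustration, using two sentences copied from the diff:

```python
# Adjacent string literals inside parentheses fuse into one string
msg = (
    "Answer ONLY using the provided context. "
    "Do not include citations, brackets, or numbers in your answer."
)
print(msg)
# Answer ONLY using the provided context. Do not include citations, brackets, or numbers in your answer.
```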
@@ -200,14 +202,14 @@ def _female_president_guard(question: str, context_chunks: List[str]) -> Optiona
 def _post_clean(answer: str) -> str:
     """
     Remove any instruction echoes or meta-justifications.
-    Keep only the first sentence; strip
+    Keep only the first sentence; strip brackets/quotes; normalize spaces.
     """
     a = answer.strip()
-
+
+    # Trim if model echoed "Answer:" or instruction
     if "Answer:" in a:
         a = a.split("Answer:", 1)[-1].strip()

-    # Remove leading common instruction phrases if present
     lowers = a.lower()
     bad_starts = [
         "answer only using the provided context",

@@ -219,10 +221,12 @@ def _post_clean(answer: str) -> str:
     ]
     for bs in bad_starts:
         if lowers.startswith(bs):
-            # take the remainder after the first period if it exists
             a = a.split(".", 1)[-1].strip() or a
             break

+    # Strip bracketed numeric citations like [1], [23], etc.
+    a = re.sub(r"\s*\[\d+\]\s*", " ", a).strip()
+
     # Keep only the first sentence
     if "." in a:
         a = a.split(".", 1)[0].strip() + "."

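A quick sanity check of the new citation-stripping substitution, runnable on its own (the sample strings are invented for illustration; note the substitution can leave a stray space before trailing punctuation):

```python
import re

samples = [
    "Joe Biden is the current president [2].",
    "[1] George Washington was the first president.",
]
for s in samples:
    # Same pattern the diff adds: a bracketed number plus surrounding spaces -> one space
    print(re.sub(r"\s*\[\d+\]\s*", " ", s).strip())
# Joe Biden is the current president .
# George Washington was the first president.
```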
@@ -232,6 +236,11 @@ def _post_clean(answer: str) -> str:

     # Normalize internal whitespace
     a = normalize_ws(a)
+
+    # If post-clean left us empty or only brackets, abstain
+    if not a or re.fullmatch(r"\[\d+\]", a):
+        a = "I don't know based on the provided context."
+
     return a

 def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80):

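The new empty/bracket-only fallback is easy to verify in isolation. A small sketch mirroring just that branch (the function name and inputs here are hypothetical):

```python
import re

def abstain_if_degenerate(a: str) -> str:
    # Mirrors the added fallback: empty or bracket-only output becomes the abstention string
    if not a or re.fullmatch(r"\[\d+\]", a):
        return "I don't know based on the provided context."
    return a

print(abstain_if_degenerate(""))            # I don't know based on the provided context.
print(abstain_if_degenerate("[3]"))         # I don't know based on the provided context.
print(abstain_if_degenerate("Joe Biden."))  # Joe Biden.
```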
@@ -241,14 +250,16 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80)
     # 1) Retrieve
     hits = rag_store.retrieve(question, k=k)
     chunks = [c for _, c in hits]
-    context = "\n\n".join([f"[{i+1}] {c}" for i, c in enumerate(chunks)]) if chunks else ""

     # 2) Guardrail: female-president question
     override = _female_president_guard(question, chunks)
     if override is not None:
         return override, hits

-    # 3) Build
+    # 3) Build context with bullets (no bracket labels)
+    context = "\n\n".join([f"- {c}" for c in chunks]) if chunks else ""
+
+    # 4) Build strict prompt
     prompt = (
         f"{STRICT_RAG_SYSTEM}\n\n"
         f"Context:\n{context}\n\n"

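The switch from `[1]`-style labels to plain bullets removes the bracket tokens the model used to echo back into answers. What the model now sees as context, shown with placeholder chunks:

```python
chunks = [
    "Abraham Lincoln was the 16th president.",
    "He issued the Emancipation Proclamation in 1863.",
]
# Same join as in the diff: one bullet per retrieved chunk, blank line between
context = "\n\n".join([f"- {c}" for c in chunks]) if chunks else ""
print(context)
# - Abraham Lincoln was the 16th president.
#
# - He issued the Emancipation Proclamation in 1863.
```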
@@ -256,7 +267,7 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80)
         f"Answer:"
     )

-    #
+    # 5) Deterministic decoding (no sampling)
     out = generator(
         prompt,
         max_new_tokens=int(max_new_tokens),

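The hunk shows only the first arguments of the `generator(...)` call, but the new comment says decoding is deterministic. For context, a deterministic call to a Hugging Face text-generation pipeline typically looks like the sketch below; the model name and the `return_full_text` setting are assumptions for illustration, not taken from this diff:

```python
from transformers import pipeline

# Hypothetical small model, for illustration only
generator = pipeline("text-generation", model="distilgpt2")

out = generator(
    "Context:\n- Example fact.\n\nQuestion: Who?\nAnswer:",
    max_new_tokens=80,
    do_sample=False,         # greedy decoding: no sampling, no temperature
    return_full_text=False,  # assumed here: return only the completion, not the prompt
)
raw = out[0]["generated_text"]
print(raw)
```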
@@ -268,10 +279,10 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80)
     )
     raw = out[0]["generated_text"]

-    #
+    # 6) Post-clean the model text (remove echoes/explanations/brackets)
     answer = _post_clean(raw)

-    #
+    # 7) Enforce abstention if no context present
     if not context.strip() and "i don't know based on the provided context" not in answer.lower():
         answer = "I don't know based on the provided context."

@@ -301,7 +312,7 @@ with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
     with gr.Column(scale=2):
         question = gr.Textbox(
             label="Your question",
-            placeholder="Example:
+            placeholder="Example: Who is the current president of the United States?",
             lines=3
         )

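For completeness, the placeholder tweak dropped into a minimal runnable Gradio sketch (the layout is heavily simplified; the real app wires this textbox into the comparison UI):

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Column(scale=2):
        question = gr.Textbox(
            label="Your question",
            placeholder="Example: Who is the current president of the United States?",
            lines=3,
        )

if __name__ == "__main__":
    demo.launch()
```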
|
|