Update app.py
app.py
CHANGED
@@ -1,17 +1,13 @@
 """
-RAG Mini Demo (Presidents Theme) — Strict
+RAG Mini Demo (Presidents Theme) — Strict, Concise, and Clean
 --------------------------------------------------------------
 This Gradio app compares:
-1) LLM-Only (sampling)
-2) RAG (strict) — retrieves context
-   - Deterministic decoding (no sampling)
-
-
-
-   - sentence-transformers/all-MiniLM-L6-v2 → embeddings
-   - faiss-cpu → fast similarity search
-   - google/flan-t5-small → generator
-   - Gradio → web UI
+1) LLM-Only (sampling) — answers directly from the model (can hallucinate)
+2) RAG (strict deterministic) — retrieves context and answers ONLY from that context
+   - Deterministic decoding (no sampling)
+   - One-sentence, terse answers (no explanations)
+   - Guardrail for the "female US presidents" query
+   - Post-clean to remove any instruction echoes or meta-talk
 """

 import os, io, re, faiss
@@ -26,11 +22,11 @@ from transformers import pipeline
 # Config (easy knobs)
 # ----------------------------
 EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
-GEN_MODEL_ID = "
+GEN_MODEL_ID = "google/flan-t5-small"

-CHUNK_SIZE = 500
-CHUNK_OVERLAP = 100
-TOP_K = 3
+CHUNK_SIZE = 500
+CHUNK_OVERLAP = 100
+TOP_K = 3

 # ----------------------------
 # Utilities
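Note: CHUNK_SIZE and CHUNK_OVERLAP are character-window knobs; the chunker itself lives in the unchanged Utilities section. A minimal sketch of how such knobs are typically consumed (the function name `chunk_text` is hypothetical, not necessarily the app's):

```python
# Hypothetical sketch of a sliding character-window chunker driven by the
# CHUNK_SIZE / CHUNK_OVERLAP knobs above; app.py's real chunker is unchanged
# by this commit and may differ.
def chunk_text(text: str, size: int = 500, overlap: int = 100) -> list:
    step = size - overlap              # advance 400 chars per window
    chunks = []
    for start in range(0, len(text), step):
        piece = text[start:start + size]
        if piece.strip():
            chunks.append(piece)
        if start + size >= len(text):  # last window reached the end of the text
            break
    return chunks
```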
@@ -98,6 +94,7 @@ class RAGStore:
             "killed Osama bin Laden.",

             "As of 2025, the United States has never had a female president. "
+            "The current president is Donald J. Trump, the 45th and now the 47th, who took office in 2025."
         ]

         chunks = []
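Note: this hunk only extends the seed corpus that RAGStore indexes. For orientation, a hedged sketch of the embed-and-index step such a store typically performs; `build_index` is an assumed name, and the app's actual indexing code is outside this diff:

```python
# Sketch only: turning corpus chunks into a searchable FAISS index using
# normalized MiniLM embeddings, so inner product behaves like cosine similarity.
# build_index is a hypothetical helper; RAGStore's real method may differ.
import faiss
from sentence_transformers import SentenceTransformer

def build_index(chunks):
    embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    vecs = embedder.encode(chunks, normalize_embeddings=True)  # unit-length float32 vectors
    index = faiss.IndexFlatIP(vecs.shape[1])                   # exact inner-product search
    index.add(vecs)
    return embedder, index
```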
@@ -174,13 +171,14 @@ def generate_llm_only(question: str, max_new_tokens: int = 128, temperature: flo
     return out[0]["generated_text"]

 # ----------------------------
-# STRICT deterministic RAG
+# STRICT deterministic RAG (concise + clean)
 # ----------------------------
 STRICT_RAG_SYSTEM = (
-    "
+    "Answer ONLY using the provided context. "
+    "Reply in one short sentence with just the answer. "
     "If the context does not contain the answer, reply exactly: "
     "\"I don't know based on the provided context.\" "
-    "Do not
+    "Do not explain your reasoning. Do not include any extra text."
 )

 def _mentions_no_female_president(text: str) -> bool:
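Note: the template that wraps STRICT_RAG_SYSTEM around the retrieved chunks is not part of this diff. A hedged sketch of the likely assembly, assuming the prompt ends with "Answer:" (which would explain why the new `_post_clean` below cuts at that token); `build_strict_prompt` is a hypothetical name:

```python
# Hypothetical prompt assembly implied by STRICT_RAG_SYSTEM; the template
# actually used by generate_rag_strict sits outside this diff.
def build_strict_prompt(system: str, context_chunks: list, question: str) -> str:
    context = "\n".join(context_chunks)
    return f"{system}\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:"
```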
@@ -199,7 +197,44 @@ def _female_president_guard(question: str, context_chunks: List[str]) -> Optiona
         return "As of 2025, the United States has never had a female president."
     return None

-def
+def _post_clean(answer: str) -> str:
+    """
+    Remove any instruction echoes or meta-justifications.
+    Keep only the first sentence; strip surrounding quotes/spaces.
+    """
+    a = answer.strip()
+    # If the model echoed the prompt/instruction, try to cut to the "Answer:" portion
+    if "Answer:" in a:
+        a = a.split("Answer:", 1)[-1].strip()
+
+    # Remove leading common instruction phrases if present
+    lowers = a.lower()
+    bad_starts = [
+        "answer only using the provided context",
+        "you are a careful assistant",
+        "this answer is correct",
+        "based solely",
+        "therefore,",
+        "therefore "
+    ]
+    for bs in bad_starts:
+        if lowers.startswith(bs):
+            # take the remainder after the first period if it exists
+            a = a.split(".", 1)[-1].strip() or a
+            break
+
+    # Keep only the first sentence
+    if "." in a:
+        a = a.split(".", 1)[0].strip() + "."
+
+    # Strip surrounding quotes
+    a = a.strip(" \"'")
+
+    # Normalize internal whitespace
+    a = normalize_ws(a)
+    return a
+
+def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80):
     if not question.strip():
         return "Please enter a question.", []
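Note: a quick illustration of the new `_post_clean` on a typical instruction echo; the behavior is read directly off the code above and assumes app.py's existing `normalize_ws` helper:

```python
# Illustrative only: _post_clean drops the echoed instruction, keeps the
# first real sentence, and strips stray quotes/whitespace.
raw = 'Answer ONLY using the provided context. George Washington was the first president. He served two terms.'
print(_post_clean(raw))
# -> George Washington was the first president.
```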
@@ -231,9 +266,12 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 96)
         length_penalty=0.9,
         no_repeat_ngram_size=3,
     )
-
+    raw = out[0]["generated_text"]
+
+    # 5) Post-clean the model text (remove echoes/explanations)
+    answer = _post_clean(raw)

-    #
+    # 6) Enforce abstention if no context present
     if not context.strip() and "i don't know based on the provided context" not in answer.lower():
         answer = "I don't know based on the provided context."

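Note: the generation call these kwargs belong to is mostly outside the hunk. A hedged sketch consistent with "strict deterministic" decoding; do_sample=False and num_beams=4 are assumptions, not lines from app.py (length_penalty only takes effect under beam search):

```python
# Assumed shape of the decoding call around length_penalty / no_repeat_ngram_size;
# do_sample=False and num_beams=4 are guesses consistent with deterministic output.
from transformers import pipeline

gen = pipeline("text2text-generation", model="google/flan-t5-small")
prompt = (
    "Answer ONLY using the provided context. "
    "Context:\nGeorge Washington was the first US president.\n"
    "Question: Who was the first US president?\nAnswer:"
)
out = gen(
    prompt,
    max_new_tokens=80,
    do_sample=False,            # deterministic: temperature/top-p are ignored
    num_beams=4,
    length_penalty=0.9,
    no_repeat_ngram_size=3,
)
raw = out[0]["generated_text"]
```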
@@ -281,8 +319,7 @@ with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
         with gr.Column():
             gr.Markdown("#### 📎 RAG-Grounded (Strict Deterministic)")
             topk = gr.Slider(1, 8, value=3, step=1, label="Top-K chunks")
-            max_new_rag = gr.Slider(32, 256, value=
-            # keep temp/top-p sliders visible for symmetry but unused in strict RAG
+            max_new_rag = gr.Slider(32, 256, value=80, step=8, label="Max new tokens")
             temp_rag = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature (unused)", interactive=False)
             topp_rag = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p (unused)", interactive=False)
             rag_btn = gr.Button("Generate (RAG)")
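Note: this hunk shows only the widgets. A hedged sketch of how `rag_btn` is presumably wired up elsewhere in app.py; `question_box`, `rag_answer`, and `rag_sources` are placeholder component names, not taken from the diff:

```python
# Hypothetical wiring; the real .click() registration is outside this diff.
def on_rag_click(question, k, max_new):
    answer, chunks = generate_rag_strict(question, k=int(k), max_new_tokens=int(max_new))
    return answer, "\n\n".join(chunks)

rag_btn.click(
    on_rag_click,
    inputs=[question_box, topk, max_new_rag],  # question_box is a placeholder name
    outputs=[rag_answer, rag_sources],         # placeholder output components
)
```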