Update app.py
app.py
CHANGED
@@ -1,11 +1,11 @@
 """
-RAG Mini Demo (Presidents Theme) — Strict, Concise, and Clean
-
+RAG Mini Demo (Presidents Theme) — Strict, Concise, and Clean (Refreshed)
+-----------------------------------------------------------------------
 This Gradio app compares:
 1) LLM-Only (sampling) — answers directly from the model (can hallucinate)
 2) RAG (strict deterministic) — retrieves context and answers ONLY from that context
 - Deterministic decoding (no sampling)
-- One-sentence,
+- One-sentence answers, no explanations, no brackets/citations
 - Guardrail for the "female US presidents" query
 - Post-clean to remove any instruction echoes or meta-talk
 """

@@ -93,8 +93,9 @@ class RAGStore:
         "His major achievements include passing the Affordable Care Act and ordering the military operation that "
         "killed Osama bin Laden.",

-        "As of 2025, the United States has never had a female president.
-
+        "As of 2025, the United States has never had a female president.",
+
+        "As of 2025, the current president is Joe Biden, the 46th, who took office in 2021."
     ]

     chunks = []

@@ -171,11 +172,12 @@ def generate_llm_only(question: str, max_new_tokens: int = 128, temperature: flo
     return out[0]["generated_text"]

 # ----------------------------
-# STRICT deterministic RAG (concise + clean)
+# STRICT deterministic RAG (concise + clean, no brackets)
 # ----------------------------
 STRICT_RAG_SYSTEM = (
     "Answer ONLY using the provided context. "
     "Reply in one short sentence with just the answer. "
+    "Do not include citations, brackets, or numbers in your answer. "
     "If the context does not contain the answer, reply exactly: "
     "\"I don't know based on the provided context.\" "
     "Do not explain your reasoning. Do not include any extra text."

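Reviewer note: this block builds `STRICT_RAG_SYSTEM` through Python's implicit concatenation of adjacent string literals, so the added line simply splices one more sentence into a single prompt string. A minimal illustration, using two sentences copied from the diff:

```python
# Adjacent string literals inside parentheses fuse into one string
msg = (
    "Answer ONLY using the provided context. "
    "Do not include citations, brackets, or numbers in your answer."
)
print(msg)
# Answer ONLY using the provided context. Do not include citations, brackets, or numbers in your answer.
```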
@@ -200,14 +202,14 @@ def _female_president_guard(question: str, context_chunks: List[str]) -> Optiona
 def _post_clean(answer: str) -> str:
     """
     Remove any instruction echoes or meta-justifications.
-    Keep only the first sentence; strip
+    Keep only the first sentence; strip brackets/quotes; normalize spaces.
     """
     a = answer.strip()
-
+
+    # Trim if model echoed "Answer:" or instruction
     if "Answer:" in a:
         a = a.split("Answer:", 1)[-1].strip()

-    # Remove leading common instruction phrases if present
     lowers = a.lower()
     bad_starts = [
         "answer only using the provided context",

@@ -219,10 +221,12 @@ def _post_clean(answer: str) -> str:
     ]
     for bs in bad_starts:
         if lowers.startswith(bs):
-            # take the remainder after the first period if it exists
             a = a.split(".", 1)[-1].strip() or a
             break

+    # Strip bracketed numeric citations like [1], [23], etc.
+    a = re.sub(r"\s*\[\d+\]\s*", " ", a).strip()
+
     # Keep only the first sentence
     if "." in a:
         a = a.split(".", 1)[0].strip() + "."

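A quick sanity check of the new citation-stripping substitution, runnable on its own (the sample strings are invented for illustration; note the substitution can leave a stray space before trailing punctuation):

```python
import re

samples = [
    "Joe Biden is the current president [2].",
    "[1] George Washington was the first president.",
]
for s in samples:
    # Same pattern the diff adds: a bracketed number plus surrounding spaces -> one space
    print(re.sub(r"\s*\[\d+\]\s*", " ", s).strip())
# Joe Biden is the current president .
# George Washington was the first president.
```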
@@ -232,6 +236,11 @@ def _post_clean(answer: str) -> str:

     # Normalize internal whitespace
     a = normalize_ws(a)
+
+    # If post-clean left us empty or only brackets, abstain
+    if not a or re.fullmatch(r"\[\d+\]", a):
+        a = "I don't know based on the provided context."
+
     return a

 def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80):

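The new empty/bracket-only fallback is easy to verify in isolation. A small sketch mirroring just that branch (the function name and inputs here are hypothetical):

```python
import re

def abstain_if_degenerate(a: str) -> str:
    # Mirrors the added fallback: empty or bracket-only output becomes the abstention string
    if not a or re.fullmatch(r"\[\d+\]", a):
        return "I don't know based on the provided context."
    return a

print(abstain_if_degenerate(""))            # I don't know based on the provided context.
print(abstain_if_degenerate("[3]"))         # I don't know based on the provided context.
print(abstain_if_degenerate("Joe Biden."))  # Joe Biden.
```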
@@ -241,14 +250,16 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80)
     # 1) Retrieve
     hits = rag_store.retrieve(question, k=k)
     chunks = [c for _, c in hits]
-    context = "\n\n".join([f"[{i+1}] {c}" for i, c in enumerate(chunks)]) if chunks else ""

     # 2) Guardrail: female-president question
     override = _female_president_guard(question, chunks)
     if override is not None:
         return override, hits

-    # 3) Build
+    # 3) Build context with bullets (no bracket labels)
+    context = "\n\n".join([f"- {c}" for c in chunks]) if chunks else ""
+
+    # 4) Build strict prompt
     prompt = (
         f"{STRICT_RAG_SYSTEM}\n\n"
         f"Context:\n{context}\n\n"

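The switch from `[1]`-style labels to plain bullets removes the bracket tokens the model used to echo back into answers. What the model now sees as context, shown with placeholder chunks:

```python
chunks = [
    "Abraham Lincoln was the 16th president.",
    "He issued the Emancipation Proclamation in 1863.",
]
# Same join as in the diff: one bullet per retrieved chunk, blank line between
context = "\n\n".join([f"- {c}" for c in chunks]) if chunks else ""
print(context)
# - Abraham Lincoln was the 16th president.
#
# - He issued the Emancipation Proclamation in 1863.
```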
@@ -256,7 +267,7 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80)
         f"Answer:"
     )

-    #
+    # 5) Deterministic decoding (no sampling)
     out = generator(
         prompt,
         max_new_tokens=int(max_new_tokens),

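The hunk shows only the first arguments of the `generator(...)` call, but the new comment says decoding is deterministic. For context, a deterministic call to a Hugging Face text-generation pipeline typically looks like the sketch below; the model name and the `return_full_text` setting are assumptions for illustration, not taken from this diff:

```python
from transformers import pipeline

# Hypothetical small model, for illustration only
generator = pipeline("text-generation", model="distilgpt2")

out = generator(
    "Context:\n- Example fact.\n\nQuestion: Who?\nAnswer:",
    max_new_tokens=80,
    do_sample=False,         # greedy decoding: no sampling, no temperature
    return_full_text=False,  # assumed here: return only the completion, not the prompt
)
raw = out[0]["generated_text"]
print(raw)
```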
@@ -268,10 +279,10 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80)
     )
     raw = out[0]["generated_text"]

-    #
+    # 6) Post-clean the model text (remove echoes/explanations/brackets)
     answer = _post_clean(raw)

-    #
+    # 7) Enforce abstention if no context present
     if not context.strip() and "i don't know based on the provided context" not in answer.lower():
         answer = "I don't know based on the provided context."

@@ -301,7 +312,7 @@ with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
     with gr.Column(scale=2):
         question = gr.Textbox(
             label="Your question",
-            placeholder="Example:
+            placeholder="Example: Who is the current president of the United States?",
             lines=3
         )

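For completeness, the placeholder tweak dropped into a minimal runnable Gradio sketch (the layout is heavily simplified; the real app wires this textbox into the comparison UI):

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Column(scale=2):
        question = gr.Textbox(
            label="Your question",
            placeholder="Example: Who is the current president of the United States?",
            lines=3,
        )

if __name__ == "__main__":
    demo.launch()
```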
|
|