DrDavis committed on
Commit 4b8fca5 · verified · 1 Parent(s): a86c6e0

Update app.py

Files changed (1): app.py (+60 -23)
app.py CHANGED
@@ -1,17 +1,13 @@
  """
- RAG Mini Demo (Presidents Theme) — Strict & Deterministic RAG
+ RAG Mini Demo (Presidents Theme) — Strict, Concise, and Clean
  --------------------------------------------------------------
  This Gradio app compares:
- 1) LLM-Only (sampling) — answers directly from the model (can hallucinate)
- 2) RAG (strict) — retrieves context from a small corpus and answers ONLY from that context
- - Deterministic decoding (no sampling) to reduce “creative” guessing
- - Guardrail: if question asks for female US presidents and context asserts 'none', we answer that directly
-
- Stack (CPU-friendly):
- - sentence-transformers/all-MiniLM-L6-v2 → embeddings
- - faiss-cpu → fast similarity search
- - google/flan-t5-small → generator
- - Gradio → web UI
+ 1) LLM-Only (sampling) — answers directly from the model (can hallucinate)
+ 2) RAG (strict deterministic) — retrieves context and answers ONLY from that context
+ - Deterministic decoding (no sampling)
+ - One-sentence, terse answers (no explanations)
+ - Guardrail for the "female US presidents" query
+ - Post-clean to remove any instruction echoes or meta-talk
  """

  import os, io, re, faiss
@@ -26,11 +22,11 @@ from transformers import pipeline
  # Config (easy knobs)
  # ----------------------------
  EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
- GEN_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
+ GEN_MODEL_ID = "google/flan-t5-small"

- CHUNK_SIZE = 500 # chars per chunk
- CHUNK_OVERLAP = 100 # overlap to avoid boundary misses
- TOP_K = 3 # retrieved chunks
+ CHUNK_SIZE = 500
+ CHUNK_OVERLAP = 100
+ TOP_K = 3

  # ----------------------------
  # Utilities
@@ -98,6 +94,7 @@ class RAGStore:
              "killed Osama bin Laden.",

              "As of 2025, the United States has never had a female president. "
+             "The current president is Donald J. Trump, the 45th and now the 47th, who took office in 2025."
          ]

          chunks = []
@@ -174,13 +171,14 @@ def generate_llm_only(question: str, max_new_tokens: int = 128, temperature: flo
      return out[0]["generated_text"]

  # ----------------------------
- # STRICT deterministic RAG
+ # STRICT deterministic RAG (concise + clean)
  # ----------------------------
  STRICT_RAG_SYSTEM = (
-     "You are a careful assistant. Answer ONLY using the provided context. "
+     "Answer ONLY using the provided context. "
+     "Reply in one short sentence with just the answer. "
      "If the context does not contain the answer, reply exactly: "
      "\"I don't know based on the provided context.\" "
-     "Do not guess. Do not use outside knowledge."
+     "Do not explain your reasoning. Do not include any extra text."
  )

  def _mentions_no_female_president(text: str) -> bool:
@@ -199,7 +197,44 @@ def _female_president_guard(question: str, context_chunks: List[str]) -> Optiona
          return "As of 2025, the United States has never had a female president."
      return None

- def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 96):
+ def _post_clean(answer: str) -> str:
+     """
+     Remove any instruction echoes or meta-justifications.
+     Keep only the first sentence; strip surrounding quotes/spaces.
+     """
+     a = answer.strip()
+     # If the model echoed the prompt/instruction, try to cut to the "Answer:" portion
+     if "Answer:" in a:
+         a = a.split("Answer:", 1)[-1].strip()
+
+     # Remove leading common instruction phrases if present
+     lowers = a.lower()
+     bad_starts = [
+         "answer only using the provided context",
+         "you are a careful assistant",
+         "this answer is correct",
+         "based solely",
+         "therefore,",
+         "therefore "
+     ]
+     for bs in bad_starts:
+         if lowers.startswith(bs):
+             # take the remainder after the first period if it exists
+             a = a.split(".", 1)[-1].strip() or a
+             break
+
+     # Keep only the first sentence
+     if "." in a:
+         a = a.split(".", 1)[0].strip() + "."
+
+     # Strip surrounding quotes
+     a = a.strip(" \"'")
+
+     # Normalize internal whitespace
+     a = normalize_ws(a)
+     return a
+
+ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 80):
      if not question.strip():
          return "Please enter a question.", []
@@ -231,9 +266,12 @@ def generate_rag_strict(question: str, k: int = TOP_K, max_new_tokens: int = 96)
          length_penalty=0.9,
          no_repeat_ngram_size=3,
      )
-     answer = out[0]["generated_text"].strip()
+     raw = out[0]["generated_text"]
+
+     # 5) Post-clean the model text (remove echoes/explanations)
+     answer = _post_clean(raw)

-     # 5) Enforce abstention if no context present
+     # 6) Enforce abstention if no context present
      if not context.strip() and "i don't know based on the provided context" not in answer.lower():
          answer = "I don't know based on the provided context."
@@ -281,8 +319,7 @@ with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
          with gr.Column():
              gr.Markdown("#### 📎 RAG-Grounded (Strict Deterministic)")
              topk = gr.Slider(1, 8, value=3, step=1, label="Top-K chunks")
-             max_new_rag = gr.Slider(32, 256, value=96, step=8, label="Max new tokens")
-             # keep temp/top-p sliders visible for symmetry but unused in strict RAG
+             max_new_rag = gr.Slider(32, 256, value=80, step=8, label="Max new tokens")
              temp_rag = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature (unused)", interactive=False)
              topp_rag = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p (unused)", interactive=False)
              rag_btn = gr.Button("Generate (RAG)")
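
For reference, a minimal runnable sketch (not part of the commit) of what the new `_post_clean` step does to a typical echoed generation; `normalize_ws` lives elsewhere in app.py and is stubbed here with a plain whitespace collapse:

```python
import re

def normalize_ws(s: str) -> str:
    # Stand-in for app.py's normalize_ws (defined outside this diff):
    # collapse whitespace runs into single spaces.
    return re.sub(r"\s+", " ", s).strip()

def post_clean(answer: str) -> str:
    # Mirrors the _post_clean added in this commit.
    a = answer.strip()
    if "Answer:" in a:                      # cut to the text after an echoed "Answer:"
        a = a.split("Answer:", 1)[-1].strip()
    lowers = a.lower()
    bad_starts = [
        "answer only using the provided context",
        "you are a careful assistant",
        "this answer is correct",
        "based solely",
        "therefore,",
        "therefore ",
    ]
    for bs in bad_starts:                   # drop a leading instruction echo
        if lowers.startswith(bs):
            a = a.split(".", 1)[-1].strip() or a
            break
    if "." in a:                            # keep only the first sentence
        a = a.split(".", 1)[0].strip() + "."
    a = a.strip(" \"'")                     # strip surrounding quotes
    return normalize_ws(a)

raw = ("You are a careful assistant. Answer: Barack Obama was the 44th "
       "president. He served two terms with Joe Biden as vice president.")
print(post_clean(raw))  # -> Barack Obama was the 44th president.
```

One caveat of the committed heuristic: the first-sentence split breaks on the first period, so an answer containing a middle initial (such as the corpus's "Donald J. Trump") would be truncated at "Donald J."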
 
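
The prompt assembly and generation call sit outside the hunks shown above; the sketch below is one plausible wiring of STRICT_RAG_SYSTEM with the retrieved chunks and the deterministic decoding the docstring describes. The prompt layout and `num_beams` value are assumptions, not app.py's actual code; `length_penalty=0.9` and `no_repeat_ngram_size=3` come from the diff:

```python
from typing import List
from transformers import pipeline

generator = pipeline("text2text-generation", model="google/flan-t5-small")

STRICT_RAG_SYSTEM = (
    "Answer ONLY using the provided context. "
    "Reply in one short sentence with just the answer. "
    "If the context does not contain the answer, reply exactly: "
    "\"I don't know based on the provided context.\" "
    "Do not explain your reasoning. Do not include any extra text."
)

def build_prompt(question: str, chunks: List[str]) -> str:
    # Hypothetical prompt layout; app.py's real formatting may differ.
    context = "\n".join(chunks)
    return f"{STRICT_RAG_SYSTEM}\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:"

out = generator(
    build_prompt("Who was the 44th president?",
                 ["Barack Obama served as the 44th president of the United States."]),
    max_new_tokens=80,       # matches the new slider default
    do_sample=False,         # deterministic decoding (no sampling)
    num_beams=4,             # assumption: length_penalty implies beam search
    length_penalty=0.9,      # from the diff
    no_repeat_ngram_size=3,  # from the diff
)
print(out[0]["generated_text"])
```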