DrDavis committed on
Commit
59a2df9
·
verified ·
1 Parent(s): 09f6cee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -1
app.py CHANGED
@@ -132,4 +132,127 @@ def fmt_ctx(snips: List[Dict[str, Any]]) -> str:
132
  # ----------------------------
133
  STRICT_RAG_SYSTEM = (
134
  'Role: You are a careful assistant. Your first duty is factual fidelity to the provided CONTEXT; '
135
- 'your second
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
# ----------------------------
# System prompt for strict, context-grounded answering. The wording is part
# of the model-facing contract, so the string value is kept exactly as-is.
STRICT_RAG_SYSTEM = (
    'Role: You are a careful assistant. Your first duty is factual fidelity to the provided CONTEXT; '
    'your second duty is to apply light stylistic polish (headings/bullets/concise wording) without adding, '
    'removing, or rephrasing facts. Golden rule (priority): 1) RAG facts 2) User instructions 3) Style. '
    'Answer ONLY using CONTEXT; if the context does not contain the answer, reply exactly: '
    '"I don\'t know based on the provided context." Do not use outside knowledge. Keep all names/dates/numbers '
    'exactly as in CONTEXT. Use inline [C#] citations at the end of each sentence that relies on CONTEXT. '
    'Style guardrails: you may adjust tone for clarity and flow and use brief headings or bullets; you may NOT '
    'introduce new claims, imply certainty not present in CONTEXT, or add evaluative language. If support is partial, '
    'state plainly what is unknown. Produce the answer now with inline [C#] citations.'
)


def rag_prompt(question: str, ctx: str) -> str:
    """Assemble the strict-RAG prompt: system rules, then context, then task.

    Args:
        question: The user's question, inserted under USER_TASK.
        ctx: Pre-formatted context snippets, inserted under CONTEXT.

    Returns:
        The full prompt string, sections separated by blank lines.
    """
    sections = [
        STRICT_RAG_SYSTEM,
        f"CONTEXT:\n{ctx}",
        f"USER_TASK:\n{question}",
        "Assistant: Provide the answer now with inline [C#] citations.",
    ]
    return "\n\n".join(sections)
153
+
154
+
155
# ----------------------------
# Deterministic generation
# ----------------------------
def det_generate(
    prompt: str,
    strategy: str,
    beams: int,
    max_new_tokens: int
) -> str:
    """Generate text deterministically (greedy or beam search, never sampling).

    Args:
        prompt: Full text prompt fed to the generation pipeline.
        strategy: "beam" enables beam search; any other value means greedy.
        beams: Beam count when strategy == "beam" (clamped to >= 1).
        max_new_tokens: Cap on the number of newly generated tokens.

    Returns:
        The pipeline's "generated_text" for the first (only) candidate.
    """
    seed_all(0)  # fixed seed so repeated calls are reproducible
    pipe = get_pipe()

    # The two strategies shared an entire duplicated pipeline call that
    # differed only in the beam settings; build the kwargs once instead.
    gen_kwargs = {
        "do_sample": False,  # deterministic decoding: no sampling
        "max_new_tokens": max_new_tokens,
        # NOTE(review): _tok is presumably the module-level tokenizer; pass
        # None (pipeline default) when no EOS id is available.
        "eos_token_id": _tok.eos_token_id if _tok and _tok.eos_token_id is not None else None,
    }
    if strategy == "beam":
        gen_kwargs["num_beams"] = max(1, beams)
        gen_kwargs["early_stopping"] = True

    out = pipe(prompt, **gen_kwargs)
    return out[0]["generated_text"]
185
+
186
+
187
+ # ----------------------------
188
+ # RAG (deterministic decoding: beams + length penalty)
189
+ # ----------------------------
190
+ def rag_answer(
191
+ question: str,
192
+ top_k: int,
193
+ beams: int,
194
+ length_penalty: float,
195
+ max_new_tokens: int
196
+ ) -> str:
197
+ """RAG grounded answer with deterministic decoding controls."""
198
+ hits = retrieve(question, k=top_k)
199
+ if not hits:
200
+ return "I don't know based on the provided context."
201
+ ctx = fmt_ctx(hits)
202
+ prompt = rag_prompt(question, ctx)
203
+
204
+ P = get_pipe()
205
+ out = P(
206
+ prompt,
207
+ do_sample=False, # no sampling (deterministic)
208
+ num_beams=max(1, beams), # beam search
209
+ length_penalty=float(length_penalty), # >1.0 favors longer sequences
210
+ early_stopping=True,
211
+ max_new_tokens=max_new_tokens,
212
+ eos_token_id=_tok.eos_token_id if _tok and _tok.eos_token_id is not None else None,
213
+ )
214
+ return out[0]["generated_text"]
215
+
216
+
217
# ----------------------------
# Build index at import
# ----------------------------
# NOTE: module-level side effect — importing this module loads the corpus
# from ./corpus and builds the retrieval index immediately, so a readable
# ./corpus directory must exist at import time.
_docs = load_corpus("./corpus")
build_index(_docs)
222
+
223
+
224
# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(title="ITC 754 — Deterministic & RAG (Beams + Length Penalty)") as demo:
    gr.Markdown(
        "## ITC 754 — Deterministic vs RAG-Grounded\n"
        "RAG side now uses **Beams** and **Length Penalty** to align with deterministic decoding.\n"
        "Put `.txt` files into `./corpus` and ask questions grounded in that content."
    )

    # Tab 1: plain deterministic generation (greedy vs beam search).
    with gr.Tab("Deterministic Text"):
        inp = gr.Textbox(label="Prompt", placeholder="Explain beam search in one paragraph.")
        strat = gr.Dropdown(choices=["greedy", "beam"], value="beam", label="Strategy")
        beams = gr.Slider(1, 8, step=1, value=4, label="Beams (num_beams)")
        mxt = gr.Slider(16, 512, step=16, value=128, label="Max new tokens")
        btn = gr.Button("Generate")
        out = gr.Textbox(label="Output", lines=8)
        # Inputs are wired in det_generate's parameter order:
        # (prompt, strategy, beams, max_new_tokens).
        btn.click(det_generate, [inp, strat, beams, mxt], [out])

    # Tab 2: RAG-grounded answering with deterministic decoding controls.
    with gr.Tab("RAG-Grounded"):
        q = gr.Textbox(label="Question", placeholder="Ask a question answerable from your ./corpus/*.txt files.")
        topk = gr.Slider(1, 10, step=1, value=4, label="Top-K Passages")
        r_beams = gr.Slider(1, 8, step=1, value=4, label="Beams (num_beams)")
        lp = gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Length Penalty")
        r_mxt = gr.Slider(16, 512, step=16, value=180, label="Max new tokens")
        r_btn = gr.Button("Answer from RAG")
        r_out = gr.Textbox(label="Answer", lines=12)
        # Inputs are wired in rag_answer's parameter order:
        # (question, top_k, beams, length_penalty, max_new_tokens).
        r_btn.click(rag_answer, [q, topk, r_beams, lp, r_mxt], [r_out])
252
+
253
+
254
# ----------------------------
# Launch
# ----------------------------
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()