Update app.py
Browse files
app.py
CHANGED
|
@@ -132,4 +132,127 @@ def fmt_ctx(snips: List[Dict[str, Any]]) -> str:
|
|
| 132 |
# ----------------------------
|
| 133 |
STRICT_RAG_SYSTEM = (
|
| 134 |
'Role: You are a careful assistant. Your first duty is factual fidelity to the provided CONTEXT; '
|
| 135 |
-
'your second
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
# ----------------------------
# Strict RAG system prompt
# ----------------------------
# Adjacent string literals are concatenated at parse time into one prompt.
# NOTE: rag_answer() returns the refusal sentence below verbatim when
# retrieval is empty, so the wording here must not drift from that string.
STRICT_RAG_SYSTEM = (
    'Role: You are a careful assistant. Your first duty is factual fidelity to the provided CONTEXT; '
    'your second duty is to apply light stylistic polish (headings/bullets/concise wording) without adding, '
    'removing, or rephrasing facts. Golden rule (priority): 1) RAG facts 2) User instructions 3) Style. '
    'Answer ONLY using CONTEXT; if the context does not contain the answer, reply exactly: '
    '"I don\'t know based on the provided context." Do not use outside knowledge. Keep all names/dates/numbers '
    'exactly as in CONTEXT. Use inline [C#] citations at the end of each sentence that relies on CONTEXT. '
    'Style guardrails: you may adjust tone for clarity and flow and use brief headings or bullets; you may NOT '
    'introduce new claims, imply certainty not present in CONTEXT, or add evaluative language. If support is partial, '
    'state plainly what is unknown. Produce the answer now with inline [C#] citations.'
)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def rag_prompt(question: str, ctx: str) -> str:
    """Assemble the full RAG prompt: system rules, context, user task, answer cue."""
    sections = [
        STRICT_RAG_SYSTEM,
        f"CONTEXT:\n{ctx}",
        f"USER_TASK:\n{question}",
        "Assistant: Provide the answer now with inline [C#] citations.",
    ]
    # Blank line between sections; no trailing newline after the final cue.
    return "\n\n".join(sections)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
# ----------------------------
|
| 156 |
+
# Deterministic generation
|
| 157 |
+
# ----------------------------
|
| 158 |
+
def det_generate(
    prompt: str,
    strategy: str,
    beams: int,
    max_new_tokens: int
) -> str:
    """Greedy vs. beam-search (deterministic decoding).

    Args:
        prompt: Text passed directly to the generation pipeline.
        strategy: "beam" enables beam search; any other value means greedy.
        beams: Beam count (clamped to >= 1); ignored unless strategy == "beam".
        max_new_tokens: Cap on the number of newly generated tokens.

    Returns:
        The pipeline's "generated_text" field (prompt inclusion follows the
        pipeline's default return_full_text behavior).
    """
    seed_all(0)  # pin any residual nondeterminism to a fixed seed
    P = get_pipe()
    # The original had two near-identical pipeline calls; build the shared
    # deterministic kwargs once and branch only on the beam-specific flags.
    gen_kwargs: Dict[str, Any] = dict(
        do_sample=False,  # no sampling -> deterministic
        max_new_tokens=max_new_tokens,
        # NOTE(review): assumes the module-level `_tok` tokenizer is populated
        # by get_pipe(); falls back to None (pipeline default) otherwise.
        eos_token_id=_tok.eos_token_id if _tok and _tok.eos_token_id is not None else None,
    )
    if strategy == "beam":
        # early_stopping is only meaningful for beam search, so it is only
        # passed here (passing it in greedy mode triggers warnings).
        gen_kwargs.update(num_beams=max(1, beams), early_stopping=True)
    out = P(prompt, **gen_kwargs)
    return out[0]["generated_text"]
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# ----------------------------
|
| 188 |
+
# RAG (deterministic decoding: beams + length penalty)
|
| 189 |
+
# ----------------------------
|
| 190 |
+
def rag_answer(
    question: str,
    top_k: int,
    beams: int,
    length_penalty: float,
    max_new_tokens: int
) -> str:
    """RAG-grounded answer with deterministic decoding controls.

    Args:
        question: User question to answer from the corpus.
        top_k: Number of passages to retrieve.
        beams: Beam count for beam-search decoding (clamped to >= 1).
        length_penalty: >1.0 favors longer sequences, <1.0 shorter ones.
        max_new_tokens: Cap on the number of newly generated tokens.

    Returns:
        The generated answer, or the fixed refusal string when retrieval
        returns no passages.
    """
    hits = retrieve(question, k=top_k)
    if not hits:
        # Exact refusal wording mandated by STRICT_RAG_SYSTEM.
        return "I don't know based on the provided context."
    ctx = fmt_ctx(hits)
    prompt = rag_prompt(question, ctx)

    P = get_pipe()
    num_beams = max(1, beams)
    out = P(
        prompt,
        do_sample=False,                       # no sampling (deterministic)
        num_beams=num_beams,                   # beam search
        length_penalty=float(length_penalty),  # >1.0 favors longer sequences
        # Fix: early_stopping only applies to beam search; passing True with a
        # single beam is meaningless and emits transformers warnings.
        early_stopping=num_beams > 1,
        max_new_tokens=max_new_tokens,
        eos_token_id=_tok.eos_token_id if _tok and _tok.eos_token_id is not None else None,
    )
    return out[0]["generated_text"]
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
# ----------------------------
# Build index at import
# ----------------------------
# NOTE(review): importing this module has side effects — the corpus is loaded
# and the retrieval index is built immediately, before the UI is defined.
_docs = load_corpus("./corpus")  # expects text files under ./corpus
build_index(_docs)
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
# ----------------------------
|
| 225 |
+
# Gradio UI
|
| 226 |
+
# ----------------------------
|
| 227 |
+
with gr.Blocks(title="ITC 754 — Deterministic & RAG (Beams + Length Penalty)") as demo:
    # Intro banner shown above both tabs.
    gr.Markdown(
        "## ITC 754 — Deterministic vs RAG-Grounded\n"
        "RAG side now uses **Beams** and **Length Penalty** to align with deterministic decoding.\n"
        "Put `.txt` files into `./corpus` and ask questions grounded in that content."
    )

    # Tab 1: free-form prompt, deterministic decoding (greedy or beam).
    with gr.Tab("Deterministic Text"):
        inp = gr.Textbox(label="Prompt", placeholder="Explain beam search in one paragraph.")
        strat = gr.Dropdown(choices=["greedy", "beam"], value="beam", label="Strategy")
        beams = gr.Slider(1, 8, step=1, value=4, label="Beams (num_beams)")
        mxt = gr.Slider(16, 512, step=16, value=128, label="Max new tokens")
        btn = gr.Button("Generate")
        out = gr.Textbox(label="Output", lines=8)
        # Input order must match det_generate's signature: (prompt, strategy, beams, max_new_tokens).
        btn.click(det_generate, [inp, strat, beams, mxt], [out])

    # Tab 2: corpus-grounded answering with retrieval + beam controls.
    with gr.Tab("RAG-Grounded"):
        q = gr.Textbox(label="Question", placeholder="Ask a question answerable from your ./corpus/*.txt files.")
        topk = gr.Slider(1, 10, step=1, value=4, label="Top-K Passages")
        r_beams = gr.Slider(1, 8, step=1, value=4, label="Beams (num_beams)")
        lp = gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Length Penalty")
        r_mxt = gr.Slider(16, 512, step=16, value=180, label="Max new tokens")
        r_btn = gr.Button("Answer from RAG")
        r_out = gr.Textbox(label="Answer", lines=12)
        # Input order must match rag_answer's signature:
        # (question, top_k, beams, length_penalty, max_new_tokens).
        r_btn.click(rag_answer, [q, topk, r_beams, lp, r_mxt], [r_out])
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
# ----------------------------
# Launch
# ----------------------------
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, not on import.
    demo.launch()
|