Update app.py
Browse files
app.py
CHANGED
|
@@ -132,4 +132,127 @@ def fmt_ctx(snips: List[Dict[str, Any]]) -> str:
|
|
| 132 |
# ----------------------------
|
| 133 |
STRICT_RAG_SYSTEM = (
|
| 134 |
'Role: You are a careful assistant. Your first duty is factual fidelity to the provided CONTEXT; '
|
| 135 |
-
'your second
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
# ----------------------------
# Strict RAG system prompt
# ----------------------------
# Adjacent string literals are concatenated at parse time into one prompt.
# NOTE: rag_answer() returns the refusal sentence below verbatim when
# retrieval is empty, so the wording here must not drift from that string.
STRICT_RAG_SYSTEM = (
    'Role: You are a careful assistant. Your first duty is factual fidelity to the provided CONTEXT; '
    'your second duty is to apply light stylistic polish (headings/bullets/concise wording) without adding, '
    'removing, or rephrasing facts. Golden rule (priority): 1) RAG facts 2) User instructions 3) Style. '
    'Answer ONLY using CONTEXT; if the context does not contain the answer, reply exactly: '
    '"I don\'t know based on the provided context." Do not use outside knowledge. Keep all names/dates/numbers '
    'exactly as in CONTEXT. Use inline [C#] citations at the end of each sentence that relies on CONTEXT. '
    'Style guardrails: you may adjust tone for clarity and flow and use brief headings or bullets; you may NOT '
    'introduce new claims, imply certainty not present in CONTEXT, or add evaluative language. If support is partial, '
    'state plainly what is unknown. Produce the answer now with inline [C#] citations.'
)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def rag_prompt(question: str, ctx: str) -> str:
    """Assemble the full RAG prompt: system rules, context, user task, answer cue."""
    sections = [
        STRICT_RAG_SYSTEM,
        f"CONTEXT:\n{ctx}",
        f"USER_TASK:\n{question}",
        "Assistant: Provide the answer now with inline [C#] citations.",
    ]
    # Blank line between sections; no trailing newline after the final cue.
    return "\n\n".join(sections)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
# ----------------------------
|
| 156 |
+
# Deterministic generation
|
| 157 |
+
# ----------------------------
|
| 158 |
+
def det_generate(
    prompt: str,
    strategy: str,
    beams: int,
    max_new_tokens: int
) -> str:
    """Greedy vs. beam-search (deterministic decoding).

    Args:
        prompt: Text passed directly to the generation pipeline.
        strategy: "beam" enables beam search; any other value means greedy.
        beams: Beam count (clamped to >= 1); ignored unless strategy == "beam".
        max_new_tokens: Cap on the number of newly generated tokens.

    Returns:
        The pipeline's "generated_text" field (prompt inclusion follows the
        pipeline's default return_full_text behavior).
    """
    seed_all(0)  # pin any residual nondeterminism to a fixed seed
    P = get_pipe()
    # The original had two near-identical pipeline calls; build the shared
    # deterministic kwargs once and branch only on the beam-specific flags.
    gen_kwargs: Dict[str, Any] = dict(
        do_sample=False,  # no sampling -> deterministic
        max_new_tokens=max_new_tokens,
        # NOTE(review): assumes the module-level `_tok` tokenizer is populated
        # by get_pipe(); falls back to None (pipeline default) otherwise.
        eos_token_id=_tok.eos_token_id if _tok and _tok.eos_token_id is not None else None,
    )
    if strategy == "beam":
        # early_stopping is only meaningful for beam search, so it is only
        # passed here (passing it in greedy mode triggers warnings).
        gen_kwargs.update(num_beams=max(1, beams), early_stopping=True)
    out = P(prompt, **gen_kwargs)
    return out[0]["generated_text"]
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# ----------------------------
|
| 188 |
+
# RAG (deterministic decoding: beams + length penalty)
|
| 189 |
+
# ----------------------------
|
| 190 |
+
def rag_answer(
    question: str,
    top_k: int,
    beams: int,
    length_penalty: float,
    max_new_tokens: int
) -> str:
    """RAG-grounded answer with deterministic decoding controls.

    Args:
        question: User question to answer from the corpus.
        top_k: Number of passages to retrieve.
        beams: Beam count for beam-search decoding (clamped to >= 1).
        length_penalty: >1.0 favors longer sequences, <1.0 shorter ones.
        max_new_tokens: Cap on the number of newly generated tokens.

    Returns:
        The generated answer, or the fixed refusal string when retrieval
        returns no passages.
    """
    hits = retrieve(question, k=top_k)
    if not hits:
        # Exact refusal wording mandated by STRICT_RAG_SYSTEM.
        return "I don't know based on the provided context."
    ctx = fmt_ctx(hits)
    prompt = rag_prompt(question, ctx)

    P = get_pipe()
    num_beams = max(1, beams)
    out = P(
        prompt,
        do_sample=False,                       # no sampling (deterministic)
        num_beams=num_beams,                   # beam search
        length_penalty=float(length_penalty),  # >1.0 favors longer sequences
        # Fix: early_stopping only applies to beam search; passing True with a
        # single beam is meaningless and emits transformers warnings.
        early_stopping=num_beams > 1,
        max_new_tokens=max_new_tokens,
        eos_token_id=_tok.eos_token_id if _tok and _tok.eos_token_id is not None else None,
    )
    return out[0]["generated_text"]
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
# ----------------------------
# Build index at import
# ----------------------------
# NOTE(review): importing this module has side effects — the corpus is loaded
# and the retrieval index is built immediately, before the UI is defined.
_docs = load_corpus("./corpus")  # expects text files under ./corpus
build_index(_docs)
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
# ----------------------------
|
| 225 |
+
# Gradio UI
|
| 226 |
+
# ----------------------------
|
| 227 |
+
with gr.Blocks(title="ITC 754 — Deterministic & RAG (Beams + Length Penalty)") as demo:
    # Intro banner shown above both tabs.
    gr.Markdown(
        "## ITC 754 — Deterministic vs RAG-Grounded\n"
        "RAG side now uses **Beams** and **Length Penalty** to align with deterministic decoding.\n"
        "Put `.txt` files into `./corpus` and ask questions grounded in that content."
    )

    # Tab 1: free-form prompt, deterministic decoding (greedy or beam).
    with gr.Tab("Deterministic Text"):
        inp = gr.Textbox(label="Prompt", placeholder="Explain beam search in one paragraph.")
        strat = gr.Dropdown(choices=["greedy", "beam"], value="beam", label="Strategy")
        beams = gr.Slider(1, 8, step=1, value=4, label="Beams (num_beams)")
        mxt = gr.Slider(16, 512, step=16, value=128, label="Max new tokens")
        btn = gr.Button("Generate")
        out = gr.Textbox(label="Output", lines=8)
        # Input order must match det_generate's signature: (prompt, strategy, beams, max_new_tokens).
        btn.click(det_generate, [inp, strat, beams, mxt], [out])

    # Tab 2: corpus-grounded answering with retrieval + beam controls.
    with gr.Tab("RAG-Grounded"):
        q = gr.Textbox(label="Question", placeholder="Ask a question answerable from your ./corpus/*.txt files.")
        topk = gr.Slider(1, 10, step=1, value=4, label="Top-K Passages")
        r_beams = gr.Slider(1, 8, step=1, value=4, label="Beams (num_beams)")
        lp = gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Length Penalty")
        r_mxt = gr.Slider(16, 512, step=16, value=180, label="Max new tokens")
        r_btn = gr.Button("Answer from RAG")
        r_out = gr.Textbox(label="Answer", lines=12)
        # Input order must match rag_answer's signature:
        # (question, top_k, beams, length_penalty, max_new_tokens).
        r_btn.click(rag_answer, [q, topk, r_beams, lp, r_mxt], [r_out])
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
# ----------------------------
# Launch
# ----------------------------
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, not on import.
    demo.launch()
|