# medibill/scripts/build_deck.py
# Uploaded by Anuj424614 using huggingface_hub (commit a09b1f5, verified).
"""Build the MediBill-Env pitch deck as a .pptx file (opens in Keynote).
Run:
python3 scripts/build_deck.py
Output:
docs/medibill_pitch.pptx
"""
from pathlib import Path
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt
# Where the generated deck is written: ``docs/medibill_pitch.pptx`` under the
# directory two levels above this file, so the result does not depend on the
# current working directory.
OUT = Path(__file__).resolve().parent.parent.joinpath("docs", "medibill_pitch.pptx")

# Brand-ish palette (kept simple — dark ink on white).
INK = RGBColor.from_string("101010")  # near-black text colour
ACCENT = RGBColor.from_string("0E6BA8")  # blue
HIGHLIGHT = RGBColor.from_string("0A843D")  # green for hero numbers
RULE = RGBColor.from_string("CCCCCC")  # light grey
def add_title(slide, text):
    """Write *text* into the slide's title placeholder and style it.

    Each paragraph of the title is left-aligned, and every run is set to
    40 pt, bold, in the ``INK`` colour.

    Args:
        slide: Slide exposing a title placeholder via ``slide.shapes.title``.
        text: Text to show as the slide title.
    """
    heading = slide.shapes.title
    heading.text = text
    # Font properties are set on runs, so walk every paragraph/run that the
    # text assignment above produced.
    for paragraph in heading.text_frame.paragraphs:
        paragraph.alignment = PP_ALIGN.LEFT
        for run in paragraph.runs:
            font = run.font
            font.size = Pt(40)
            font.bold = True
            font.color.rgb = INK
def add_bullets(slide, bullets):
    """Fill the slide's second placeholder with one top-level bullet per item.

    The first bullet reuses the paragraph that already exists in the text
    frame; each later bullet appends a new paragraph. Runs are set to 22 pt
    in the ``INK`` colour.

    Args:
        slide: Slide whose ``placeholders[1]`` is the body text placeholder.
        bullets: Iterable of strings, one per bullet, in display order.
    """
    body_frame = slide.placeholders[1].text_frame
    body_frame.word_wrap = True
    for index, bullet in enumerate(bullets):
        if index:
            paragraph = body_frame.add_paragraph()
        else:
            # Reuse the frame's existing first paragraph for the first bullet.
            paragraph = body_frame.paragraphs[0]
        paragraph.text = bullet
        paragraph.level = 0
        for run in paragraph.runs:
            run.font.size = Pt(22)
            run.font.color.rgb = INK
def add_notes(slide, notes):
    """Replace the slide's speaker notes with *notes*.

    Args:
        slide: Slide whose ``notes_slide.notes_text_frame`` receives the text.
        notes: Speaker-notes text for the slide.
    """
    slide.notes_slide.notes_text_frame.text = notes
def add_table(slide, left, top, width, height, headers, rows, *, hero_cells=None):
    """Add a styled table (one header row plus one row per entry of *rows*).

    Header cells get bold 16 pt white text on an ``ACCENT`` fill. Body cells
    get 14 pt ``INK`` text; cells listed in *hero_cells* are instead bold and
    coloured ``HIGHLIGHT``.

    Args:
        slide: Slide that receives the table shape.
        left, top, width, height: Position and size passed straight to
            ``slide.shapes.add_table``.
        headers: Column header strings; their count fixes the column count.
        rows: Sequence of rows, each an iterable of cell values (converted
            with ``str``).
        hero_cells: Optional collection of ``(row, col)`` pairs to emphasise.
            Row indices count the header as row 0, so the first body row is 1.
    """
    table = slide.shapes.add_table(
        1 + len(rows), len(headers), left, top, width, height
    ).table

    # Header row.
    for col, label in enumerate(headers):
        header_cell = table.cell(0, col)
        header_cell.text = label
        for paragraph in header_cell.text_frame.paragraphs:
            for run in paragraph.runs:
                run.font.bold = True
                run.font.size = Pt(16)
                run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
        header_cell.fill.solid()
        header_cell.fill.fore_color.rgb = ACCENT

    # Body rows; enumeration starts at 1 because row 0 is the header.
    highlighted = hero_cells if hero_cells else set()
    for row_idx, row in enumerate(rows, start=1):
        for col_idx, value in enumerate(row):
            body_cell = table.cell(row_idx, col_idx)
            body_cell.text = str(value)
            is_hero = (row_idx, col_idx) in highlighted
            for paragraph in body_cell.text_frame.paragraphs:
                for run in paragraph.runs:
                    run.font.size = Pt(14)
                    if is_hero:
                        run.font.color.rgb = HIGHLIGHT
                        run.font.bold = True
                    else:
                        run.font.color.rgb = INK
# Indices into the default template's slide layouts used by this deck.
_LAYOUT_TITLE = 0  # cover slide: title + subtitle placeholder
_LAYOUT_TITLE_CONTENT = 1  # title + content (bullets)
_LAYOUT_TITLE_ONLY = 5  # title only (tables / text boxes are added by hand)


def _add_text_lines(slide, left, top, width, height, lines, *, italic=False):
    """Add a word-wrapped text box with one 18 pt ``INK`` paragraph per line."""
    frame = slide.shapes.add_textbox(left, top, width, height).text_frame
    frame.word_wrap = True
    for index, line in enumerate(lines):
        # A new text frame already holds one empty paragraph; reuse it first.
        paragraph = frame.paragraphs[0] if index == 0 else frame.add_paragraph()
        paragraph.text = line
        for run in paragraph.runs:
            run.font.size = Pt(18)
            if italic:
                run.font.italic = True
            run.font.color.rgb = INK


def _build_cover(prs):
    """Slide 0: cover with deck title, subtitle and opening notes."""
    cover = prs.slides.add_slide(prs.slide_layouts[_LAYOUT_TITLE])
    cover.shapes.title.text = "MediBill-Env"
    cover.placeholders[1].text = (
        "Silent policy drift in Indian health-insurance claims\n"
        "Meta × Scaler OpenEnv Hackathon — Round 2"
    )
    add_notes(cover, "Hi, I'm Anuj. MediBill-Env is an OpenEnv environment for testing whether an LLM agent can detect and recover from silent policy drift in medical claims billing.")


def _build_regulatory_clock(prs):
    """Slide 1: the regulatory clock."""
    slide = prs.slides.add_slide(prs.slide_layouts[_LAYOUT_TITLE_CONTENT])
    add_title(slide, "180 minutes to close the claim.")
    add_bullets(slide, [
        "IRDAI mandate (May 2024): 1 hour pre-auth, 3 hours discharge",
        "Miss the 3-hour clock → insurer eats the cost from shareholder funds",
        "FY24: ~₹26,000 cr health-claim disallowed",
        "~13% of pre-auths still miss the window",
    ])
    add_notes(slide, (
        "In India, IRDAI gives hospitals one hour for pre-authorization and three hours for final "
        "discharge on every cashless claim. Miss the three-hour clock, and the overrun comes out of "
        "the insurer's shareholder fund. Industry estimates put FY24 disallowed health-claim value "
        "around twenty-six thousand crore rupees. Roughly thirteen percent of pre-auths still miss "
        "the one-hour window. The bottleneck is a human coder racing a clock, and the policies keep "
        "changing on them."
    ))


def _build_staleness_problem(prs):
    """Slide 2: the problem is staleness."""
    slide = prs.slides.add_slide(prs.slide_layouts[_LAYOUT_TITLE_CONTENT])
    add_title(slide, "Why agents fail here")
    add_bullets(slide, [
        "Rules engines handle static schema validation",
        "They do not handle staleness — yesterday's correct rule, today wrong",
        "Agents that imitate one month's trajectories fail quietly the next month",
        "We need an agent that knows to re-check before submitting",
    ])
    add_notes(slide, (
        "Most agent benchmarks check whether the agent can fill a form correctly. That is schema "
        "validation, and rules engines already do it. The real failure mode in this domain is "
        "staleness — the policy changed, the agent did not notice, the claim is wrong. An agent "
        "that learned by imitating last month's expert trajectories will reproduce last month's "
        "rules. We want an agent that knows to re-check before submitting."
    ))


def _build_environment(prs):
    """Slide 3: the environment (tools table, task-tier table, footer line)."""
    slide = prs.slides.add_slide(prs.slide_layouts[_LAYOUT_TITLE_ONLY])
    add_title(slide, "MediBill-Env: 5 tools, 3 task tiers, 6-axis grader")
    # Tools table (left half of the slide).
    add_table(
        slide,
        left=Inches(0.6), top=Inches(1.4), width=Inches(6.5), height=Inches(3.5),
        headers=["Tool", "Purpose"],
        rows=[
            ["ehr_query", "Read patient record"],
            ["insurance_lookup", "Fetch active policy rules"],
            ["coding_engine", "Write a policy-sensitive field"],
            ["escalate_to_human", "Calibrated abstention"],
            ["submit_claim", "Lock claim for grading"],
        ],
    )
    # Tasks table (right half of the slide).
    add_table(
        slide,
        left=Inches(7.4), top=Inches(1.4), width=Inches(5.4), height=Inches(2.5),
        headers=["Task tier", "Drift?"],
        rows=[
            ["easy_cashless", "no"],
            ["medium_multi_payer", "no"],
            ["hard_drift", "yes — silent, mid-episode"],
        ],
    )
    # Footer.
    _add_text_lines(
        slide,
        Inches(0.6), Inches(5.5), Inches(12.0), Inches(1.0),
        ["6-axis deterministic grader · disjoint identity/policy partition asserted at import · 5 reward-hacking attacks neutralised"],
        italic=True,
    )
    add_notes(slide, (
        "The agent has five tools: query the patient record, look up the insurer's active policy, "
        "write fields, escalate when uncertain, and submit. Three task tiers — easy, medium, and "
        "hard, where the policy mutates mid-episode. The grader has six axes with a disjoint field "
        "partition asserted at import time, so identity correctness and policy compliance never "
        "overlap."
    ))


def _build_hero_mechanic(prs):
    """Slide 4: the hero mechanic (silent multi-field policy drift)."""
    slide = prs.slides.add_slide(prs.slide_layouts[_LAYOUT_TITLE_CONTENT])
    add_title(slide, "Silent multi-field policy drift")
    add_bullets(slide, [
        "Active policy mutates 3–7 fields at a seed-randomized step",
        "No announcement — no observation flag, no metadata key, no event",
        "submit_claim is graded against the policy at submit time",
        "Only path to new rules: a fresh insurance_lookup after the drift step",
        "12 claim types × 3 tiers × randomized drift = ~12k+ unique trajectories",
        "Scripted baseline: 1.00 on easy, 0.7611 on drift — the 0.24 gap is the signal",
    ])
    add_notes(slide, (
        "On hard_drift tasks the active policy mutates mid-episode across three to seven fields — "
        "pre-auth thresholds, required signatures, narrative requirements, discharge attachment "
        "rules. Multi-field mutation, not a boolean. No announcement, no flag, no event. The only "
        "path to the new rules is a fresh insurance_lookup after the unknown drift step. "
        "Submissions are graded against the policy at submit time. Twelve claim types, three "
        "tiers, seed-randomized drift = over twelve thousand unique trajectories. Scripted "
        "baseline drops from one-zero on easy to zero-seven-six on drift. That zero-two-four gap "
        "is the trainable signal."
    ))


def _build_headline_measurements(prs):
    """Slide 5: headline measurements (two tables plus footer bullets)."""
    slide = prs.slides.add_slide(prs.slide_layouts[_LAYOUT_TITLE_ONLY])
    add_title(slide, "Base 0.00 → SFT v2 0.9999 avg. Teacher engineering broke through GRPO saturation.")
    # Hero table — Base → SFT v2. hero_cells rows count the header as row 0,
    # so rows 3 and 4 are "hard_drift" and "AVERAGE".
    add_table(
        slide,
        left=Inches(0.5), top=Inches(1.3), width=Inches(6.3), height=Inches(2.6),
        headers=["task", "base Qwen", "SFT v2", "lift"],
        rows=[
            ["easy_cashless", "0.0000", "1.0000", "+1.000"],
            ["medium_multi_payer", "0.0000", "1.0000", "+1.000"],
            ["hard_drift", "0.0000", "0.9996 ± 0.0008", "+0.9996"],
            ["AVERAGE", "0.0000", "0.9999", "+0.9999"],
        ],
        hero_cells={(3, 2), (3, 3), (4, 1), (4, 2), (4, 3)},
    )
    # Iteration table; row 4 is the final "SFT v2" checkpoint.
    add_table(
        slide,
        left=Inches(7.0), top=Inches(1.3), width=Inches(6.0), height=Inches(2.6),
        headers=["checkpoint", "hard_drift", "what changed"],
        rows=[
            ["Base Qwen 2.5 3B", "0.0000", "untrained"],
            ["SFT v1", "0.7573", "scripted teacher (parity)"],
            ["GRPO over SFT v1", "0.7575 (Δ±0.0002)", "rewards saturated — calibration"],
            ["SFT v2", "0.9996", "drift-aware teacher"],
        ],
        hero_cells={(4, 1), (4, 2)},
    )
    # Footer bullets.
    _add_text_lines(
        slide,
        Inches(0.5), Inches(4.5), Inches(12.5), Inches(2.5),
        [
            "• 5 exploit patterns explicitly neutralised — all five score ≤ no_op",
            "• Pivot was teacher engineering, not RL — +0.2423 lift on hard_drift in 90 trajectories + 33 min retraining",
            "• Verified via Codex reproducibility protocol: sha256 byte-match of adapter weights + fresh-subprocess re-eval × 2",
        ],
    )
    add_notes(slide, (
        "Six bars on hard_drift, left to right: base Qwen at zero, random at eleven, no-op at "
        "eight, scripted at seventy-six, SFT v1 at seventy-six, our final SFT v2 at "
        "zero-point-nine-nine-nine-six. Untrained, the 3B model scores literal zero — zero parse "
        "failures across fifteen episodes — it can format JSON, it just has no policy reasoning. "
        "SFT v1 hit scripted-teacher parity. Then GRPO with five reward functions saturated — "
        "delta two ten-thousandths, gradient ten-to-minus-seven. Diagnosis: SFT extracts "
        "everything the rewards can grip on. So we engineered a stronger teacher — Scripted plus "
        "plus, which escalates ambiguous cells and does a fresh insurance lookup before each "
        "submit. Ninety new trajectories, thirty-three minutes of retraining. SFT v2: one-zero-zero "
        "on easy and medium, zero-point-nine-nine-nine-six on hard. Average lift base to SFT v2: "
        "zero-point-nine-nine-nine-nine."
    ))


def _build_scope_and_close(prs):
    """Slide 6: scope and close."""
    slide = prs.slides.add_slide(prs.slide_layouts[_LAYOUT_TITLE_CONTENT])
    add_title(slide, "Environment-first submission under Theme 3.1")
    add_bullets(slide, [
        "Shipping today: env + grader + 5-attack exploit suite + scripted baseline + SFT v2 adapter (0.9999 avg)",
        "Two of six axes — abstention_quality and drift_bonus — are RL-only targets (spec v3 §7.6)",
        "Code enforces every claim: disjoint partition asserted at import, 5 exploit tests, prompt-version handshake",
        "Theme 3.1 — DataOps Copilot. Enterprise reasoning under shifting business rules.",
        "Repo: github.com/Algoace1403/METAHackthon2026",
        "HF Space (LIVE): huggingface.co/spaces/Anuj424614/medibill-env",
    ])
    add_notes(slide, (
        "We submit under Theme 3.1, DataOps Copilot. Shipping today: the environment, six-axis "
        "deterministic grader, silent drift mechanic, five-attack exploit suite, scripted "
        "baseline, and a trained SFT v2 adapter that hits zero-point-nine-nine-nine-nine average "
        "across all three difficulty tiers — table on slide five. Two axes — abstention and "
        "drift_bonus — are RL-only by design. Disjoint partition at import, five exploit tests, "
        "prompt-version handshake. Repo and live HF Space on screen. Thank you."
    ))


def main() -> None:
    """Build the seven-slide pitch deck and save it to ``OUT``.

    Creates a presentation from the default template, resizes it to
    13.333 x 7.5 inches, adds the cover plus six content slides in order
    (each with speaker notes), creates the output directory if needed,
    writes the ``.pptx`` file and prints the path and slide count.
    """
    prs = Presentation()
    prs.slide_width = Inches(13.333)
    prs.slide_height = Inches(7.5)

    # Slide order is the presentation order; keep builders in this sequence.
    for build_slide in (
        _build_cover,
        _build_regulatory_clock,
        _build_staleness_problem,
        _build_environment,
        _build_hero_mechanic,
        _build_headline_measurements,
        _build_scope_and_close,
    ):
        build_slide(prs)

    OUT.parent.mkdir(parents=True, exist_ok=True)
    prs.save(OUT)
    print(f"Saved deck to: {OUT}")
    print(f"Slide count: {len(prs.slides)}")


if __name__ == "__main__":
    main()