"""Build the MediBill-Env pitch deck as a .pptx file (opens in Keynote).

Run:
    python3 scripts/build_deck.py

Output:
    docs/medibill_pitch.pptx
"""
from pathlib import Path
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt
# Destination of the generated deck: "docs/medibill_pitch.pptx" under the
# directory that is two levels above this file's resolved location.
OUT = Path(__file__).resolve().parent.parent / "docs" / "medibill_pitch.pptx"
# Brand-ish palette (kept simple — dark ink on white)
INK = RGBColor(0x10, 0x10, 0x10)  # near-black; titles, bullets, table body text, footers
ACCENT = RGBColor(0x0E, 0x6B, 0xA8)  # blue; fill colour of table header cells
HIGHLIGHT = RGBColor(0x0A, 0x84, 0x3D)  # green for hero numbers
RULE = RGBColor(0xCC, 0xCC, 0xCC)  # light grey; not referenced by the code visible here
def add_title(slide, text):
    """Write *text* into the slide's title placeholder and style it.

    Every paragraph of the title is left-aligned and every run is set to
    40 pt, bold, in the ``INK`` colour.

    Args:
        slide: Slide whose ``shapes.title`` placeholder receives the text.
        text: Title string to display.
    """
    heading = slide.shapes.title
    heading.text = text
    for paragraph in heading.text_frame.paragraphs:
        paragraph.alignment = PP_ALIGN.LEFT
        for piece in paragraph.runs:
            font = piece.font
            font.size = Pt(40)
            font.bold = True
            font.color.rgb = INK
def add_bullets(slide, bullets):
    """Fill the slide's second placeholder with one paragraph per bullet.

    The first bullet reuses the text frame's existing first paragraph; each
    later bullet appends a new paragraph. All bullets sit at level 0 and
    their runs are set to 22 pt in the ``INK`` colour.

    Args:
        slide: Slide whose ``placeholders[1]`` holds the body text frame.
        bullets: Iterable of bullet strings, in display order.
    """
    frame = slide.placeholders[1].text_frame
    frame.word_wrap = True
    for position, line in enumerate(bullets):
        if position:
            paragraph = frame.add_paragraph()
        else:
            # The frame already owns a first paragraph; reuse it for bullet 0.
            paragraph = frame.paragraphs[0]
        paragraph.text = line
        paragraph.level = 0
        for piece in paragraph.runs:
            font = piece.font
            font.size = Pt(22)
            font.color.rgb = INK
def add_notes(slide, notes):
    """Set the speaker notes of *slide* to *notes*.

    Args:
        slide: Slide exposing ``notes_slide.notes_text_frame``.
        notes: Text assigned to the notes text frame.
    """
    slide.notes_slide.notes_text_frame.text = notes
def add_table(slide, left, top, width, height, headers, rows, *, hero_cells=None):
    """Add a styled table with one header row to *slide*.

    Header cells are bold 16 pt white text on an ``ACCENT`` fill. Body cells
    are 14 pt ``INK`` text; cells listed in *hero_cells* are additionally
    bold and coloured ``HIGHLIGHT``.

    Args:
        slide: Slide that receives the table shape.
        left, top, width, height: Position and size, passed straight to
            ``slide.shapes.add_table``.
        headers: Column labels; each is converted with ``str()``.
        rows: Iterable of body rows; each value is converted with ``str()``.
            A row may be shorter than *headers* (trailing cells are left
            untouched) but not longer.
        hero_cells: Optional collection of ``(row, col)`` tuples to
            emphasise. ``row`` counts table rows, so the header is row 0 and
            the first body row is row 1; ``col`` is 0-based.

    Raises:
        ValueError: If *headers* is empty or a row has more values than
            there are header columns.
    """
    # Materialise both inputs so generators work and widths can be checked
    # before any shape is added to the slide.
    headers = [str(h) for h in headers]
    rows = [list(row) for row in rows]
    if not headers:
        raise ValueError("add_table() requires at least one header column")
    for row_idx, row in enumerate(rows, start=1):
        if len(row) > len(headers):
            raise ValueError(
                f"row {row_idx} has {len(row)} values but the table has "
                f"only {len(headers)} columns"
            )

    tbl_shape = slide.shapes.add_table(
        1 + len(rows), len(headers), left, top, width, height
    )
    tbl = tbl_shape.table

    # Header row
    white = RGBColor(0xFF, 0xFF, 0xFF)
    for col_idx, label in enumerate(headers):
        cell = tbl.cell(0, col_idx)
        cell.text = label
        for para in cell.text_frame.paragraphs:
            for run in para.runs:
                run.font.bold = True
                run.font.size = Pt(16)
                run.font.color.rgb = white
        cell.fill.solid()
        cell.fill.fore_color.rgb = ACCENT

    # Body rows
    hero_cells = hero_cells or set()
    for row_idx, row in enumerate(rows, start=1):
        for col_idx, val in enumerate(row):
            cell = tbl.cell(row_idx, col_idx)
            cell.text = str(val)
            is_hero = (row_idx, col_idx) in hero_cells
            for para in cell.text_frame.paragraphs:
                for run in para.runs:
                    run.font.size = Pt(14)
                    run.font.color.rgb = INK
                    if is_hero:
                        # Hero styling overrides the default body colour.
                        run.font.bold = True
                        run.font.color.rgb = HIGHLIGHT
def main() -> None:
    """Build the seven-slide MediBill-Env pitch deck and save it to ``OUT``.

    Creates a presentation sized 13.333 x 7.5 inches, adds a cover slide and
    six content slides (bullets and/or tables, each with speaker notes),
    creates the output directory if needed, writes the .pptx file and prints
    the output path and slide count.

    All non-ASCII characters in slide text are written as real Unicode
    characters (—, –, →, ×, ₹, ·, •, ≤, ±, Δ, §).
    """
    prs = Presentation()
    prs.slide_width = Inches(13.333)
    prs.slide_height = Inches(7.5)

    title_content = prs.slide_layouts[1]  # Title + content
    title_only = prs.slide_layouts[5]  # Title only (we'll add tables)

    # ---- Slide 0: Cover -------------------------------------------------
    cover_layout = prs.slide_layouts[0]
    cover = prs.slides.add_slide(cover_layout)
    cover.shapes.title.text = "MediBill-Env"
    sub = cover.placeholders[1]
    sub.text = "Silent policy drift in Indian health-insurance claims\nMeta × Scaler OpenEnv Hackathon — Round 2"
    add_notes(cover, "Hi, I'm Anuj. MediBill-Env is an OpenEnv environment for testing whether an LLM agent can detect and recover from silent policy drift in medical claims billing.")

    # ---- Slide 1: The regulatory clock ---------------------------------
    s1 = prs.slides.add_slide(title_content)
    add_title(s1, "180 minutes to close the claim.")
    add_bullets(s1, [
        "IRDAI mandate (May 2024): 1 hour pre-auth, 3 hours discharge",
        "Miss the 3-hour clock → insurer eats the cost from shareholder funds",
        "FY24: ~₹26,000 cr health-claim disallowed",
        "~13% of pre-auths still miss the window",
    ])
    add_notes(s1, (
        "In India, IRDAI gives hospitals one hour for pre-authorization and three hours for final "
        "discharge on every cashless claim. Miss the three-hour clock, and the overrun comes out of "
        "the insurer's shareholder fund. Industry estimates put FY24 disallowed health-claim value "
        "around twenty-six thousand crore rupees. Roughly thirteen percent of pre-auths still miss "
        "the one-hour window. The bottleneck is a human coder racing a clock, and the policies keep "
        "changing on them."
    ))

    # ---- Slide 2: Problem is staleness ----------------------------------
    s2 = prs.slides.add_slide(title_content)
    add_title(s2, "Why agents fail here")
    add_bullets(s2, [
        "Rules engines handle static schema validation",
        "They do not handle staleness — yesterday's correct rule, today wrong",
        "Agents that imitate one month's trajectories fail quietly the next month",
        "We need an agent that knows to re-check before submitting",
    ])
    add_notes(s2, (
        "Most agent benchmarks check whether the agent can fill a form correctly. That is schema "
        "validation, and rules engines already do it. The real failure mode in this domain is "
        "staleness — the policy changed, the agent did not notice, the claim is wrong. An agent "
        "that learned by imitating last month's expert trajectories will reproduce last month's "
        "rules. We want an agent that knows to re-check before submitting."
    ))

    # ---- Slide 3: The environment ---------------------------------------
    s3 = prs.slides.add_slide(title_only)
    add_title(s3, "MediBill-Env: 5 tools, 3 task tiers, 6-axis grader")
    # Tools table
    add_table(
        s3,
        left=Inches(0.6), top=Inches(1.4), width=Inches(6.5), height=Inches(3.5),
        headers=["Tool", "Purpose"],
        rows=[
            ["ehr_query", "Read patient record"],
            ["insurance_lookup", "Fetch active policy rules"],
            ["coding_engine", "Write a policy-sensitive field"],
            ["escalate_to_human", "Calibrated abstention"],
            ["submit_claim", "Lock claim for grading"],
        ],
    )
    # Tasks table
    add_table(
        s3,
        left=Inches(7.4), top=Inches(1.4), width=Inches(5.4), height=Inches(2.5),
        headers=["Task tier", "Drift?"],
        rows=[
            ["easy_cashless", "no"],
            ["medium_multi_payer", "no"],
            ["hard_drift", "yes — silent, mid-episode"],
        ],
    )
    # Footer
    txt = s3.shapes.add_textbox(Inches(0.6), Inches(5.5), Inches(12.0), Inches(1.0))
    tf = txt.text_frame
    tf.word_wrap = True
    p = tf.paragraphs[0]
    p.text = "6-axis deterministic grader · disjoint identity/policy partition asserted at import · 5 reward-hacking attacks neutralised"
    for r in p.runs:
        r.font.size = Pt(18)
        r.font.italic = True
        r.font.color.rgb = INK
    add_notes(s3, (
        "The agent has five tools: query the patient record, look up the insurer's active policy, "
        "write fields, escalate when uncertain, and submit. Three task tiers — easy, medium, and "
        "hard, where the policy mutates mid-episode. The grader has six axes with a disjoint field "
        "partition asserted at import time, so identity correctness and policy compliance never "
        "overlap."
    ))

    # ---- Slide 4: The hero mechanic -------------------------------------
    s4 = prs.slides.add_slide(title_content)
    add_title(s4, "Silent multi-field policy drift")
    add_bullets(s4, [
        "Active policy mutates 3–7 fields at a seed-randomized step",
        "No announcement — no observation flag, no metadata key, no event",
        "submit_claim is graded against the policy at submit time",
        "Only path to new rules: a fresh insurance_lookup after the drift step",
        "12 claim types × 3 tiers × randomized drift = ~12k+ unique trajectories",
        "Scripted baseline: 1.00 on easy, 0.7611 on drift — the 0.24 gap is the signal",
    ])
    add_notes(s4, (
        "On hard_drift tasks the active policy mutates mid-episode across three to seven fields — "
        "pre-auth thresholds, required signatures, narrative requirements, discharge attachment "
        "rules. Multi-field mutation, not a boolean. No announcement, no flag, no event. The only "
        "path to the new rules is a fresh insurance_lookup after the unknown drift step. "
        "Submissions are graded against the policy at submit time. Twelve claim types, three "
        "tiers, seed-randomized drift = over twelve thousand unique trajectories. Scripted "
        "baseline drops from one-zero on easy to zero-seven-six on drift. That zero-two-four gap "
        "is the trainable signal."
    ))

    # ---- Slide 5: HEADLINE — measurements -------------------------------
    s5 = prs.slides.add_slide(title_only)
    add_title(s5, "Base 0.00 → SFT v2 0.9999 avg. Teacher engineering broke through GRPO saturation.")
    # Hero table — Base → SFT v2
    # hero_cells are (table_row, col) with the header as row 0, so row 3 is
    # "hard_drift" and row 4 is "AVERAGE".
    add_table(
        s5,
        left=Inches(0.5), top=Inches(1.3), width=Inches(6.3), height=Inches(2.6),
        headers=["task", "base Qwen", "SFT v2", "lift"],
        rows=[
            ["easy_cashless", "0.0000", "1.0000", "+1.000"],
            ["medium_multi_payer", "0.0000", "1.0000", "+1.000"],
            ["hard_drift", "0.0000", "0.9996 ± 0.0008", "+0.9996"],
            ["AVERAGE", "0.0000", "0.9999", "+0.9999"],
        ],
        hero_cells={(3, 2), (3, 3), (4, 1), (4, 2), (4, 3)},
    )
    # Iteration table (row 4 is the "SFT v2" checkpoint)
    add_table(
        s5,
        left=Inches(7.0), top=Inches(1.3), width=Inches(6.0), height=Inches(2.6),
        headers=["checkpoint", "hard_drift", "what changed"],
        rows=[
            ["Base Qwen 2.5 3B", "0.0000", "untrained"],
            ["SFT v1", "0.7573", "scripted teacher (parity)"],
            ["GRPO over SFT v1", "0.7575 (Δ±0.0002)", "rewards saturated — calibration"],
            ["SFT v2", "0.9996", "drift-aware teacher"],
        ],
        hero_cells={(4, 1), (4, 2)},
    )
    # Footer bullets
    foot = s5.shapes.add_textbox(Inches(0.5), Inches(4.5), Inches(12.5), Inches(2.5))
    tf = foot.text_frame
    tf.word_wrap = True
    p1 = tf.paragraphs[0]
    p1.text = "• 5 exploit patterns explicitly neutralised — all five score ≤ no_op"
    p2 = tf.add_paragraph()
    p2.text = "• Pivot was teacher engineering, not RL — +0.2423 lift on hard_drift in 90 trajectories + 33 min retraining"
    p3 = tf.add_paragraph()
    p3.text = "• Verified via Codex reproducibility protocol: sha256 byte-match of adapter weights + fresh-subprocess re-eval × 2"
    for p in (p1, p2, p3):
        for r in p.runs:
            r.font.size = Pt(18)
            r.font.color.rgb = INK
    # NOTE(review): these notes describe "six bars", but this slide is built
    # from two tables and a text box — confirm whether a chart is added by hand.
    add_notes(s5, (
        "Six bars on hard_drift, left to right: base Qwen at zero, random at eleven, no-op at "
        "eight, scripted at seventy-six, SFT v1 at seventy-six, our final SFT v2 at "
        "zero-point-nine-nine-nine-six. Untrained, the 3B model scores literal zero — zero parse "
        "failures across fifteen episodes — it can format JSON, it just has no policy reasoning. "
        "SFT v1 hit scripted-teacher parity. Then GRPO with five reward functions saturated — "
        "delta two ten-thousandths, gradient ten-to-minus-seven. Diagnosis: SFT extracts "
        "everything the rewards can grip on. So we engineered a stronger teacher — Scripted plus "
        "plus, which escalates ambiguous cells and does a fresh insurance lookup before each "
        "submit. Ninety new trajectories, thirty-three minutes of retraining. SFT v2: one-zero-zero "
        "on easy and medium, zero-point-nine-nine-nine-six on hard. Average lift base to SFT v2: "
        "zero-point-nine-nine-nine-nine."
    ))

    # ---- Slide 6: Scope + close -----------------------------------------
    s6 = prs.slides.add_slide(title_content)
    add_title(s6, "Environment-first submission under Theme 3.1")
    add_bullets(s6, [
        "Shipping today: env + grader + 5-attack exploit suite + scripted baseline + SFT v2 adapter (0.9999 avg)",
        "Two of six axes — abstention_quality and drift_bonus — are RL-only targets (spec v3 §7.6)",
        "Code enforces every claim: disjoint partition asserted at import, 5 exploit tests, prompt-version handshake",
        "Theme 3.1 — DataOps Copilot. Enterprise reasoning under shifting business rules.",
        "Repo: github.com/Algoace1403/METAHackthon2026",
        "HF Space (LIVE): huggingface.co/spaces/Anuj424614/medibill-env",
    ])
    add_notes(s6, (
        "We submit under Theme 3.1, DataOps Copilot. Shipping today: the environment, six-axis "
        "deterministic grader, silent drift mechanic, five-attack exploit suite, scripted "
        "baseline, and a trained SFT v2 adapter that hits zero-point-nine-nine-nine-nine average "
        "across all three difficulty tiers — table on slide five. Two axes — abstention and "
        "drift_bonus — are RL-only by design. Disjoint partition at import, five exploit tests, "
        "prompt-version handshake. Repo and live HF Space on screen. Thank you."
    ))

    # Make sure docs/ exists before writing the deck.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    prs.save(OUT)
    print(f"Saved deck to: {OUT}")
    print(f"Slide count: {len(prs.slides)}")
# Build the deck only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|