File size: 13,287 Bytes
a09b1f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
"""Build the MediBill-Env pitch deck as a .pptx file (opens in Keynote).

Run:
    python3 scripts/build_deck.py

Output:
    docs/medibill_pitch.pptx
"""

from pathlib import Path

from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt


# Output file: "docs/medibill_pitch.pptx" under the directory two levels above
# this script file (i.e. the parent of the folder that contains it).
OUT = Path(__file__).resolve().parent.parent / "docs" / "medibill_pitch.pptx"

# Brand-ish palette (kept simple — dark ink on white)
INK = RGBColor(0x10, 0x10, 0x10)           # near-black, used for titles and body text
ACCENT = RGBColor(0x0E, 0x6B, 0xA8)        # blue
HIGHLIGHT = RGBColor(0x0A, 0x84, 0x3D)     # green for hero numbers
RULE = RGBColor(0xCC, 0xCC, 0xCC)          # light grey; not referenced in the code visible here


def add_title(slide, text):
    """Set the title placeholder of *slide* to *text* and apply deck styling.

    Every paragraph of the title is left-aligned, and every run is rendered
    at 40 pt, bold, in the ``INK`` colour.
    """
    heading = slide.shapes.title
    heading.text = text
    for paragraph in heading.text_frame.paragraphs:
        paragraph.alignment = PP_ALIGN.LEFT
        for run in paragraph.runs:
            font = run.font
            font.size = Pt(40)
            font.bold = True
            font.color.rgb = INK


def add_bullets(slide, bullets):
    """Fill the second placeholder of *slide* with one paragraph per bullet.

    The first bullet reuses the text frame's existing first paragraph; each
    later bullet appends a new one. All bullets sit at outline level 0 and
    are rendered at 22 pt in the ``INK`` colour, with word wrap enabled.
    """
    frame = slide.placeholders[1].text_frame
    frame.word_wrap = True
    for index, line in enumerate(bullets):
        if index:
            paragraph = frame.add_paragraph()
        else:
            # The frame always starts with one (empty) paragraph; reuse it.
            paragraph = frame.paragraphs[0]
        paragraph.text = line
        paragraph.level = 0
        for run in paragraph.runs:
            font = run.font
            font.size = Pt(22)
            font.color.rgb = INK


def add_notes(slide, notes):
    """Replace the speaker-notes text of *slide* with *notes*."""
    slide.notes_slide.notes_text_frame.text = notes


def _style_cell_runs(cell, *, size, color, bold=False):
    """Apply font size and colour (and bold, when requested) to every run in *cell*."""
    for paragraph in cell.text_frame.paragraphs:
        for run in paragraph.runs:
            font = run.font
            font.size = size
            font.color.rgb = color
            if bold:
                # Only ever switch bold on; leave the inherited value otherwise.
                font.bold = True


def add_table(slide, left, top, width, height, headers, rows, *, hero_cells=None):
    """Add a styled table to *slide*.

    The table has one header row followed by one row per entry in *rows*.
    Header cells are bold 16 pt white text on an ``ACCENT`` fill. Body cells
    are 14 pt ``INK`` text, except cells listed in *hero_cells*, which are
    bold and coloured ``HIGHLIGHT``.

    Args:
        slide: Slide whose shape collection receives the table.
        left, top, width, height: Position and size passed straight through
            to ``slide.shapes.add_table``.
        headers: Column labels; their count fixes the number of columns.
            Each label is converted with ``str()``, like the body values.
        rows: Iterable of row sequences; each value is converted with
            ``str()`` before being written.
        hero_cells: Optional collection of ``(row, col)`` pairs to emphasise.
            Row indices are table indices, so the first body row is ``1``
            (row ``0`` is the header); columns are 0-based.
    """
    row_count = 1 + len(rows)
    col_count = len(headers)
    table = slide.shapes.add_table(row_count, col_count, left, top, width, height).table

    # Header row
    for col, label in enumerate(headers):
        cell = table.cell(0, col)
        # Coerce to text so non-string labels behave like body values do.
        cell.text = str(label)
        _style_cell_runs(cell, size=Pt(16), color=RGBColor(0xFF, 0xFF, 0xFF), bold=True)
        cell.fill.solid()
        cell.fill.fore_color.rgb = ACCENT

    # Body rows (enumeration starts at 1 because row 0 is the header)
    emphasised = hero_cells or set()
    for row_idx, row in enumerate(rows, start=1):
        for col, value in enumerate(row):
            cell = table.cell(row_idx, col)
            cell.text = str(value)
            is_hero = (row_idx, col) in emphasised
            _style_cell_runs(
                cell,
                size=Pt(14),
                color=HIGHLIGHT if is_hero else INK,
                bold=is_hero,
            )


def main() -> None:
    """Build the seven-slide MediBill-Env pitch deck and save it to ``OUT``.

    Slides are created in order: a cover slide followed by six content
    slides. Each content slide gets a title, either bullets or tables plus a
    free-standing text box, and speaker notes. The parent directory of
    ``OUT`` is created if missing, the presentation is written, and the
    output path and slide count are printed.

    All slide text uses real Unicode punctuation (em dash, multiplication
    sign, arrow, rupee sign, middle dot, plus-minus, bullet, less-than-or-
    equal, section sign) rather than mis-decoded byte sequences, so the deck
    renders the intended characters.
    """
    prs = Presentation()
    # 13.333in x 7.5in is a 16:9 canvas.
    # NOTE(review): the stock template's layouts are presumably authored for a
    # narrower slide; confirm placeholder positions in the rendered deck.
    prs.slide_width = Inches(13.333)
    prs.slide_height = Inches(7.5)

    title_content = prs.slide_layouts[1]   # Title + content
    title_only = prs.slide_layouts[5]      # Title only (we'll add tables)

    # ---- Slide 0: Cover -------------------------------------------------
    cover_layout = prs.slide_layouts[0]
    cover = prs.slides.add_slide(cover_layout)
    cover.shapes.title.text = "MediBill-Env"
    sub = cover.placeholders[1]
    sub.text = "Silent policy drift in Indian health-insurance claims\nMeta × Scaler OpenEnv Hackathon — Round 2"
    add_notes(cover, "Hi, I'm Anuj. MediBill-Env is an OpenEnv environment for testing whether an LLM agent can detect and recover from silent policy drift in medical claims billing.")

    # ---- Slide 1: The regulatory clock ---------------------------------
    s1 = prs.slides.add_slide(title_content)
    add_title(s1, "180 minutes to close the claim.")
    add_bullets(s1, [
        "IRDAI mandate (May 2024): 1 hour pre-auth, 3 hours discharge",
        "Miss the 3-hour clock → insurer eats the cost from shareholder funds",
        "FY24: ~₹26,000 cr health-claim disallowed",
        "~13% of pre-auths still miss the window",
    ])
    add_notes(s1, (
        "In India, IRDAI gives hospitals one hour for pre-authorization and three hours for final "
        "discharge on every cashless claim. Miss the three-hour clock, and the overrun comes out of "
        "the insurer's shareholder fund. Industry estimates put FY24 disallowed health-claim value "
        "around twenty-six thousand crore rupees. Roughly thirteen percent of pre-auths still miss "
        "the one-hour window. The bottleneck is a human coder racing a clock, and the policies keep "
        "changing on them."
    ))

    # ---- Slide 2: Problem is staleness ----------------------------------
    s2 = prs.slides.add_slide(title_content)
    add_title(s2, "Why agents fail here")
    add_bullets(s2, [
        "Rules engines handle static schema validation",
        "They do not handle staleness — yesterday's correct rule, today wrong",
        "Agents that imitate one month's trajectories fail quietly the next month",
        "We need an agent that knows to re-check before submitting",
    ])
    add_notes(s2, (
        "Most agent benchmarks check whether the agent can fill a form correctly. That is schema "
        "validation, and rules engines already do it. The real failure mode in this domain is "
        "staleness — the policy changed, the agent did not notice, the claim is wrong. An agent "
        "that learned by imitating last month's expert trajectories will reproduce last month's "
        "rules. We want an agent that knows to re-check before submitting."
    ))

    # ---- Slide 3: The environment ---------------------------------------
    s3 = prs.slides.add_slide(title_only)
    add_title(s3, "MediBill-Env: 5 tools, 3 task tiers, 6-axis grader")
    # Tools table
    add_table(
        s3,
        left=Inches(0.6), top=Inches(1.4), width=Inches(6.5), height=Inches(3.5),
        headers=["Tool", "Purpose"],
        rows=[
            ["ehr_query", "Read patient record"],
            ["insurance_lookup", "Fetch active policy rules"],
            ["coding_engine", "Write a policy-sensitive field"],
            ["escalate_to_human", "Calibrated abstention"],
            ["submit_claim", "Lock claim for grading"],
        ],
    )
    # Tasks table
    add_table(
        s3,
        left=Inches(7.4), top=Inches(1.4), width=Inches(5.4), height=Inches(2.5),
        headers=["Task tier", "Drift?"],
        rows=[
            ["easy_cashless", "no"],
            ["medium_multi_payer", "no"],
            ["hard_drift", "yes — silent, mid-episode"],
        ],
    )
    # Footer
    txt = s3.shapes.add_textbox(Inches(0.6), Inches(5.5), Inches(12.0), Inches(1.0))
    tf = txt.text_frame
    tf.word_wrap = True
    p = tf.paragraphs[0]
    p.text = "6-axis deterministic grader · disjoint identity/policy partition asserted at import · 5 reward-hacking attacks neutralised"
    for r in p.runs:
        r.font.size = Pt(18)
        r.font.italic = True
        r.font.color.rgb = INK
    add_notes(s3, (
        "The agent has five tools: query the patient record, look up the insurer's active policy, "
        "write fields, escalate when uncertain, and submit. Three task tiers — easy, medium, and "
        "hard, where the policy mutates mid-episode. The grader has six axes with a disjoint field "
        "partition asserted at import time, so identity correctness and policy compliance never "
        "overlap."
    ))

    # ---- Slide 4: The hero mechanic -------------------------------------
    s4 = prs.slides.add_slide(title_content)
    add_title(s4, "Silent multi-field policy drift")
    add_bullets(s4, [
        "Active policy mutates 3–7 fields at a seed-randomized step",
        "No announcement — no observation flag, no metadata key, no event",
        "submit_claim is graded against the policy at submit time",
        "Only path to new rules: a fresh insurance_lookup after the drift step",
        "12 claim types × 3 tiers × randomized drift = ~12k+ unique trajectories",
        "Scripted baseline: 1.00 on easy, 0.7611 on drift — the 0.24 gap is the signal",
    ])
    add_notes(s4, (
        "On hard_drift tasks the active policy mutates mid-episode across three to seven fields — "
        "pre-auth thresholds, required signatures, narrative requirements, discharge attachment "
        "rules. Multi-field mutation, not a boolean. No announcement, no flag, no event. The only "
        "path to the new rules is a fresh insurance_lookup after the unknown drift step. "
        "Submissions are graded against the policy at submit time. Twelve claim types, three "
        "tiers, seed-randomized drift = over twelve thousand unique trajectories. Scripted "
        "baseline drops from one-zero on easy to zero-seven-six on drift. That zero-two-four gap "
        "is the trainable signal."
    ))

    # ---- Slide 5: HEADLINE — measurements -------------------------------
    s5 = prs.slides.add_slide(title_only)
    add_title(s5, "Base 0.00 → SFT v2 0.9999 avg. Teacher engineering broke through GRPO saturation.")
    # Hero table — Base → SFT v2
    # hero_cells rows are table rows: 0 is the header, so 3 = hard_drift, 4 = AVERAGE.
    add_table(
        s5,
        left=Inches(0.5), top=Inches(1.3), width=Inches(6.3), height=Inches(2.6),
        headers=["task", "base Qwen", "SFT v2", "lift"],
        rows=[
            ["easy_cashless", "0.0000", "1.0000", "+1.000"],
            ["medium_multi_payer", "0.0000", "1.0000", "+1.000"],
            ["hard_drift", "0.0000", "0.9996 ± 0.0008", "+0.9996"],
            ["AVERAGE", "0.0000", "0.9999", "+0.9999"],
        ],
        hero_cells={(3, 2), (3, 3), (4, 1), (4, 2), (4, 3)},
    )
    # Iteration table (row 4 = the SFT v2 checkpoint)
    add_table(
        s5,
        left=Inches(7.0), top=Inches(1.3), width=Inches(6.0), height=Inches(2.6),
        headers=["checkpoint", "hard_drift", "what changed"],
        rows=[
            ["Base Qwen 2.5 3B", "0.0000", "untrained"],
            ["SFT v1", "0.7573", "scripted teacher (parity)"],
            ["GRPO over SFT v1", "0.7575 (Δ±0.0002)", "rewards saturated — calibration"],
            ["SFT v2", "0.9996", "drift-aware teacher"],
        ],
        hero_cells={(4, 1), (4, 2)},
    )
    # Footer bullets
    foot = s5.shapes.add_textbox(Inches(0.5), Inches(4.5), Inches(12.5), Inches(2.5))
    tf = foot.text_frame
    tf.word_wrap = True
    p1 = tf.paragraphs[0]
    p1.text = "• 5 exploit patterns explicitly neutralised — all five score ≤ no_op"
    p2 = tf.add_paragraph()
    p2.text = "• Pivot was teacher engineering, not RL — +0.2423 lift on hard_drift in 90 trajectories + 33 min retraining"
    p3 = tf.add_paragraph()
    p3.text = "• Verified via Codex reproducibility protocol: sha256 byte-match of adapter weights + fresh-subprocess re-eval × 2"
    for p in (p1, p2, p3):
        for r in p.runs:
            r.font.size = Pt(18)
            r.font.color.rgb = INK
    add_notes(s5, (
        "Six bars on hard_drift, left to right: base Qwen at zero, random at eleven, no-op at "
        "eight, scripted at seventy-six, SFT v1 at seventy-six, our final SFT v2 at "
        "zero-point-nine-nine-nine-six. Untrained, the 3B model scores literal zero — zero parse "
        "failures across fifteen episodes — it can format JSON, it just has no policy reasoning. "
        "SFT v1 hit scripted-teacher parity. Then GRPO with five reward functions saturated — "
        "delta two ten-thousandths, gradient ten-to-minus-seven. Diagnosis: SFT extracts "
        "everything the rewards can grip on. So we engineered a stronger teacher — Scripted plus "
        "plus, which escalates ambiguous cells and does a fresh insurance lookup before each "
        "submit. Ninety new trajectories, thirty-three minutes of retraining. SFT v2: one-zero-zero "
        "on easy and medium, zero-point-nine-nine-nine-six on hard. Average lift base to SFT v2: "
        "zero-point-nine-nine-nine-nine."
    ))

    # ---- Slide 6: Scope + close -----------------------------------------
    s6 = prs.slides.add_slide(title_content)
    add_title(s6, "Environment-first submission under Theme 3.1")
    add_bullets(s6, [
        "Shipping today: env + grader + 5-attack exploit suite + scripted baseline + SFT v2 adapter (0.9999 avg)",
        "Two of six axes — abstention_quality and drift_bonus — are RL-only targets (spec v3 §7.6)",
        "Code enforces every claim: disjoint partition asserted at import, 5 exploit tests, prompt-version handshake",
        "Theme 3.1 — DataOps Copilot. Enterprise reasoning under shifting business rules.",
        "Repo: github.com/Algoace1403/METAHackthon2026",
        "HF Space (LIVE): huggingface.co/spaces/Anuj424614/medibill-env",
    ])
    add_notes(s6, (
        "We submit under Theme 3.1, DataOps Copilot. Shipping today: the environment, six-axis "
        "deterministic grader, silent drift mechanic, five-attack exploit suite, scripted "
        "baseline, and a trained SFT v2 adapter that hits zero-point-nine-nine-nine-nine average "
        "across all three difficulty tiers — table on slide five. Two axes — abstention and "
        "drift_bonus — are RL-only by design. Disjoint partition at import, five exploit tests, "
        "prompt-version handshake. Repo and live HF Space on screen. Thank you."
    ))

    # Make sure the docs/ directory exists before writing the file.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    prs.save(OUT)
    print(f"Saved deck to: {OUT}")
    print(f"Slide count: {len(prs.slides)}")


# Build the deck only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()