Spaces:

Anuj424614
/

medibill

Sleeping

App Files Files Community

medibill / scripts /build_deck.py

Anuj424614

Upload folder using huggingface_hub

a09b1f5 verified about 1 month ago

raw

history blame contribute delete

13.3 kB

	"""Build the MediBill-Env pitch deck as a .pptx file (opens in Keynote).

	Run:
	python3 scripts/build_deck.py

	Output:
	docs/medibill_pitch.pptx
	"""

	from pathlib import Path

	from pptx import Presentation
	from pptx.dml.color import RGBColor
	from pptx.enum.shapes import MSO_SHAPE
	from pptx.enum.text import PP_ALIGN
	from pptx.util import Inches, Pt


	# The deck is written to the "docs" folder that sits next to this script's
	# own folder, so the result does not depend on the current working directory.
	OUT = Path(__file__).resolve().parents[1].joinpath("docs", "medibill_pitch.pptx")

	# Brand-ish palette (kept simple — dark ink on white)
	INK = RGBColor(0x10, 0x10, 0x10)  # default text colour for titles, bullets and table bodies
	ACCENT = RGBColor(0x0E, 0x6B, 0xA8)  # blue, used as the table header fill
	HIGHLIGHT = RGBColor(0x0A, 0x84, 0x3D)  # green for hero numbers
	RULE = RGBColor(0xCC, 0xCC, 0xCC)  # light grey; not referenced by the code visible in this file


	def add_title(slide, text):
	    """Write *text* into the slide's title shape and apply the heading style.

	    Each paragraph of the title is left-aligned, and each run is set to
	    40 pt, bold, in the INK colour.
	    """
	    heading = slide.shapes.title
	    heading.text = text
	    for paragraph in heading.text_frame.paragraphs:
	        paragraph.alignment = PP_ALIGN.LEFT
	        for piece in paragraph.runs:
	            font = piece.font
	            font.size = Pt(40)
	            font.bold = True
	            font.color.rgb = INK


	def add_bullets(slide, bullets):
	    """Fill ``slide.placeholders[1]`` with one level-0 paragraph per bullet.

	    The first bullet reuses the text frame's existing first paragraph; each
	    later bullet appends a new one. Runs are set to 22 pt in the INK colour,
	    and word wrap is switched on for the frame.
	    """
	    frame = slide.placeholders[1].text_frame
	    frame.word_wrap = True
	    for position, line in enumerate(bullets):
	        if position == 0:
	            paragraph = frame.paragraphs[0]
	        else:
	            paragraph = frame.add_paragraph()
	        paragraph.text = line
	        paragraph.level = 0
	        for piece in paragraph.runs:
	            piece.font.size = Pt(22)
	            piece.font.color.rgb = INK


	def add_notes(slide, notes):
	    """Replace the text of the slide's speaker-notes frame with *notes*."""
	    slide.notes_slide.notes_text_frame.text = notes


	def add_table(slide, left, top, width, height, headers, rows, *, hero_cells=None):
	    """Add a styled table to *slide* at the given position and size.

	    The table has one header row followed by one row per entry of *rows*,
	    and as many columns as there are *headers*.

	    Header cells: bold 16 pt white text on an ACCENT solid fill.
	    Body cells: ``str(value)`` rendered at 14 pt in INK.

	    *hero_cells* is an optional collection of ``(row, column)`` pairs using
	    table coordinates, where row 0 is the header and the first body row is
	    row 1. Cells listed there are rendered bold in HIGHLIGHT instead of INK.
	    """
	    highlighted = hero_cells or set()
	    shape = slide.shapes.add_table(1 + len(rows), len(headers), left, top, width, height)
	    grid = shape.table

	    # Header row
	    white = RGBColor(0xFF, 0xFF, 0xFF)
	    for col, label in enumerate(headers):
	        header_cell = grid.cell(0, col)
	        header_cell.text = label
	        for paragraph in header_cell.text_frame.paragraphs:
	            for piece in paragraph.runs:
	                piece.font.bold = True
	                piece.font.size = Pt(16)
	                piece.font.color.rgb = white
	        header_cell.fill.solid()
	        header_cell.fill.fore_color.rgb = ACCENT

	    # Body rows (table row index starts at 1 because row 0 is the header)
	    for row_idx, values in enumerate(rows, start=1):
	        for col, value in enumerate(values):
	            body_cell = grid.cell(row_idx, col)
	            body_cell.text = str(value)
	            is_hero = (row_idx, col) in highlighted
	            for paragraph in body_cell.text_frame.paragraphs:
	                for piece in paragraph.runs:
	                    piece.font.size = Pt(14)
	                    if is_hero:
	                        piece.font.bold = True
	                        piece.font.color.rgb = HIGHLIGHT
	                    else:
	                        piece.font.color.rgb = INK


	def _add_text_lines(slide, left, top, width, height, lines, *, italic=False):
	    """Add a word-wrapped textbox with one 18 pt INK paragraph per entry of *lines*.

	    When *italic* is true, every run is additionally italicised.
	    """
	    frame = slide.shapes.add_textbox(left, top, width, height).text_frame
	    frame.word_wrap = True
	    for idx, line in enumerate(lines):
	        para = frame.paragraphs[0] if idx == 0 else frame.add_paragraph()
	        para.text = line
	        for run in para.runs:
	            run.font.size = Pt(18)
	            if italic:
	                run.font.italic = True
	            run.font.color.rgb = INK


	def _add_cover_slide(prs, layout):
	    """Slide 0: cover with the project name, tagline and intro notes."""
	    cover = prs.slides.add_slide(layout)
	    cover.shapes.title.text = "MediBill-Env"
	    sub = cover.placeholders[1]
	    sub.text = "Silent policy drift in Indian health-insurance claims\nMeta × Scaler OpenEnv Hackathon — Round 2"
	    add_notes(cover, "Hi, I'm Anuj. MediBill-Env is an OpenEnv environment for testing whether an LLM agent can detect and recover from silent policy drift in medical claims billing.")


	def _add_clock_slide(prs, layout):
	    """Slide 1: the regulatory clock."""
	    slide = prs.slides.add_slide(layout)
	    add_title(slide, "180 minutes to close the claim.")
	    add_bullets(slide, [
	        "IRDAI mandate (May 2024): 1 hour pre-auth, 3 hours discharge",
	        "Miss the 3-hour clock → insurer eats the cost from shareholder funds",
	        "FY24: ~₹26,000 cr health-claim disallowed",
	        "~13% of pre-auths still miss the window",
	    ])
	    add_notes(slide, (
	        "In India, IRDAI gives hospitals one hour for pre-authorization and three hours for final "
	        "discharge on every cashless claim. Miss the three-hour clock, and the overrun comes out of "
	        "the insurer's shareholder fund. Industry estimates put FY24 disallowed health-claim value "
	        "around twenty-six thousand crore rupees. Roughly thirteen percent of pre-auths still miss "
	        "the one-hour window. The bottleneck is a human coder racing a clock, and the policies keep "
	        "changing on them."
	    ))


	def _add_staleness_slide(prs, layout):
	    """Slide 2: the problem is staleness."""
	    slide = prs.slides.add_slide(layout)
	    add_title(slide, "Why agents fail here")
	    add_bullets(slide, [
	        "Rules engines handle static schema validation",
	        "They do not handle staleness — yesterday's correct rule, today wrong",
	        "Agents that imitate one month's trajectories fail quietly the next month",
	        "We need an agent that knows to re-check before submitting",
	    ])
	    add_notes(slide, (
	        "Most agent benchmarks check whether the agent can fill a form correctly. That is schema "
	        "validation, and rules engines already do it. The real failure mode in this domain is "
	        "staleness — the policy changed, the agent did not notice, the claim is wrong. An agent "
	        "that learned by imitating last month's expert trajectories will reproduce last month's "
	        "rules. We want an agent that knows to re-check before submitting."
	    ))


	def _add_environment_slide(prs, layout):
	    """Slide 3: the environment — tools table, task-tier table and a footer line."""
	    slide = prs.slides.add_slide(layout)
	    add_title(slide, "MediBill-Env: 5 tools, 3 task tiers, 6-axis grader")
	    # Tools table
	    add_table(
	        slide,
	        left=Inches(0.6), top=Inches(1.4), width=Inches(6.5), height=Inches(3.5),
	        headers=["Tool", "Purpose"],
	        rows=[
	            ["ehr_query", "Read patient record"],
	            ["insurance_lookup", "Fetch active policy rules"],
	            ["coding_engine", "Write a policy-sensitive field"],
	            ["escalate_to_human", "Calibrated abstention"],
	            ["submit_claim", "Lock claim for grading"],
	        ],
	    )
	    # Tasks table
	    add_table(
	        slide,
	        left=Inches(7.4), top=Inches(1.4), width=Inches(5.4), height=Inches(2.5),
	        headers=["Task tier", "Drift?"],
	        rows=[
	            ["easy_cashless", "no"],
	            ["medium_multi_payer", "no"],
	            ["hard_drift", "yes — silent, mid-episode"],
	        ],
	    )
	    # Footer
	    _add_text_lines(
	        slide,
	        Inches(0.6), Inches(5.5), Inches(12.0), Inches(1.0),
	        ["6-axis deterministic grader · disjoint identity/policy partition asserted at import · 5 reward-hacking attacks neutralised"],
	        italic=True,
	    )
	    add_notes(slide, (
	        "The agent has five tools: query the patient record, look up the insurer's active policy, "
	        "write fields, escalate when uncertain, and submit. Three task tiers — easy, medium, and "
	        "hard, where the policy mutates mid-episode. The grader has six axes with a disjoint field "
	        "partition asserted at import time, so identity correctness and policy compliance never "
	        "overlap."
	    ))


	def _add_drift_slide(prs, layout):
	    """Slide 4: the hero mechanic — silent multi-field policy drift."""
	    slide = prs.slides.add_slide(layout)
	    add_title(slide, "Silent multi-field policy drift")
	    add_bullets(slide, [
	        "Active policy mutates 3–7 fields at a seed-randomized step",
	        "No announcement — no observation flag, no metadata key, no event",
	        "submit_claim is graded against the policy at submit time",
	        "Only path to new rules: a fresh insurance_lookup after the drift step",
	        "12 claim types × 3 tiers × randomized drift = ~12k+ unique trajectories",
	        "Scripted baseline: 1.00 on easy, 0.7611 on drift — the 0.24 gap is the signal",
	    ])
	    add_notes(slide, (
	        "On hard_drift tasks the active policy mutates mid-episode across three to seven fields — "
	        "pre-auth thresholds, required signatures, narrative requirements, discharge attachment "
	        "rules. Multi-field mutation, not a boolean. No announcement, no flag, no event. The only "
	        "path to the new rules is a fresh insurance_lookup after the unknown drift step. "
	        "Submissions are graded against the policy at submit time. Twelve claim types, three "
	        "tiers, seed-randomized drift = over twelve thousand unique trajectories. Scripted "
	        "baseline drops from one-zero on easy to zero-seven-six on drift. That zero-two-four gap "
	        "is the trainable signal."
	    ))


	def _add_results_slide(prs, layout):
	    """Slide 5: headline measurements — hero table, iteration table and footer bullets."""
	    slide = prs.slides.add_slide(layout)
	    add_title(slide, "Base 0.00 → SFT v2 0.9999 avg. Teacher engineering broke through GRPO saturation.")
	    # Hero table — Base → SFT v2
	    add_table(
	        slide,
	        left=Inches(0.5), top=Inches(1.3), width=Inches(6.3), height=Inches(2.6),
	        headers=["task", "base Qwen", "SFT v2", "lift"],
	        rows=[
	            ["easy_cashless", "0.0000", "1.0000", "+1.000"],
	            ["medium_multi_payer", "0.0000", "1.0000", "+1.000"],
	            ["hard_drift", "0.0000", "0.9996 ± 0.0008", "+0.9996"],
	            ["AVERAGE", "0.0000", "0.9999", "+0.9999"],
	        ],
	        # (row, col) in table coordinates; row 0 is the header.
	        hero_cells={(3, 2), (3, 3), (4, 1), (4, 2), (4, 3)},
	    )
	    # Iteration table
	    add_table(
	        slide,
	        left=Inches(7.0), top=Inches(1.3), width=Inches(6.0), height=Inches(2.6),
	        headers=["checkpoint", "hard_drift", "what changed"],
	        rows=[
	            ["Base Qwen 2.5 3B", "0.0000", "untrained"],
	            ["SFT v1", "0.7573", "scripted teacher (parity)"],
	            ["GRPO over SFT v1", "0.7575 (Δ±0.0002)", "rewards saturated — calibration"],
	            ["SFT v2", "0.9996", "drift-aware teacher"],
	        ],
	        hero_cells={(4, 1), (4, 2)},
	    )
	    # Footer bullets
	    _add_text_lines(
	        slide,
	        Inches(0.5), Inches(4.5), Inches(12.5), Inches(2.5),
	        [
	            "• 5 exploit patterns explicitly neutralised — all five score ≤ no_op",
	            "• Pivot was teacher engineering, not RL — +0.2423 lift on hard_drift in 90 trajectories + 33 min retraining",
	            "• Verified via Codex reproducibility protocol: sha256 byte-match of adapter weights + fresh-subprocess re-eval × 2",
	        ],
	    )
	    add_notes(slide, (
	        "Six bars on hard_drift, left to right: base Qwen at zero, random at eleven, no-op at "
	        "eight, scripted at seventy-six, SFT v1 at seventy-six, our final SFT v2 at "
	        "zero-point-nine-nine-nine-six. Untrained, the 3B model scores literal zero — zero parse "
	        "failures across fifteen episodes — it can format JSON, it just has no policy reasoning. "
	        "SFT v1 hit scripted-teacher parity. Then GRPO with five reward functions saturated — "
	        "delta two ten-thousandths, gradient ten-to-minus-seven. Diagnosis: SFT extracts "
	        "everything the rewards can grip on. So we engineered a stronger teacher — Scripted plus "
	        "plus, which escalates ambiguous cells and does a fresh insurance lookup before each "
	        "submit. Ninety new trajectories, thirty-three minutes of retraining. SFT v2: one-zero-zero "
	        "on easy and medium, zero-point-nine-nine-nine-six on hard. Average lift base to SFT v2: "
	        "zero-point-nine-nine-nine-nine."
	    ))


	def _add_scope_slide(prs, layout):
	    """Slide 6: scope and close."""
	    slide = prs.slides.add_slide(layout)
	    add_title(slide, "Environment-first submission under Theme 3.1")
	    add_bullets(slide, [
	        "Shipping today: env + grader + 5-attack exploit suite + scripted baseline + SFT v2 adapter (0.9999 avg)",
	        "Two of six axes — abstention_quality and drift_bonus — are RL-only targets (spec v3 §7.6)",
	        "Code enforces every claim: disjoint partition asserted at import, 5 exploit tests, prompt-version handshake",
	        "Theme 3.1 — DataOps Copilot. Enterprise reasoning under shifting business rules.",
	        "Repo: github.com/Algoace1403/METAHackthon2026",
	        "HF Space (LIVE): huggingface.co/spaces/Anuj424614/medibill-env",
	    ])
	    add_notes(slide, (
	        "We submit under Theme 3.1, DataOps Copilot. Shipping today: the environment, six-axis "
	        "deterministic grader, silent drift mechanic, five-attack exploit suite, scripted "
	        "baseline, and a trained SFT v2 adapter that hits zero-point-nine-nine-nine-nine average "
	        "across all three difficulty tiers — table on slide five. Two axes — abstention and "
	        "drift_bonus — are RL-only by design. Disjoint partition at import, five exploit tests, "
	        "prompt-version handshake. Repo and live HF Space on screen. Thank you."
	    ))


	def main() -> None:
	    """Build the seven-slide pitch deck and save it to ``OUT``.

	    Slides are added in presentation order (cover, then slides 1-6), each by
	    its own private helper. The output directory is created if missing, and
	    the saved path and slide count are printed.
	    """
	    prs = Presentation()
	    # NOTE(review): the slide is widened to 13.333 x 7.5 in here; presumably the
	    # default python-pptx template's layout placeholders keep their original
	    # positions — confirm titles/bodies render where intended.
	    prs.slide_width = Inches(13.333)
	    prs.slide_height = Inches(7.5)

	    cover_layout = prs.slide_layouts[0]
	    title_content = prs.slide_layouts[1]  # Title + content
	    title_only = prs.slide_layouts[5]  # Title only (tables are added by hand)

	    _add_cover_slide(prs, cover_layout)
	    _add_clock_slide(prs, title_content)
	    _add_staleness_slide(prs, title_content)
	    _add_environment_slide(prs, title_only)
	    _add_drift_slide(prs, title_content)
	    _add_results_slide(prs, title_only)
	    _add_scope_slide(prs, title_content)

	    OUT.parent.mkdir(parents=True, exist_ok=True)
	    # python-pptx documents save() as taking a string path or a file-like object.
	    prs.save(str(OUT))
	    print(f"Saved deck to: {OUT}")
	    print(f"Slide count: {len(prs.slides)}")


	# Build the deck only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()