<!-- fineprint-env / server /static /index.html
vigneshmoovendhan's picture
ui refined
916c16e -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>FinePrint — Policy Drift Detection RL Environment</title>
<meta name="description" content="FinePrint is an RL environment that trains language models to detect when their cached policy knowledge has gone stale.">
<!-- The stylesheet is served by fonts.googleapis.com, but the font files it points at come from
     fonts.gstatic.com and are fetched in CORS mode, so that origin needs its own crossorigin preconnect. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&family=JetBrains+Mono:wght@400;500;600&display=swap" rel="stylesheet">
<style>
/* Design tokens: every colour, font stack, radius, shadow and transition used by the rules below. */
:root{
/* Page background, card surface, resting border and hover border (-h, see .btn-s:hover). */
--bg:#fafbfc;--card:#fff;--border:#e6e9ef;--border-h:#cdd3dc;
/* Text colours: primary, secondary (body copy in cards) and tertiary (labels, captions). */
--text:#0d1117;--text2:#57606a;--text3:#8b949e;
/* Accent blue; -l is used as a tinted background (e.g. .hero-kicker). -d is not referenced in the visible rules. */
--accent:#1a56db;--accent-l:#dbeafe;--accent-d:#1e40af;
/* Status colours as foreground / -bg background / -b border triples. */
--green:#059669;--green-bg:#ecfdf5;--green-b:#a7f3d0;
--amber:#d97706;--amber-bg:#fffbeb;--amber-b:#fde68a;
--red:#dc2626;--red-bg:#fef2f2;--red-b:#fecaca;
/* Font stacks; the first family in each is loaded from Google Fonts in <head>. */
--mono:'JetBrains Mono',ui-monospace,monospace;
--sans:'Inter',-apple-system,BlinkMacSystemFont,sans-serif;
/* Corner radii: --r for cards, --rs for smaller inner boxes. */
--r:14px;--rs:10px;
/* Elevation: resting, hover (-md) and a larger -lg shadow. */
--sh:0 1px 3px rgba(0,0,0,.05),0 1px 2px rgba(0,0,0,.03);
--sh-md:0 4px 12px rgba(0,0,0,.06);
--sh-lg:0 12px 32px rgba(0,0,0,.08);
/* Shared duration and easing for hover transitions. */
--t:180ms ease;
}
/* BASE: reset, typography defaults and inline element styling */
*{margin:0;padding:0;box-sizing:border-box}
/* Animate in-page anchor jumps only for users who have not asked the OS to reduce motion. */
@media(prefers-reduced-motion:no-preference){html{scroll-behavior:smooth}}
body{font-family:var(--sans);background:var(--bg);color:var(--text);line-height:1.6;-webkit-font-smoothing:antialiased}
a{color:var(--accent);text-decoration:none}
a:hover{text-decoration:underline}
code{font-family:var(--mono);font-size:.82em;background:#f0f3f6;padding:2px 6px;border-radius:5px}
img{max-width:100%}
/* NAV: sticky translucent bar. The blur is declared twice because Safari releases
   before 18 only understand the -webkit- prefixed property. */
nav{position:sticky;top:0;z-index:100;background:rgba(250,251,252,.88);-webkit-backdrop-filter:blur(16px);backdrop-filter:blur(16px);border-bottom:1px solid var(--border)}
.nav-inner{max-width:1140px;margin:0 auto;padding:0 24px;height:52px;display:flex;align-items:center}
.brand{font-weight:900;font-size:1.15rem;color:var(--text);letter-spacing:-.02em}
/* margin-left:auto pushes the link group to the right edge of the flex row. */
.nav-r{margin-left:auto;display:flex;align-items:center;gap:4px}
.nav-r a{font-size:12.5px;font-weight:500;color:var(--text2);padding:5px 10px;border-radius:6px;transition:all var(--t);text-decoration:none}
.nav-r a:hover{background:#f0f3f6;color:var(--text)}
/* HERO: two-column grid that collapses to one column at 860px and below */
.hero{max-width:1140px;margin:0 auto;padding:64px 24px 0;display:grid;grid-template-columns:1fr 1fr;gap:40px;align-items:center}
@media(max-width:860px){.hero{grid-template-columns:1fr;gap:24px}}
.hero-kicker{display:inline-flex;align-items:center;gap:6px;font-size:12px;font-weight:600;color:var(--accent);background:var(--accent-l);padding:5px 14px;border-radius:20px;margin-bottom:20px;letter-spacing:.2px}
.hero h1{font-size:clamp(2rem,5vw,3.2rem);font-weight:900;letter-spacing:-.035em;line-height:1.1;max-width:720px}
/* Gradient text: the background is clipped to the glyphs and the fill made transparent.
   The standard background-clip is declared alongside the prefixed form. */
.hero h1 em{font-style:normal;background:linear-gradient(135deg,#1a56db,#3b82f6);-webkit-background-clip:text;background-clip:text;-webkit-text-fill-color:transparent}
.hero-sub{margin-top:16px;font-size:1.1rem;color:var(--text2);max-width:600px;line-height:1.7}
.hero-quote{margin-top:28px;border-left:3px solid var(--red);padding:12px 20px;background:var(--red-bg);border-radius:0 var(--rs) var(--rs) 0;max-width:580px}
.hero-quote p{font-size:.92rem;color:var(--red);font-style:italic;line-height:1.55}
.hero-quote cite{display:block;font-size:.78rem;color:var(--text3);font-style:normal;margin-top:4px}
.hero-cta{margin-top:28px;display:flex;gap:10px;flex-wrap:wrap}
/* HERO TERMINAL: dark terminal panel with a title bar, coloured output classes and a blinking cursor */
.hero-terminal{background:#0d1117;border-radius:12px;overflow:hidden;box-shadow:0 20px 60px rgba(0,0,0,.18);border:1px solid #30363d;position:relative}
/* 1px highlight along the top edge of the panel. */
.hero-terminal::before{content:'';position:absolute;top:0;left:0;right:0;height:1px;background:linear-gradient(90deg,transparent,rgba(88,166,255,.3),transparent)}
.term-bar{height:36px;background:#161b22;display:flex;align-items:center;gap:6px;padding:0 14px;border-bottom:1px solid #21262d}
.term-dot{width:10px;height:10px;border-radius:50%}
.term-dot.r{background:#ff5f57}.term-dot.y{background:#febc2e}.term-dot.g{background:#28c840}
.term-title{margin-left:8px;font-family:var(--mono);font-size:11px;color:#484f58}
.term-body{padding:16px;font-family:var(--mono);font-size:12.5px;line-height:2;color:#c9d1d9;min-height:280px}
/* Token colours for terminal output. */
.term-body .t-prompt{color:#8b949e}
.term-body .t-cmd{color:#79c0ff}
.term-body .t-ok{color:#7ee787}
.term-body .t-warn{color:#d29922}
.term-body .t-err{color:#ff7b72}
.term-body .t-dim{color:#484f58}
.term-body .t-val{color:#a5d6ff}
.term-body .t-rew{color:#d2a8ff}
/* Each line starts invisible and fades in; "forwards" keeps the final opacity. */
.term-line{opacity:0;animation:termReveal .3s forwards}
@keyframes termReveal{to{opacity:1}}
.cursor-blink{display:inline-block;width:8px;height:15px;background:#58a6ff;animation:curBlink 1s step-end infinite;vertical-align:middle;margin-left:2px}
@keyframes curBlink{0%,100%{opacity:1}50%{opacity:0}}
/* Reduced motion: show lines immediately and keep the cursor steady instead of blinking forever. */
@media(prefers-reduced-motion:reduce){.term-line{opacity:1;animation:none}.cursor-blink{animation:none}}
/* BUTTONS: .btn is the shared base (used on both <button> and <a>); the modifiers below only set colours */
.btn{display:inline-flex;align-items:center;gap:6px;font-family:var(--sans);font-size:13px;font-weight:600;padding:10px 22px;border-radius:8px;border:none;cursor:pointer;transition:all var(--t);text-decoration:none}
/* Overrides the global a:hover underline and lifts the button by 1px. */
.btn:hover{text-decoration:none;transform:translateY(-1px)}
/* Primary: dark fill. */
.btn-p{background:var(--text);color:#fff;box-shadow:var(--sh)}
.btn-p:hover{background:#333;box-shadow:var(--sh-md)}
/* Secondary: card-coloured fill with a border. */
.btn-s{background:var(--card);color:var(--text);border:1px solid var(--border);box-shadow:var(--sh)}
.btn-s:hover{border-color:var(--border-h);background:#f8f9fb}
/* Ghost: text-only until hovered. */
.btn-ghost{background:transparent;color:var(--accent);padding:10px 14px}
.btn-ghost:hover{background:var(--accent-l)}
/* PROBLEM SECTION */
.problem{max-width:1140px;margin:0 auto;padding:72px 24px 0}
.problem-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:16px;margin-top:20px}
@media(max-width:768px){.problem-grid{grid-template-columns:1fr}}
.problem-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:24px;box-shadow:var(--sh);transition:all var(--t)}
.problem-card:hover{box-shadow:var(--sh-md);transform:translateY(-2px)}
.pc-icon{width:40px;height:40px;border-radius:10px;display:flex;align-items:center;justify-content:center;font-size:18px;margin-bottom:14px}
.pc-icon-red{background:var(--red-bg);color:var(--red)}
.pc-icon-amber{background:var(--amber-bg);color:var(--amber)}
.pc-icon-blue{background:var(--accent-l);color:var(--accent)}
.problem-card h3{font-size:.92rem;font-weight:700;margin-bottom:6px}
.problem-card p{font-size:.84rem;color:var(--text2);line-height:1.55}
.problem-stat{margin-top:12px;font-family:var(--mono);font-size:1.4rem;font-weight:700}
.problem-stat.red{color:var(--red)}
.problem-stat.amber{color:var(--amber)}
.problem-stat.blue{color:var(--accent)}
/* SECTION COMMON */
.sec{max-width:1140px;margin:0 auto;padding:64px 24px 0}
.sec-label{font-size:11px;font-weight:700;color:var(--text3);text-transform:uppercase;letter-spacing:.12em;margin-bottom:6px}
.sec-title{font-size:1.6rem;font-weight:800;letter-spacing:-.025em;margin-bottom:6px}
.sec-desc{font-size:.92rem;color:var(--text2);max-width:540px;margin-bottom:20px}
/* HOW IT WORKS - TIMELINE */
.timeline{position:relative;padding-left:32px}
.timeline::before{content:'';position:absolute;left:11px;top:8px;bottom:8px;width:2px;background:var(--border)}
.tl-step{position:relative;padding-bottom:28px}
.tl-step:last-child{padding-bottom:0}
.tl-dot{position:absolute;left:-32px;top:2px;width:22px;height:22px;border-radius:50%;border:2px solid var(--border);background:var(--card);display:flex;align-items:center;justify-content:center;font-size:10px;font-weight:700;color:var(--text2)}
.tl-step.active .tl-dot{border-color:var(--accent);background:var(--accent);color:#fff}
.tl-step.danger .tl-dot{border-color:var(--red);background:var(--red);color:#fff}
.tl-content h4{font-size:.88rem;font-weight:700;margin-bottom:3px}
.tl-content p{font-size:.82rem;color:var(--text2);line-height:1.5}
.tl-code{margin-top:8px;background:#0d1117;border-radius:var(--rs);padding:12px 16px;font-family:var(--mono);font-size:.76rem;color:#c9d1d9;line-height:1.6;overflow-x:auto}
.tl-code .kw{color:#ff7b72}.tl-code .str{color:#a5d6ff}.tl-code .cm{color:#8b949e}.tl-code .fn{color:#d2a8ff}.tl-code .num{color:#79c0ff}
/* TASKS */
.tasks-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:14px}
@media(max-width:768px){.tasks-grid{grid-template-columns:1fr}}
.task-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);overflow:hidden;box-shadow:var(--sh);transition:all var(--t)}
.task-card:hover{box-shadow:var(--sh-md);transform:translateY(-2px)}
.task-bar{height:3px}
.task-bar.easy{background:var(--green)}.task-bar.med{background:var(--amber)}.task-bar.hard{background:var(--red)}
.task-body{padding:20px}
.task-top{display:flex;justify-content:space-between;align-items:center;margin-bottom:10px}
.task-name{font-family:var(--mono);font-weight:600;font-size:.88rem}
.badge{font-size:10px;font-weight:700;padding:3px 9px;border-radius:4px;text-transform:uppercase;letter-spacing:.4px}
.b-easy{color:var(--green);background:var(--green-bg)}.b-med{color:var(--amber);background:var(--amber-bg)}.b-hard{color:var(--red);background:var(--red-bg)}
.task-desc{font-size:.82rem;color:var(--text2);line-height:1.5;margin-bottom:12px}
.task-chips{display:flex;gap:6px;flex-wrap:wrap}
.chip{font-size:10.5px;font-weight:500;color:var(--text3);background:var(--bg);border:1px solid var(--border);padding:3px 8px;border-radius:4px}
/* CHART: card, header row, canvas sizing and legend for the training chart */
.chart-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:24px;box-shadow:var(--sh)}
.chart-head{display:flex;justify-content:space-between;align-items:baseline;margin-bottom:4px}
.chart-t{font-weight:700;font-size:1rem}.chart-s{font-size:12px;color:var(--text3);font-family:var(--mono)}
/* Applies to every <canvas> on the page, not just #trainChart.
   NOTE(review): !important presumably overrides inline sizes set by the drawing script; confirm before removing. */
canvas{width:100%!important;height:280px!important}
.chart-legend{display:flex;gap:18px;margin-top:10px}
.leg{display:flex;align-items:center;gap:6px;font-size:12px;color:var(--text2)}
/* Colour swatch; the background colour is supplied inline per legend entry. */
.leg-line{width:12px;height:3px;border-radius:2px}
/* DRIFT TABLE */
.drift-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);overflow:hidden;box-shadow:var(--sh)}
.drift-table{width:100%;border-collapse:collapse}
.drift-table th{text-align:left;padding:10px 16px;background:var(--bg);font-size:11px;font-weight:700;color:var(--text3);text-transform:uppercase;letter-spacing:.06em;border-bottom:1px solid var(--border)}
.drift-table td{padding:10px 16px;font-size:.82rem;border-bottom:1px solid #f3f5f7}
.drift-table tr:last-child td{border-bottom:none}
.drift-table tr:hover td{background:#f8f9fb}
.sev{font-size:10px;font-weight:700;padding:2px 7px;border-radius:3px;text-transform:uppercase}
.sev-high{color:var(--red);background:var(--red-bg)}.sev-med{color:var(--amber);background:var(--amber-bg)}.sev-crit{color:#7c2d12;background:#fff7ed}
/* COMPARISON */
.cmp-grid{display:grid;grid-template-columns:1fr 1fr;gap:14px}
@media(max-width:640px){.cmp-grid{grid-template-columns:1fr}}
.cmp-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:20px;box-shadow:var(--sh);transition:all var(--t)}
.cmp-card:hover{box-shadow:var(--sh-md)}
.cmp-card.highlight{border-color:#93c5fd}
.cmp-hdr{display:flex;align-items:center;gap:8px;margin-bottom:14px}
.cmp-hdr h4{font-size:.88rem;font-weight:700}
.cmp-tag{font-size:10px;font-weight:600;padding:2px 8px;border-radius:4px}
.cmp-tag-b{color:var(--text3);background:#f0f3f6}.cmp-tag-t{color:var(--accent);background:var(--accent-l)}
.cmp-row{display:flex;justify-content:space-between;padding:7px 0;border-bottom:1px solid #f3f5f7;font-size:.84rem}
.cmp-row:last-child{border-bottom:none}
.cmp-l{color:var(--text2)}.cmp-v{font-weight:600;font-family:var(--mono);font-size:.82rem}
.cmp-v.g{color:var(--green)}.cmp-v.n{color:var(--text)}
/* REWARD: two-column reward/penalty cards plus the large "swing" callout */
.rew-grid{display:grid;grid-template-columns:1fr 1fr;gap:14px}
@media(max-width:640px){.rew-grid{grid-template-columns:1fr}}
.rew-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:20px;box-shadow:var(--sh)}
.rew-card h4{font-size:.88rem;font-weight:700;margin-bottom:10px}
/* One label/value row. */
.rr{display:flex;justify-content:space-between;padding:4px 0;font-size:.82rem}
.rv{font-weight:600;font-family:var(--mono)}.rv-p{color:var(--green)}.rv-n{color:var(--red)}
.swing{margin-top:20px;background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:20px;box-shadow:var(--sh);display:flex;align-items:center;gap:20px}
@media(max-width:640px){.swing{flex-direction:column;text-align:center}}
/* Gradient text: standard background-clip is declared alongside the prefixed form. */
.swing-val{font-family:var(--mono);font-size:2.4rem;font-weight:800;background:linear-gradient(135deg,var(--green),var(--accent));-webkit-background-clip:text;background-clip:text;-webkit-text-fill-color:transparent;flex-shrink:0}
.swing-text h4{font-size:.92rem;font-weight:700;margin-bottom:2px}
.swing-text p{font-size:.82rem;color:var(--text2);line-height:1.5}
/* API */
.api-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);overflow:hidden;box-shadow:var(--sh)}
.api-t{width:100%;border-collapse:collapse}
.api-t th{text-align:left;padding:10px 16px;background:var(--bg);font-size:11px;font-weight:700;color:var(--text3);text-transform:uppercase;letter-spacing:.06em;border-bottom:1px solid var(--border)}
.api-t td{padding:10px 16px;font-size:.84rem;border-bottom:1px solid #f3f5f7}
.api-t tr:last-child td{border-bottom:none}
.api-t tr:hover td{background:#f8f9fb}
.mb{font-family:var(--mono);font-weight:700;font-size:11px;padding:3px 8px;border-radius:4px}
.mb-g{background:var(--green-bg);color:var(--green)}.mb-p{background:var(--amber-bg);color:var(--amber)}
.api-ep{font-family:var(--mono);font-weight:500;font-size:.84rem}
/* DEMO */
.demo-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:24px;box-shadow:var(--sh)}
.demo-top{display:flex;justify-content:space-between;align-items:center;flex-wrap:wrap;gap:12px}
.demo-info h4{font-size:.95rem;font-weight:700;margin-bottom:2px}
.demo-info p{font-size:.82rem;color:var(--text2)}
#demo-out{margin-top:16px;background:#0d1117;border-radius:var(--rs);padding:18px;font-family:var(--mono);font-size:.76rem;color:#c9d1d9;max-height:400px;overflow-y:auto;white-space:pre-wrap;display:none;line-height:1.8}
.lc{color:#79c0ff}.lo{color:#7ee787}.ld{color:#8b949e}.le{color:#ffa198}
/* SPEC */
.spec-grid{display:flex;gap:8px;flex-wrap:wrap}
.spec-chip{font-size:12px;font-weight:500;color:var(--green);background:var(--green-bg);border:1px solid var(--green-b);padding:5px 12px;border-radius:6px;display:flex;align-items:center;gap:5px;transition:all var(--t)}
.spec-chip:hover{background:#d1fae5}
/* CTA */
.cta-section{margin-top:64px;background:var(--text);border-radius:var(--r);padding:48px;text-align:center;color:#fff}
.cta-section h2{font-size:1.8rem;font-weight:800;letter-spacing:-.02em;margin-bottom:8px}
.cta-section p{font-size:.95rem;color:#8b949e;margin-bottom:24px;max-width:480px;margin-left:auto;margin-right:auto}
.cta-actions{display:flex;gap:10px;justify-content:center;flex-wrap:wrap}
.btn-w{background:#fff;color:var(--text);font-weight:600;box-shadow:var(--sh)}
.btn-w:hover{background:#f0f3f6}
.btn-o{background:transparent;color:#fff;border:1px solid rgba(255,255,255,.2)}
.btn-o:hover{background:rgba(255,255,255,.08)}
/* FOOTER */
footer{max-width:1140px;margin:0 auto;padding:24px;display:flex;justify-content:space-between;align-items:center;font-size:12px;color:var(--text3);border-top:1px solid var(--border);margin-top:48px}
footer a{color:var(--text2)}
@media(max-width:600px){footer{flex-direction:column;gap:6px;text-align:center}}
/* ANIM: .reveal elements start transparent and shifted down; adding .visible fades and slides them in */
.reveal{opacity:0;transform:translateY(16px);transition:opacity .5s ease,transform .5s ease}
.reveal.visible{opacity:1;transform:translateY(0)}
/* Reduced motion: render the content in its final state straight away, with no transition. */
@media(prefers-reduced-motion:reduce){.reveal{opacity:1;transform:none;transition:none}}
/* VS COMPARISON */
.vs-grid{display:grid;grid-template-columns:1fr 1fr 1fr;gap:14px}
@media(max-width:860px){.vs-grid{grid-template-columns:1fr}}
.vs-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:22px;box-shadow:var(--sh);transition:all var(--t)}
.vs-card:hover{box-shadow:var(--sh-md);transform:translateY(-2px)}
.vs-card.vs-fp{border-color:#93c5fd;background:linear-gradient(180deg,#f0f7ff,#fff)}
.vs-hdr{margin-bottom:14px}
.vs-hdr h4{font-size:.95rem;font-weight:800;margin-bottom:3px}
.vs-verdict{font-size:.76rem;color:var(--text3);font-style:italic}
.vs-verdict.vs-win{color:var(--green);font-weight:600;font-style:normal}
.vs-list{list-style:none;padding:0}
.vs-list li{font-size:.82rem;color:var(--text2);line-height:1.55;padding:5px 0;display:flex;gap:8px;align-items:flex-start}
.vs-x{color:var(--red);font-weight:700;font-size:.9rem;flex-shrink:0;margin-top:1px}
.vs-ok{color:var(--green);font-weight:700;font-size:.9rem;flex-shrink:0;margin-top:1px}
.vs-punchline{margin-top:18px;font-size:.88rem;color:var(--text2);line-height:1.6;text-align:center;max-width:700px;margin-left:auto;margin-right:auto}
</style>
</head>
<body>
<!-- NAV: brand on the left, in-page section links followed by the /blog and /docs routes on the right -->
<nav>
  <div class="nav-inner">
    <div class="brand">FinePrint</div>
    <div class="nav-r">
      <a href="#problem">Why</a>
      <a href="#how">How</a>
      <a href="#tasks">Tasks</a>
      <a href="#training">Results</a>
      <a href="#api">API</a>
      <a href="#demo">Demo</a>
      <a href="/blog">Blog</a>
      <a href="/docs">Docs</a>
    </div>
  </div>
</nav>
<!-- HERO: headline, pitch and calls to action on the left; terminal panel on the right -->
<section class="hero">
  <div class="hero-left">
    <div class="hero-kicker">Meta PyTorch OpenEnv Hackathon &times; Scaler School of Technology</div>
    <h1>Your AI agent just quoted<br>a policy that changed<br><em>10 minutes ago.</em></h1>
    <p class="hero-sub">FinePrint is the first RL environment that trains language models to detect when their knowledge expires &mdash; before it costs your company a lawsuit.</p>
    <div class="hero-quote">
      <p>"The return window is 30 days!" &mdash; an AI agent, confidently citing a policy that changed to 14 days at 2 AM.</p>
      <cite>This happens every day in production. No existing benchmark tests for it.</cite>
    </div>
    <div class="hero-cta">
      <!-- A fragment link rather than a button with an inline handler: it navigates to #demo,
           works without JavaScript and stays compatible with a strict Content-Security-Policy. -->
      <a href="#demo" class="btn btn-p">Try Live Demo</a>
      <a href="/docs" class="btn btn-s">API Reference</a>
      <a href="#how" class="btn btn-ghost">How it works &darr;</a>
    </div>
  </div>
  <!-- aria-label is not valid on a role-less div, so the panel is exposed as a labelled group.
       #heroTerm is empty in the markup; presumably a script fills it in. -->
  <div class="hero-terminal" role="group" aria-label="Animated terminal demo">
    <div class="term-bar">
      <span class="term-dot r"></span><span class="term-dot y"></span><span class="term-dot g"></span>
      <span class="term-title">fineprint-env &mdash; drift detection</span>
    </div>
    <div class="term-body" id="heroTerm"></div>
  </div>
</section>
<!-- PROBLEM: three cards, each with an icon, a claim and a headline figure -->
<section class="problem reveal" id="problem">
  <div class="sec-label">The Problem</div>
  <!-- A real h2 so the section has a heading and the h3 cards below have a parent level. -->
  <h2 class="sec-title">Production LLMs assume static knowledge. Reality disagrees.</h2>
  <div class="sec-desc">Policies, pricing, and terms change constantly. Your AI agent keeps quoting yesterday's rules with today's confidence.</div>
  <div class="problem-grid">
    <div class="problem-card">
      <!-- Icon glyphs are decorative, so they are hidden from assistive technology. -->
      <div class="pc-icon pc-icon-red" aria-hidden="true">&#9888;</div>
      <h3>Silent Drift</h3>
      <p>Policies change without notification. No API event, no webhook, no alert. The agent's cache silently becomes wrong.</p>
      <div class="problem-stat red">70%</div>
      <div style="font-size:.75rem;color:var(--text3);margin-top:2px">of drifts are completely silent</div>
    </div>
    <div class="problem-card">
      <div class="pc-icon pc-icon-amber" aria-hidden="true">&#9878;</div>
      <h3>Legal Liability</h3>
      <p>A customer relies on your agent's quote. Ships a return on day 20 of a 14-day window. Who's liable? You are.</p>
      <div class="problem-stat amber">$0&rarr;$25</div>
      <div style="font-size:.75rem;color:var(--text3);margin-top:2px">cancellation fee that appeared overnight</div>
    </div>
    <div class="problem-card">
      <div class="pc-icon pc-icon-blue" aria-hidden="true">&#128269;</div>
      <h3>No Benchmark</h3>
      <p>CartPole tests balance. Atari tests game skill. Nothing tests whether an AI knows when to stop trusting itself.</p>
      <div class="problem-stat blue">0</div>
      <div style="font-size:.75rem;color:var(--text3);margin-top:2px">existing RL envs that train drift detection</div>
    </div>
  </div>
</section>
<!-- HOW IT WORKS: a five-step timeline. The "active" step is the trained agent's path,
     the "danger" step is the untrained outcome. -->
<section class="sec reveal" id="how">
  <div class="sec-label">The Solution</div>
  <!-- A real h2 so the section is reachable by heading navigation. -->
  <h2 class="sec-title">One decision changes everything.</h2>
  <div class="sec-desc">FinePrint trains a single critical meta-skill: <strong>when to call <code>request_verification()</code></strong> &mdash; the binary decision that separates safe agents from dangerous ones.</div>
  <!-- Lines inside .tl-code stay unindented so the whitespace within the snippets is unchanged. -->
  <div class="timeline">
    <div class="tl-step">
      <div class="tl-dot">1</div>
      <div class="tl-content">
        <h4>Customer asks a policy question</h4>
        <p>"What's your return window? I bought this 20 days ago."</p>
      </div>
    </div>
    <div class="tl-step">
      <div class="tl-dot">2</div>
      <div class="tl-content">
        <h4>Agent checks its cached knowledge</h4>
        <p>Cache says <code>return.window_days = 30</code>. But when was this last verified?</p>
        <div class="tl-code"><span class="cm">// Agent's internal state</span>
<span class="str">"cached_policies"</span>: { <span class="str">"return.window_days"</span>: <span class="num">30</span> }
<span class="str">"steps_since_last_verify"</span>: <span class="num">7</span> <span class="cm">// &larr; stale!</span></div>
      </div>
    </div>
    <div class="tl-step active">
      <div class="tl-dot">&#10003;</div>
      <div class="tl-content">
        <h4>Trained agent calls <code>request_verification()</code></h4>
        <p>The model learned that 7 steps without verification + a policy-sensitive question = time to check. It refreshes its cache and discovers the policy changed.</p>
        <div class="tl-code"><span class="fn">request_verification</span>() <span class="cm">// +3.0 reward for timely detection</span>
<span class="cm">// Cache updated: return.window_days = 30 &rarr; 14</span></div>
      </div>
    </div>
    <div class="tl-step">
      <div class="tl-dot">4</div>
      <div class="tl-content">
        <h4>Agent quotes the correct, current value</h4>
        <p>"Our return window is 14 days." &mdash; Correct. Compliant. No lawsuit.</p>
        <div class="tl-code"><span class="fn">quote_policy</span>(<span class="str">"return.window_days"</span>, <span class="str">"14"</span>) <span class="cm">// +10.0 reward</span></div>
      </div>
    </div>
    <div class="tl-step danger">
      <div class="tl-dot">&#10007;</div>
      <div class="tl-content">
        <h4>Without training: agent quotes stale value</h4>
        <p>"Our return window is 30 days!" &mdash; Wrong. The customer ships on day 20. Denied. Complaint filed.</p>
        <div class="tl-code"><span class="fn">quote_policy</span>(<span class="str">"return.window_days"</span>, <span class="str">"30"</span>) <span class="cm">// -8.0 stale penalty</span>
<span class="cm">// User satisfaction drops &rarr; -5.0 complaint penalty</span></div>
      </div>
    </div>
  </div>
</section>
<!-- RAG COMPARISON: three side-by-side cards (RAG, agentic workflows, FinePrint) and a closing line -->
<section class="sec reveal" id="vs">
  <div class="sec-label">But wait&hellip;</div>
  <!-- A real h2 so the section is reachable by heading navigation. -->
  <h2 class="sec-title">&ldquo;Can&rsquo;t RAG just fix this?&rdquo;</h2>
  <div class="sec-desc">Sure. And a smoke detector can cook dinner. Here&rsquo;s the thing:</div>
  <div class="vs-grid">
    <div class="vs-card vs-rag">
      <div class="vs-hdr"><h4>RAG</h4><span class="vs-verdict">retrieves everything, questions nothing</span></div>
      <ul class="vs-list">
        <li><span class="vs-x">&#10007;</span><span>Doesn&rsquo;t know <em>when</em> to retrieve &mdash; fetches every time or never</span></li>
        <li><span class="vs-x">&#10007;</span><span>Treats a typo fix and a return-window halving identically</span></li>
        <li><span class="vs-x">&#10007;</span><span>If the retriever returns stale chunks, the model quotes them <em>confidently</em></span></li>
        <li><span class="vs-x">&#10007;</span><span>Reactive only &mdash; never says &ldquo;wait, let me double-check&rdquo;</span></li>
      </ul>
    </div>
    <div class="vs-card vs-agent">
      <div class="vs-hdr"><h4>Agentic Workflows</h4><span class="vs-verdict">has tools, no idea when to use them</span></div>
      <ul class="vs-list">
        <li><span class="vs-x">&#10007;</span><span>Tool availability &ne; tool wisdom</span></li>
        <li><span class="vs-x">&#10007;</span><span>No reward signal for verifying at the right moment</span></li>
        <li><span class="vs-x">&#10007;</span><span>Hardcoded <code>if steps &gt; 5: verify()</code> is a heuristic, not intelligence</span></li>
        <li><span class="vs-x">&#10007;</span><span>Zero benchmarks to measure verification timing</span></li>
      </ul>
    </div>
    <!-- .vs-fp is the highlighted card. -->
    <div class="vs-card vs-fp">
      <div class="vs-hdr"><h4>FinePrint</h4><span class="vs-verdict vs-win">trains the judgment they both lack</span></div>
      <ul class="vs-list">
        <li><span class="vs-ok">&#10003;</span><span>Learns <em>when</em> to call <code>request_verification()</code> via RL rewards</span></li>
        <li><span class="vs-ok">&#10003;</span><span>-8.0 penalty for stale quotes &mdash; the model <em>feels</em> the cost of being wrong</span></li>
        <li><span class="vs-ok">&#10003;</span><span>+3.0 for timely detection &mdash; develops urgency, not just access</span></li>
        <li><span class="vs-ok">&#10003;</span><span>Works <em>with</em> RAG &amp; agents &mdash; trains the meta-skill they&rsquo;re missing</span></li>
      </ul>
    </div>
  </div>
  <p class="vs-punchline">RAG solves <strong>access</strong>. Agents solve <strong>tools</strong>. FinePrint solves <strong>judgment</strong>. &mdash; The part where your model decides &ldquo;I should probably not trust myself right now.&rdquo;</p>
</section>
<!-- TASKS: one card per graded task, ordered easy to hard; chips summarise each task's settings -->
<section class="sec reveal" id="tasks">
  <div class="sec-label">Graded Tasks</div>
  <!-- A real h2 so the section is reachable by heading navigation. -->
  <h2 class="sec-title">Three levels. Increasing chaos.</h2>
  <div class="sec-desc">From static quoting to adversarial multi-version silent drift. Can your agent handle the storm?</div>
  <div class="tasks-grid">
    <div class="task-card"><div class="task-bar easy"></div><div class="task-body">
      <div class="task-top"><span class="task-name">quote_accuracy</span><span class="badge b-easy">Easy</span></div>
      <p class="task-desc">Handle <code>shop</code> and <code>return</code> workflows. Quote policies correctly. No drift &mdash; just comprehension.</p>
      <div class="task-chips"><span class="chip">2 workflows</span><span class="chip">20 steps</span><span class="chip">0% drift</span></div>
    </div></div>
    <div class="task-card"><div class="task-bar med"></div><div class="task-body">
      <div class="task-top"><span class="task-name">drift_detection</span><span class="badge b-med">Medium</span></div>
      <p class="task-desc">Navigate 3 workflows while policies silently change. Detect drifts, adapt quotes, maintain compliance.</p>
      <div class="task-chips"><span class="chip">3 workflows</span><span class="chip">30 steps</span><span class="chip">30% drift</span><span class="chip">50% silent</span></div>
    </div></div>
    <div class="task-card"><div class="task-bar hard"></div><div class="task-body">
      <div class="task-top"><span class="task-name">compliance_storm</span><span class="badge b-hard">Hard</span></div>
      <p class="task-desc">All 5 workflows under aggressive drift across 8 policy versions. CRITICAL scope changes lurk in the noise.</p>
      <div class="task-chips"><span class="chip">5 workflows</span><span class="chip">45 steps</span><span class="chip">50% drift</span><span class="chip">80% silent</span></div>
    </div></div>
  </div>
</section>
<!-- POLICY DRIFT TABLE: one row per policy version with its change, severity and an example -->
<section class="sec reveal">
  <div class="sec-label">Policy Drift Engine</div>
  <!-- A real h2; its id also gives the table below an accessible name via aria-labelledby. -->
  <h2 class="sec-title" id="drift-engine-title">8 versions. Each one breaks something.</h2>
  <div class="sec-desc">Policies evolve via delta merging. Each version overrides specific fields from the base while inheriting the rest.</div>
  <div class="drift-card">
    <table class="drift-table" aria-labelledby="drift-engine-title">
      <!-- scope="col" ties every data cell to its column header for screen readers. -->
      <thead><tr><th scope="col">Version</th><th scope="col">Change</th><th scope="col">Severity</th><th scope="col">Example</th></tr></thead>
      <tbody>
        <tr><td><code>v1_base</code></td><td>Baseline policies</td><td>&mdash;</td><td>Return: 30 days, free ship $50</td></tr>
        <tr><td><code>v2</code></td><td>Return policy tightened</td><td><span class="sev sev-high">HIGH</span></td><td>Window: 30 &rarr; 14 days</td></tr>
        <tr><td><code>v3</code></td><td>Shipping raised</td><td><span class="sev sev-med">MEDIUM</span></td><td>Free threshold: $50 &rarr; $75</td></tr>
        <tr><td><code>v4</code></td><td>Auto-renewal mandatory</td><td><span class="sev sev-high">HIGH</span></td><td>auto_renewal: false &rarr; true</td></tr>
        <tr><td><code>v5</code></td><td>Cancel fee introduced</td><td><span class="sev sev-med">MEDIUM</span></td><td>Fee: $0 &rarr; $25</td></tr>
        <tr><td><code>v6</code></td><td>Compensation slashed</td><td><span class="sev sev-high">HIGH</span></td><td>Max comp: $200 &rarr; $50</td></tr>
        <tr><td><code>v7</code></td><td>Scope narrowed</td><td><span class="sev sev-crit">CRITICAL</span></td><td>Electronics returns: eliminated</td></tr>
        <tr><td><code>v8</code></td><td>Pricing restructured</td><td><span class="sev sev-med">MEDIUM</span></td><td>Tax included, bulk discount gone</td></tr>
      </tbody>
    </table>
  </div>
</section>
<!-- TRAINING CHART: card holding the #trainChart canvas and its legend -->
<section class="sec reveal" id="training">
  <div class="sec-label">Training Results</div>
  <!-- A real h2 so the section is reachable by heading navigation. -->
  <h2 class="sec-title">From &minus;11 to +8.75 in 80 episodes.</h2>
  <div class="sec-desc">GRPO fine-tuning on Qwen2.5-1.5B-Instruct with LoRA. The model learns when to verify and when to act.</div>
  <div class="chart-card">
    <div class="chart-head"><span class="chart-t">Avg Reward per Update</span><span class="chart-s">80 ep &middot; 20 updates &middot; LoRA r=16</span></div>
    <!-- A bare canvas is opaque to assistive technology: role="img" plus a label names it,
         and the inner text is the fallback when the canvas cannot be rendered. -->
    <canvas id="trainChart" role="img" aria-label="Chart of average reward and entropy per training update">Chart of average reward and entropy per training update.</canvas>
    <div class="chart-legend">
      <div class="leg"><span class="leg-line" style="background:var(--accent)"></span>Avg Reward</div>
      <div class="leg"><span class="leg-line" style="background:var(--green)"></span>Entropy</div>
      <div class="leg"><span class="leg-line" style="background:#e5e7eb;height:1px;width:16px"></span>Zero line</div>
    </div>
  </div>
</section>
<!-- COMPARISON: heuristic baseline card next to the trained-model card (.highlight) -->
<section class="sec reveal">
  <div class="sec-label">Before &amp; After</div>
  <!-- A real h2 so the section is reachable by heading navigation. -->
  <h2 class="sec-title">Baseline vs trained model.</h2>
  <div class="cmp-grid">
    <div class="cmp-card">
      <div class="cmp-hdr"><h4>Heuristic Baseline</h4><span class="cmp-tag cmp-tag-b">rule-based</span></div>
      <div class="cmp-row"><span class="cmp-l">Avg Reward</span><span class="cmp-v n">144.2 &plusmn; 20.7</span></div>
      <div class="cmp-row"><span class="cmp-l">Drift Detections / Ep</span><span class="cmp-v n">4.5</span></div>
      <div class="cmp-row"><span class="cmp-l">Compliance Failure Rate</span><span class="cmp-v n">30%</span></div>
      <div class="cmp-row"><span class="cmp-l">Workflows Completed</span><span class="cmp-v n">4.0 / 5</span></div>
      <div class="cmp-row"><span class="cmp-l">User Satisfaction</span><span class="cmp-v n">91%</span></div>
    </div>
    <div class="cmp-card highlight">
      <div class="cmp-hdr"><h4>Trained Model</h4><span class="cmp-tag cmp-tag-t">GRPO, 80 ep</span></div>
      <div class="cmp-row"><span class="cmp-l">Avg Reward (peak)</span><span class="cmp-v g">+8.75</span></div>
      <div class="cmp-row"><span class="cmp-l">Drift Detections / Ep</span><span class="cmp-v n">1.4</span></div>
      <div class="cmp-row"><span class="cmp-l">Compliance Failure Rate</span><span class="cmp-v g">0%</span></div>
      <div class="cmp-row"><span class="cmp-l">Valid Samples</span><span class="cmp-v g">81 &rarr; 106</span></div>
      <div class="cmp-row"><span class="cmp-l">Entropy (stable)</span><span class="cmp-v g">1.15 &rarr; 1.22</span></div>
    </div>
  </div>
</section>
<!-- REWARD -->
<section class="sec reveal">
<div class="sec-label">Reward Design</div>
<div class="sec-title">The 26-point swing.</div>
<div class="sec-desc">A single policy-sensitive step can yield +13 (verify &rarr; detect &rarr; correct quote) or &minus;13 (skip &rarr; stale quote &rarr; complaint). This gap is what drives learning.</div>
<div class="swing">
<div class="swing-val">26pt</div>
<div class="swing-text">
<h4>Best case vs worst case per step</h4>
<p>+3 (detect) +10 (correct quote) = <strong>+13</strong> &nbsp;vs&nbsp; &minus;8 (stale) &minus;5 (complaint) = <strong>&minus;13</strong></p>
</div>
</div>
<div class="rew-grid" style="margin-top:14px">
<div class="rew-card">
<h4>Rewards</h4>
<div class="rr"><span>Correct quote</span><span class="rv rv-p">+10.0</span></div>
<div class="rr"><span>Timely drift detection</span><span class="rv rv-p">+3.0</span></div>
<div class="rr"><span>Late drift detection</span><span class="rv rv-p">+1.0</span></div>
<div class="rr"><span>Freshness bonus</span><span class="rv rv-p">+1.0</span></div>
<div class="rr"><span>High satisfaction</span><span class="rv rv-p">+2.0</span></div>
<div class="rr"><span>Clean episode (terminal)</span><span class="rv rv-p">+20.0</span></div>
</div>
<div class="rew-card">
<h4>Penalties</h4>
<div class="rr"><span>Stale policy cited</span><span class="rv rv-n">&minus;8.0</span></div>
<div class="rr"><span>User complaint</span><span class="rv rv-n">&minus;5.0</span></div>
<div class="rr"><span>Incorrect value</span><span class="rv rv-n">&minus;4.0</span></div>
<div class="rr"><span>Unnecessary escalation</span><span class="rv rv-n">&minus;4.0</span></div>
<div class="rr"><span>Unnecessary abort</span><span class="rv rv-n">&minus;3.0</span></div>
<div class="rr"><span>Compliance failure (terminal)</span><span class="rv rv-n">&minus;30.0</span></div>
</div>
</div>
</section>
<!-- API -->
<section class="sec reveal" id="api">
<div class="sec-label">API</div>
<div class="sec-title">OpenEnv-compatible endpoints.</div>
<div class="sec-desc">Standard REST API. Reset an episode, step through actions, check state. Full Swagger docs at <a href="/docs">/docs</a>.</div>
<div class="api-card">
<!-- Endpoint reference table. Header cells carry scope="col" so assistive tech can associate each data cell with its column. -->
<table class="api-t">
<thead><tr><th scope="col" style="width:80px">Method</th><th scope="col" style="width:120px">Endpoint</th><th scope="col">Description</th></tr></thead>
<tbody>
<tr><td><span class="mb mb-g">GET</span></td><td class="api-ep">/health</td><td>Liveness / readiness probe</td></tr>
<tr><td><span class="mb mb-p">POST</span></td><td class="api-ep">/reset</td><td>Start a new episode with a task</td></tr>
<tr><td><span class="mb mb-p">POST</span></td><td class="api-ep">/step</td><td>Execute an agent action, get observation + reward</td></tr>
<tr><td><span class="mb mb-g">GET</span></td><td class="api-ep">/state</td><td>Current episode metadata (step count, task, version)</td></tr>
<tr><td><span class="mb mb-g">GET</span></td><td class="api-ep">/tasks</td><td>List available graded tasks</td></tr>
<tr><td><span class="mb mb-g">GET</span></td><td class="api-ep"><a href="/docs">/docs</a></td><td>Interactive Swagger UI</td></tr>
</tbody>
</table>
</div>
</section>
<!-- DEMO: the button calls runDemo(), which writes its transcript into #demo-out -->
<section class="sec reveal" id="demo">
<div class="sec-label">Try It</div>
<div class="sec-title">See it in action.</div>
<div class="demo-card">
<div class="demo-top">
<div class="demo-info">
<h4>Live Environment Interaction</h4>
<p>Reset &rarr; view policies &rarr; quote a value &rarr; see the compliance check fire.</p>
</div>
<button type="button" class="btn btn-p" onclick="runDemo()" id="demoBtn">Run Demo</button>
</div>
<!-- role="log" makes this a polite live region, so transcript lines appended by the script are announced to assistive tech -->
<div id="demo-out" role="log" aria-live="polite"></div>
</div>
</section>
<!-- SPEC -->
<section class="sec reveal">
<div class="sec-label">Compliance</div>
<div class="sec-title">OpenEnv Spec v1 &mdash; fully compliant.</div>
<div class="spec-grid" style="margin-top:14px">
<span class="spec-chip">&#10003; step(action)</span>
<span class="spec-chip">&#10003; reset()</span>
<span class="spec-chip">&#10003; state()</span>
<span class="spec-chip">&#10003; openenv.yaml</span>
<span class="spec-chip">&#10003; Pydantic Models</span>
<span class="spec-chip">&#10003; Docker</span>
<span class="spec-chip">&#10003; 3 Graded Tasks</span>
<span class="spec-chip">&#10003; Baseline Inference</span>
<span class="spec-chip">&#10003; Mandatory Logging</span>
</div>
</section>
<!-- CTA -->
<div class="sec">
<div class="cta-section">
<h2>Train your agent to know<br>when to stop trusting itself.</h2>
<p>FinePrint is open, free, and ready. Reset an episode, connect your model, and see if it survives the compliance storm.</p>
<div class="cta-actions">
<button class="btn btn-w" onclick="document.getElementById('demo').scrollIntoView({behavior:'smooth'})">Try Live Demo</button>
<a href="/docs" class="btn btn-o">Read API Docs</a>
</div>
</div>
</div>
<footer>
<span>FinePrint-Env &mdash; Meta PyTorch OpenEnv Hackathon &times; Scaler School of Technology</span>
<div style="display:flex;gap:14px"><a href="/docs">API Docs</a><a href="/health">Health</a></div>
</footer>
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.7/dist/chart.umd.min.js"></script>
<script>
// Scroll reveal: add .visible to each .reveal element the first time at least 10% of it is in view.
const obs=new IntersectionObserver(function(entries){
  for(const entry of entries){
    if(!entry.isIntersecting)continue;
    entry.target.classList.add('visible');
    // One-shot: an element is no longer watched once it has been revealed.
    obs.unobserve(entry.target);
  }
},{threshold:.1});
for(const node of document.querySelectorAll('.reveal')){
  obs.observe(node);
}
// Chart
// Series plotted below, one sample every 4 episodes: ep = episode (x axis),
// r = value drawn as 'Avg Reward' (left axis), e = value drawn as 'Entropy' (right axis).
const D=[{ep:4,r:-2.375,e:1.154},{ep:8,r:-0.625,e:1.14},{ep:12,r:-11.375,e:1.132},{ep:16,r:0.875,e:1.138},{ep:20,r:1.375,e:1.2},{ep:24,r:0.5,e:1.184},{ep:28,r:-1.25,e:1.187},{ep:32,r:0.75,e:1.202},{ep:36,r:4.875,e:1.177},{ep:40,r:5.125,e:1.239},{ep:44,r:6.625,e:1.222},{ep:48,r:6.125,e:1.213},{ep:52,r:4.375,e:1.224},{ep:56,r:7.75,e:1.191},{ep:60,r:8.75,e:1.209},{ep:64,r:6.0,e:1.208},{ep:68,r:7.125,e:1.224},{ep:72,r:6.625,e:1.225},{ep:76,r:7.25,e:1.22},{ep:80,r:7.75,e:1.223}];
// Draw only when both the canvas and the CDN-loaded Chart global are present.
// Unguarded, a failed CDN load or a missing canvas throws here and aborts the
// rest of this script (smooth nav and the hero terminal would never start).
{
  const canvas=document.getElementById('trainChart');
  if(canvas&&typeof Chart!=='undefined'){
    new Chart(canvas.getContext('2d'),{
      type:'line',
      data:{labels:D.map(d=>d.ep),datasets:[
        {label:'Avg Reward',data:D.map(d=>d.r),borderColor:'#1a56db',backgroundColor:'rgba(26,86,219,.06)',fill:true,tension:.35,pointRadius:4,pointHoverRadius:7,pointBackgroundColor:'#1a56db',pointBorderColor:'#fff',pointBorderWidth:2,borderWidth:2.5,yAxisID:'y'},
        // Entropy is drawn dashed, without points, against the right-hand y1 axis.
        {label:'Entropy',data:D.map(d=>d.e),borderColor:'#059669',borderDash:[5,4],tension:.35,pointRadius:0,borderWidth:1.5,yAxisID:'y1'}
      ]},
      options:{responsive:true,maintainAspectRatio:false,interaction:{mode:'index',intersect:false},
        // The built-in legend is switched off; tooltips are titled "Episode <n>".
        plugins:{legend:{display:false},tooltip:{backgroundColor:'#fff',titleColor:'#0d1117',bodyColor:'#57606a',borderColor:'#e6e9ef',borderWidth:1,padding:12,cornerRadius:8,bodyFont:{family:'JetBrains Mono',size:11},titleFont:{size:12,weight:'700'},callbacks:{title:c=>'Episode '+c[0].label}}},
        scales:{x:{title:{display:true,text:'Episodes',color:'#8b949e',font:{size:11,weight:'500'}},grid:{display:false},ticks:{color:'#8b949e',font:{size:10}}},y:{position:'left',title:{display:true,text:'Avg Reward',color:'#1a56db',font:{size:11,weight:'500'}},grid:{color:'#f3f4f6'},ticks:{color:'#8b949e',font:{size:10}}},y1:{position:'right',title:{display:true,text:'Entropy',color:'#059669',font:{size:11,weight:'500'}},grid:{display:false},ticks:{color:'#8b949e',font:{size:10}},min:1.0,max:1.35}}
      }
    });
  }
}
// Demo
/**
 * Run the scripted three-call demo (POST /reset, then POST /step with
 * view_policies and quote_policy) and print a transcript into #demo-out.
 *
 * The #demoBtn button is disabled while the calls are in flight and is always
 * restored afterwards. Any failure (network error, non-2xx status, body that is
 * not JSON) ends the run and is printed as a single error line.
 *
 * @returns {Promise<void>}
 */
async function runDemo(){
  const btn=document.getElementById('demoBtn'),out=document.getElementById('demo-out');

  // Response text is spliced into markup, so escape it instead of trusting the payload.
  // NOTE(review): this assumes the server's `output` is plain text, not intentional HTML — confirm.
  const ENTITIES={'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  const esc=v=>String(v==null?'':v).replace(/[&<>"']/g,ch=>ENTITIES[ch]);

  // Cut to at most n characters; the ellipsis is added only when something was removed.
  const clip=(v,n)=>{
    const s=String(v==null?'':v);
    return s.length>n?s.substring(0,n)+'...':s;
  };

  // Append markup without re-parsing what is already in the transcript.
  const print=html=>out.insertAdjacentHTML('beforeend',html);

  // POST a JSON payload and return the parsed JSON body; HTTP error statuses throw.
  const post=async(path,payload)=>{
    const res=await fetch(path,{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(payload)});
    if(!res.ok)throw new Error('POST '+path+' failed with HTTP '+res.status);
    return res.json();
  };

  // Count top-level keys of the JSON object embedded in the text (first '{' to last '}').
  // Returns 0 when there is no such object or it does not parse, so a cosmetic
  // count can never abort the demo.
  const countCategories=v=>{
    const text=String(v==null?'':v);
    const from=text.indexOf('{'),to=text.lastIndexOf('}');
    if(from<0||to<=from)return 0;
    try{
      const parsed=JSON.parse(text.substring(from,to+1));
      return parsed&&typeof parsed==='object'?Object.keys(parsed).length:0;
    }catch(_){
      return 0;
    }
  };

  out.style.display='block';btn.disabled=true;btn.textContent='Running...';
  out.innerHTML='<span class="lc">$ POST /reset {task_id: "quote_accuracy"}</span>\n';
  try{
    // 1. Start an episode.
    let d=await post('/reset',{options:{task_id:'quote_accuracy'}});
    const workflows=Array.isArray(d.workflow_names)?d.workflow_names:[];
    print('<span class="lo">OK</span> <span class="ld">Episode started</span>\n');
    print('<span class="ld"> Task: '+esc(clip(d.task_description,140))+'</span>\n');
    print('<span class="ld"> Workflows: '+esc(workflows.join(', '))+'</span>\n\n');

    // 2. Look at the policies.
    print('<span class="lc">$ POST /step {command: "view_policies"}</span>\n');
    d=await post('/step',{action:{command:'view_policies',args:{}}});
    print('<span class="lo">OK</span> <span class="ld">Policies loaded ('+countCategories(d.output)+' categories)</span>\n');
    print('<span class="ld">'+esc(clip(d.output,280))+'</span>\n\n');

    // 3. Quote a policy value.
    print('<span class="lc">$ POST /step {command: "quote_policy", args: {policy_field: "return.window_days", quoted_value: "30"}}</span>\n');
    d=await post('/step',{action:{command:'quote_policy',args:{policy_field:'return.window_days',quoted_value:'30'}}});
    print('<span class="lo">OK</span> '+esc(d.output)+'\n\n');

    print('<span class="ld">---</span>\n<span class="ld">The agent would continue: handle workflows, detect drifts, submit for grading.</span>\n<span class="ld">Connect your own model via the API to see how it performs.</span>');
  }catch(e){
    print('<span class="le">Error: '+esc(e&&e.message?e.message:e)+'</span>');
  }finally{
    // Always hand the button back, whatever happened above.
    btn.disabled=false;btn.textContent='Run Demo';
  }
}
// Smooth nav
// In-page links (href="#id") scroll smoothly to their target element.
// The id is looked up directly rather than handing the raw fragment to
// querySelector, which throws on a bare "#" and on ids containing selector syntax.
// The default jump is cancelled only once a target has been found, so links
// without a matching element keep their normal browser behaviour.
document.querySelectorAll('a[href^="#"]').forEach(a=>{
  a.addEventListener('click',e=>{
    const id=a.getAttribute('href').slice(1);
    const t=id?document.getElementById(id):null;
    if(!t)return;
    e.preventDefault();
    t.scrollIntoView({behavior:'smooth',block:'start'});
  });
});
// Hero terminal animation
// Replays a fixed transcript inside #heroTerm, one row per timer, then loops.
(function(){
  const term=document.getElementById('heroTerm');
  if(!term)return;
  // Each entry: d = delay in ms from the start of a pass, h = markup for the row ('' renders a blank spacer row).
  const script=[
    {d:0,h:'<span class="t-prompt">$</span> <span class="t-cmd">POST /reset</span> <span class="t-dim">{task: "drift_detection"}</span>'},
    {d:600,h:'<span class="t-ok">&#10003;</span> Episode started &mdash; <span class="t-val">3 workflows</span>, drift_rate=<span class="t-val">0.3</span>'},
    {d:1200,h:''},
    {d:1500,h:'<span class="t-prompt">$</span> <span class="t-cmd">POST /step</span> <span class="t-dim">{cmd: "quote_policy", field: "return.window_days"}</span>'},
    {d:2100,h:'<span class="t-ok">&#10003;</span> Quoted <span class="t-val">"30 days"</span> &mdash; matches current policy'},
    {d:2500,h:' <span class="t-rew">reward: +10.0</span>'},
    {d:3100,h:''},
    {d:3400,h:'<span class="t-dim"> &#8943; 5 steps later &#8943;</span>'},
    {d:3900,h:'<span class="t-warn">&#9888; DRIFT</span> <span class="t-dim">v1 &rarr; v2</span> return.window_days: <span class="t-err">30 &rarr; 14</span>'},
    {d:4400,h:''},
    {d:4700,h:'<span class="t-prompt">$</span> <span class="t-cmd">POST /step</span> <span class="t-dim">{cmd: "request_verification"}</span>'},
    {d:5300,h:'<span class="t-ok">&#10003;</span> Policy refreshed &mdash; drift <span class="t-ok">detected</span>'},
    {d:5700,h:' <span class="t-rew">reward: +3.0</span> <span class="t-dim">(timely detection bonus)</span>'},
    {d:6300,h:''},
    {d:6600,h:'<span class="t-prompt">$</span> <span class="t-cmd">POST /step</span> <span class="t-dim">{cmd: "quote_policy", value: "14"}</span>'},
    {d:7200,h:'<span class="t-ok">&#10003;</span> Quoted <span class="t-val">"14 days"</span> &mdash; correct after drift'},
    {d:7600,h:' <span class="t-rew">reward: +10.0</span> <span class="t-ok">&#10004; compliant</span>'},
  ];
  // Delay of the final scripted row; the cursor and the restart are timed relative to it.
  const lastAt=script[script.length-1].d;
  function play(){
    // Start every pass from an empty terminal.
    term.innerHTML='';
    for(const step of script){
      setTimeout(function(){
        const row=document.createElement('div');
        row.className='term-line';
        row.style.animationDelay='0s';
        row.innerHTML=step.h===''?'&nbsp;':step.h;
        term.appendChild(row);
        // Keep the newest row in view.
        term.scrollTop=term.scrollHeight;
      },step.d);
    }
    // Prompt with a blinking cursor, 400 ms after the last row.
    setTimeout(function(){
      const cursorRow=document.createElement('div');
      cursorRow.className='term-line';
      cursorRow.innerHTML='<span class="t-prompt">$</span> <span class="cursor-blink"></span>';
      term.appendChild(cursorRow);
    },lastAt+400);
    // Replay from the top 6 s after the last row.
    setTimeout(play,lastAt+6000);
  }
  play();
})();
</script>
</body>
</html>