<!--
  picon-demo / index.html
  willystumblr's picture
  Sync from GitHub
  ad21498 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Demo | PICon</title>
  <meta name="description" content="Try PICon — experience the interrogation yourself or test your own persona agent">
  <!-- Base URL of the demo backend; presumably read by /assets/js/demo.js (confirm against that script). -->
  <meta name="demo-api-url" content="https://picongithubio-production.up.railway.app">
  <link rel="stylesheet" href="/assets/css/style.css">
  <!--
    Google Fonts: the CSS is served from fonts.googleapis.com, but the font
    files themselves come from fonts.gstatic.com and are fetched in CORS mode,
    so that origin needs its own preconnect with the crossorigin attribute.
  -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
  <!--
    KaTeX math rendering. Both scripts are deferred, so they execute in
    document order after parsing: katex.min.js first, then auto-render, whose
    onload handler typesets $$...$$ (display) and $...$ (inline) in the body.
    NOTE(review): these CDN assets carry no integrity/crossorigin (SRI)
    attributes; consider adding the hashes published for this KaTeX version.
  -->
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css">
  <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
  <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"
    onload="renderMathInElement(document.body, {delimiters:[{left:'$$',right:'$$',display:true},{left:'$',right:'$',display:false}]});"></script>
</head>
<body>
<!--
  Primary site navigation. The toggle button flips the "open" class on
  #sidebar (the visual effect is defined in the external stylesheet) and
  mirrors that state in aria-expanded for assistive technology.
-->
<nav class="sidebar" id="sidebar" aria-label="Primary">
  <div class="sidebar-header">
    <a href="https://kaist-edlab.github.io/picon/" class="sidebar-logo">PICon</a>
    <!-- Starts collapsed: #sidebar has no "open" class in the markup. -->
    <button type="button" class="sidebar-toggle" id="sidebar-toggle" aria-label="Toggle navigation" aria-expanded="false">
      <span></span><span></span><span></span>
    </button>
  </div>
  <ul class="sidebar-nav" id="sidebar-nav">
    <li class="nav-item">
      <a href="https://kaist-edlab.github.io/picon/" class="nav-link">Home</a>
    </li>
    <li class="nav-item">
      <a href="https://kaist-edlab.github.io/picon/research/" class="nav-link">Research</a>
    </li>
    <li class="nav-item active">
      <!-- Current page: aria-current exposes what the "active" class shows visually. -->
      <a href="#" class="nav-link" aria-current="page">Demo</a>
    </li>
    <li class="nav-item">
      <a href="https://kaist-edlab.github.io/picon/contact/" class="nav-link">Contact</a>
    </li>
  </ul>
  <div class="sidebar-footer">
    <!-- External links open in a new tab; rel keeps the new page from reaching window.opener. -->
    <a href="https://github.com/willystumblr/picon" class="sidebar-link" target="_blank" rel="noopener noreferrer">GitHub</a>
    <a href="https://arxiv.org/abs/2603.25620" class="sidebar-link" target="_blank" rel="noopener noreferrer">arXiv</a>
  </div>
</nav>
<script>
  // Sidebar open/close behaviour. `toggle` and `sidebar` are kept as
  // top-level bindings, exactly as before, in case later scripts use them.
  const toggle = document.getElementById('sidebar-toggle');
  const sidebar = document.getElementById('sidebar');
  toggle.addEventListener('click', () => {
    // classList.toggle returns true when the class is present afterwards.
    const isOpen = sidebar.classList.toggle('open');
    toggle.setAttribute('aria-expanded', String(isOpen));
  });
</script>
<div class="page-wrap">
<main class="main-content">
<div class="container-wide">
<h1>Demo</h1>
<p class="subtitle">Experience the PICon interrogation, test your own persona agent, or browse the leaderboard</p>
<!-- Mode Tabs -->
<!--
  Each data-tab value matches the id suffix of a panel on this page
  (panel-experience, panel-agent-test, panel-leaderboard). The switching logic
  is not in this file; presumably /assets/js/demo.js handles it (confirm).
  type="button" is explicit so the buttons never act as submit controls.
-->
<div class="demo-tabs">
  <button type="button" class="demo-tab active" data-tab="experience">Experience Mode</button>
  <button type="button" class="demo-tab" data-tab="agent-test">Agent Test</button>
  <button type="button" class="demo-tab" data-tab="leaderboard">Leaderboard</button>
</div>
<!-- ===== Experience Mode ===== -->
<!--
  Three stages live in this panel: the start form (#exp-start), the chat
  (#exp-chat) and the results card (#exp-results). The last two start hidden
  through inline display:none; presumably /assets/js/demo.js reveals them
  (confirm against that script).
  NOTE(review): headings jump from h1 to h4/h3 here; the external stylesheet
  may select on those tags, so check it before changing the levels.
-->
<div class="demo-panel active" id="panel-experience">
  <div class="demo-description">
    <strong>Experience the interrogation yourself.</strong>
    You play as a persona being interrogated by PICon's multi-turn questioning system.
    Answer as yourself &mdash; PICon will probe your responses with logically chained follow-ups
    and verify factual claims in real time. At the end, you'll see your consistency scores across
    all three dimensions.
  </div>
  <!-- Name entry -->
  <div id="exp-start" class="agent-form">
    <div class="form-row">
      <div class="form-group" style="flex:2;">
        <label for="exp-name">Your name</label>
        <input type="text" id="exp-name" placeholder="Enter your name to begin" autocomplete="name">
      </div>
      <div class="form-group" style="flex:1;">
        <label for="exp-turns">Turns</label>
        <select id="exp-turns">
          <option value="30">30 turns (quick)</option>
          <option value="50" selected>50 turns (standard)</option>
          <option value="75">75 turns (thorough)</option>
        </select>
      </div>
    </div>
    <button type="button" class="btn btn-primary" id="exp-start-btn">Start Interview</button>
  </div>
  <!-- Chat -->
  <div id="exp-chat" style="display:none;">
    <div class="chat-container">
      <div class="chat-header">
        <h4>PICon Interview</h4>
        <span class="chat-progress" id="exp-progress"></span>
      </div>
      <!-- Live region: messages appended here are announced politely by screen readers. -->
      <div class="chat-messages" id="exp-messages" role="log" aria-live="polite"></div>
      <div class="chat-input">
        <!-- No visible label in this layout, so aria-label supplies the accessible name (a placeholder is not one). -->
        <input type="text" id="exp-input" placeholder="Type your response..." aria-label="Your response" autocomplete="off">
        <button type="button" id="exp-send" disabled>Send</button>
      </div>
    </div>
  </div>
  <!-- Results card (shown after completion) -->
  <div id="exp-results" style="display:none;">
    <div class="results-card">
      <h3>Your Consistency Report</h3>
      <div class="score-grid" id="exp-score-grid"></div>
    </div>
  </div>
</div>
<!-- ===== Agent Test Mode ===== -->
<!--
  Two alternative forms sit behind the submode cards: #agent-form-external
  (visible by default) and #agent-form-quick (inline display:none). The live
  log (#agent-log) and results (#agent-results) also start hidden. Nothing in
  this file toggles them; presumably /assets/js/demo.js does (confirm).
-->
<div class="demo-panel" id="panel-agent-test">
  <div class="demo-description">
    <strong>Test your own persona agent.</strong>
    PICon will run the full interrogation pipeline and return a detailed consistency report.
    Results are automatically added to the leaderboard.
  </div>
  <!-- Submode selector -->
  <!-- Icon spans are decorative, so they are hidden from assistive technology. -->
  <div class="agent-submode-selector" id="agent-submode-selector">
    <button type="button" class="agent-submode-card active" data-submode="external">
      <span class="submode-icon" aria-hidden="true">🔗</span>
      <span class="submode-title">Connect External Agent</span>
      <span class="submode-desc">Already have an agent hosted elsewhere? Just provide its endpoint URL — no API key or model config needed.</span>
    </button>
    <!-- NOTE(review): this card's icon span is empty while the sibling card shows a glyph; confirm whether an icon was lost. -->
    <button type="button" class="agent-submode-card" data-submode="quick">
      <span class="submode-icon" aria-hidden="true"></span>
      <span class="submode-title">Quick Agent Setup</span>
      <span class="submode-desc">Pick an LLM, write a persona prompt, and PICon will build and evaluate the agent for you.</span>
    </button>
  </div>
  <!-- External Agent form -->
  <div class="agent-form" id="agent-form-external">
    <div class="form-group">
      <label for="ext-agent-name">Agent Name <span class="label-hint">(required)</span></label>
      <input type="text" id="ext-agent-name" placeholder="e.g. MyPersonaBot v2">
    </div>
    <div class="form-group">
      <label for="ext-agent-endpoint">Agent API Endpoint <span class="label-hint">(required — OpenAI chat-completions compatible)</span></label>
      <input type="url" id="ext-agent-endpoint" placeholder="e.g. https://your-server.com/v1">
    </div>
    <div class="form-row">
      <div class="form-group" style="flex:1;">
        <label for="ext-agent-turns">Interrogation Turns</label>
        <select id="ext-agent-turns">
          <option value="30">30 turns (quick)</option>
          <option value="50" selected>50 turns (standard)</option>
          <option value="75">75 turns (thorough)</option>
        </select>
      </div>
      <div class="form-group" style="flex:1;">
        <label for="ext-agent-sessions">Sessions</label>
        <select id="ext-agent-sessions">
          <option value="1" selected>1 session</option>
          <option value="2">2 sessions</option>
          <option value="3">3 sessions</option>
        </select>
      </div>
    </div>
    <button type="button" class="btn btn-accent" id="agent-start-btn-external">Run PICon Evaluation</button>
  </div>
  <!-- Quick Agent form -->
  <div class="agent-form" id="agent-form-quick" style="display:none;">
    <div class="form-row">
      <div class="form-group" style="flex:1;">
        <label for="quick-agent-name">Agent Name <span class="label-hint">(required)</span></label>
        <input type="text" id="quick-agent-name" placeholder="e.g. MyPersonaBot v2">
      </div>
      <div class="form-group" style="flex:1;">
        <label for="quick-agent-model">Model <span class="label-hint">(required)</span></label>
        <input type="text" id="quick-agent-model" placeholder="e.g. gpt-4o, gemini/gemini-2.5-flash">
      </div>
    </div>
    <div class="form-group">
      <label for="quick-agent-api-key">API Key <span class="label-hint">(required — covers your agent's LLM inference cost only)</span></label>
      <!-- A secret, not a login password: ask the browser not to store or autofill it. -->
      <input type="password" id="quick-agent-api-key" placeholder="Your API key (e.g. OpenAI, Gemini)" autocomplete="off">
    </div>
    <div class="form-group">
      <label for="quick-agent-persona">Persona / System Prompt <span class="label-hint">(required)</span></label>
      <textarea id="quick-agent-persona" rows="5" placeholder="Paste the system prompt that defines your agent's persona. This is sent as the system message and also used by PICon for evaluation."></textarea>
    </div>
    <div class="form-row">
      <div class="form-group" style="flex:1;">
        <label for="quick-agent-turns">Interrogation Turns</label>
        <select id="quick-agent-turns">
          <option value="30">30 turns (quick)</option>
          <option value="50" selected>50 turns (standard)</option>
          <option value="75">75 turns (thorough)</option>
        </select>
      </div>
      <div class="form-group" style="flex:1;">
        <label for="quick-agent-sessions">Sessions</label>
        <select id="quick-agent-sessions">
          <option value="1" selected>1 session</option>
          <option value="2">2 sessions</option>
          <option value="3">3 sessions</option>
        </select>
      </div>
    </div>
    <button type="button" class="btn btn-accent" id="agent-start-btn-quick">Run PICon Evaluation</button>
  </div>
  <!-- Agent test live log -->
  <div id="agent-log" style="display:none;">
    <div class="agent-terminal">
      <div class="terminal-header">
        <span class="terminal-title">PICon Evaluation</span>
        <span class="chat-progress" id="agent-progress">Initializing...</span>
      </div>
      <pre class="terminal-body" id="agent-terminal-body"></pre>
    </div>
    <button type="button" class="btn btn-secondary" id="agent-cancel-btn" style="margin-top:0.75rem;">Cancel</button>
  </div>
  <!-- Agent results -->
  <div id="agent-results" style="display:none;">
    <div class="results-card">
      <h3>Evaluation Report</h3>
      <div class="score-grid" id="agent-score-grid"></div>
      <p class="results-note">Your agent has been added to the leaderboard.</p>
    </div>
    <button type="button" class="btn btn-primary" id="agent-retry-btn" style="margin-top:0.75rem;">Test Another Agent</button>
  </div>
</div>
<!-- ===== Leaderboard ===== -->
<!--
  Filter controls plus an empty results table. #leaderboard-body ships with no
  rows; presumably /assets/js/demo.js fills it (confirm). The sortable header
  cells carry data-col keys that match the #lb-sort option values
  (ic, ec, rc, area).
-->
<div class="demo-panel" id="panel-leaderboard">
  <div class="demo-description">
    <strong>PICon Consistency Leaderboard.</strong>
    Baseline scores from the paper's evaluation targets, plus community-submitted agents.
    All baselines are evaluated under the same interrogation protocol (50 turns, 2 sessions).
  </div>
  <div class="leaderboard-controls">
    <!-- Each label is tied to its select via for/id so the control has an accessible name. -->
    <div class="leaderboard-filter">
      <label for="lb-sort">Sort by</label>
      <select id="lb-sort">
        <option value="area" selected>Overall (Area)</option>
        <option value="ic">Internal Consistency</option>
        <option value="ec">External Consistency</option>
        <option value="rc">Retest Consistency</option>
      </select>
    </div>
    <div class="leaderboard-filter">
      <label for="lb-type-filter">Type</label>
      <select id="lb-type-filter">
        <option value="all" selected>All</option>
        <option value="baseline">Baseline (Paper)</option>
        <option value="community">Community</option>
      </select>
    </div>
    <div class="leaderboard-filter">
      <label for="lb-turns-filter">Turns</label>
      <select id="lb-turns-filter">
        <option value="all" selected>All</option>
        <option value="75">75</option>
        <option value="50">50</option>
        <option value="30">30</option>
      </select>
    </div>
  </div>
  <div class="leaderboard-table-wrap">
    <!-- aria-label names the table without adding a visible caption to the layout. -->
    <table class="leaderboard-table" id="leaderboard-table" aria-label="PICon consistency leaderboard">
      <thead>
        <tr>
          <th scope="col" class="lb-rank">#</th>
          <th scope="col" class="lb-name">Agent</th>
          <th scope="col" class="lb-type">Type</th>
          <th scope="col" class="lb-turns">Turns</th>
          <th scope="col" class="lb-score sortable" data-col="ic">IC</th>
          <th scope="col" class="lb-score sortable" data-col="ec">EC</th>
          <th scope="col" class="lb-score sortable" data-col="rc">RC</th>
          <th scope="col" class="lb-score sortable active" data-col="area">Area</th>
        </tr>
      </thead>
      <tbody id="leaderboard-body"></tbody>
    </table>
  </div>
  <!--
    NOTE(review): RC is described below as intra-session stability, while the
    baselines above use 2 sessions and the Python example on this page also
    reports inter_session_stability. Confirm which metric RC maps to.
  -->
  <p class="leaderboard-footnote">
    <strong>IC</strong> = Internal Consistency (harmonic mean of non-contradiction &amp; cooperativeness).
    <strong>EC</strong> = External Consistency (harmonic mean of non-refutation &amp; coverage).
    <strong>RC</strong> = Retest Consistency (intra-session stability).
    <strong>Area</strong> = normalized triangle area on the IC&ndash;EC&ndash;RC radar chart.
  </p>
</div>
</div>
<!-- ===== Use via Python ===== -->
<!--
  Static documentation for the picon Python package. Text inside <pre> is
  rendered verbatim, so the sample lines stay flush-left in this file and carry
  only the sample's own indentation (4 spaces for the call arguments).
-->
<div class="container" style="margin-top: 3rem;">
  <div class="python-api-section">
    <h2>Use via Python</h2>
    <p>
      Install the <code>picon</code> package to run evaluations programmatically &mdash;
      no web UI needed.
    </p>
    <div class="code-block">
      <div class="code-header">Installation</div>
      <pre><code>pip install picon</code></pre>
    </div>
    <div class="code-block">
      <div class="code-header">Quick Start</div>
      <pre><code>import picon
result = picon.run(
    persona="You are a 35-year-old software engineer named John...",
    name="John",
    model="gemini/gemini-2.5-flash",
    num_turns=20,
    num_sessions=2,
    do_eval=True,
)
print(result.eval_scores)
# {
# "internal_harmonic_mean": 0.85,
# "internal_responsiveness": 0.90,
# "internal_consistency": 0.81,
# "external_wilson": 0.72,
# "inter_session_stability": 0.88,
# "intra_session_stability": 0.91,
# }
result.save("results/john.json")</code></pre>
    </div>
    <div class="code-block">
      <div class="code-header">External Agent (Blackbox API)</div>
      <pre><code># Only the endpoint URL is needed — persona is baked into the agent
result = picon.run(
    api_base="http://your-server.com/v1", # OpenAI-compatible endpoint
    num_turns=30,
)</code></pre>
    </div>
    <div class="code-block">
      <div class="code-header">Evaluate Existing Results</div>
      <pre><code>scores = picon.evaluate("results/john.json")
print(scores)</code></pre>
    </div>
    <p style="margin-top:1rem; font-size:0.875rem; color:var(--color-text-muted);">
      See the <a href="https://github.com/willystumblr/picon">GitHub repository</a>
      for full documentation, CLI usage, and advanced configuration.
    </p>
  </div>
</div>
</main>
<!-- Page footer: copyright notice and build credit. -->
<footer class="site-footer">
  <div class="container">
    <p>© 2026 PICon Authors. Built with Jekyll &amp; GitHub Pages.</p>
  </div>
</footer>
</div>
<script src="/assets/js/demo.js"></script>
</body>
</html>