Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Demo | PICon</title> | |
| <meta name="description" content="Try PICon — experience the interrogation yourself or test your own persona agent"> | |
| <!-- Backend base URL exposed as page metadata; presumably read by /assets/js/demo.js (confirm there). --> | |
| <meta name="demo-api-url" content="https://picongithubio-production.up.railway.app"> | |
| <link rel="stylesheet" href="/assets/css/style.css"> | |
| <!-- Google Fonts: the stylesheet comes from fonts.googleapis.com, the font files from fonts.gstatic.com. Font fetches are CORS requests, so that preconnect needs crossorigin. --> | |
| <link rel="preconnect" href="https://fonts.googleapis.com"> | |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> | |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet"> | |
| <!-- KaTeX: both scripts are deferred; when auto-render has loaded, its onload handler typesets $$…$$ (display) and $…$ (inline) math found in document.body. --> | |
| <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css"> | |
| <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script> | |
| <script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js" | |
| onload="renderMathInElement(document.body, {delimiters:[{left:'$$',right:'$$',display:true},{left:'$',right:'$',display:false}]});"></script> | |
| </head> | |
| <body> | |
| <!-- Site sidebar: logo, toggle button, primary navigation list, and external project links. --> | |
| <nav class="sidebar" id="sidebar" aria-label="Primary"> | |
| <div class="sidebar-header"> | |
| <a href="https://kaist-edlab.github.io/picon/" class="sidebar-logo">PICon</a> | |
| <!-- aria-expanded starts at "false" because the nav has no `open` class on load; the script below keeps it in sync. --> | |
| <button type="button" class="sidebar-toggle" id="sidebar-toggle" aria-label="Toggle navigation" aria-expanded="false"> | |
| <span></span><span></span><span></span> | |
| </button> | |
| </div> | |
| <ul class="sidebar-nav" id="sidebar-nav"> | |
| <li class="nav-item"> | |
| <a href="https://kaist-edlab.github.io/picon/" class="nav-link">Home</a> | |
| </li> | |
| <li class="nav-item"> | |
| <a href="https://kaist-edlab.github.io/picon/research/" class="nav-link">Research</a> | |
| </li> | |
| <li class="nav-item active"> | |
| <!-- Current page: aria-current exposes to assistive technology what the `active` class only shows visually. --> | |
| <a href="#" class="nav-link" aria-current="page">Demo</a> | |
| </li> | |
| <li class="nav-item"> | |
| <a href="https://kaist-edlab.github.io/picon/contact/" class="nav-link">Contact</a> | |
| </li> | |
| </ul> | |
| <div class="sidebar-footer"> | |
| <!-- Links that open a new tab carry rel="noopener" so the opened page gets no window.opener handle. --> | |
| <a href="https://github.com/willystumblr/picon" class="sidebar-link" target="_blank" rel="noopener">GitHub</a> | |
| <a href="https://arxiv.org/abs/2603.25620" class="sidebar-link" target="_blank" rel="noopener">arXiv</a> | |
| </div> | |
| </nav> | |
| <script> | |
| // Sidebar toggle: each click on the button flips the `open` class on the <nav>. | |
| const toggle = document.getElementById('sidebar-toggle'); | |
| const sidebar = document.getElementById('sidebar'); | |
| toggle.addEventListener('click', () => { | |
| // classList.toggle returns true when the class is present afterwards; mirror that into aria-expanded. | |
| const isOpen = sidebar.classList.toggle('open'); | |
| toggle.setAttribute('aria-expanded', String(isOpen)); | |
| }); | |
| </script> | |
| <div class="page-wrap"> | |
| <main class="main-content"> | |
| <div class="container-wide"> | |
| <h1>Demo</h1> | |
| <p class="subtitle">Experience the PICon interrogation, test your own persona agent, or browse the leaderboard</p> | |
| <!-- Mode Tabs: each button's data-tab value matches the suffix of a panel id below (panel-experience, panel-agent-test, panel-leaderboard). --> | |
| <!-- type="button" is explicit so these never act as submit buttons if a form is ever wrapped around them. --> | |
| <div class="demo-tabs"> | |
| <button type="button" class="demo-tab active" data-tab="experience">Experience Mode</button> | |
| <button type="button" class="demo-tab" data-tab="agent-test">Agent Test</button> | |
| <button type="button" class="demo-tab" data-tab="leaderboard">Leaderboard</button> | |
| </div> | |
| <!-- ===== Experience Mode ===== --> | |
| <!-- Panel for the "experience" tab: a start form, followed by a chat view and a results card that both start hidden (display:none). --> | |
| <div class="demo-panel active" id="panel-experience"> | |
| <div class="demo-description"> | |
| <strong>Experience the interrogation yourself.</strong> | |
| You play as a persona being interrogated by PICon's multi-turn questioning system. | |
| Answer as yourself — PICon will probe your responses with logically chained follow-ups | |
| and verify factual claims in real time. At the end, you'll see your consistency scores across | |
| all three dimensions. | |
| </div> | |
| <!-- Name entry --> | |
| <div id="exp-start" class="agent-form"> | |
| <div class="form-row"> | |
| <div class="form-group" style="flex:2;"> | |
| <label for="exp-name">Your name</label> | |
| <input type="text" id="exp-name" placeholder="Enter your name to begin"> | |
| </div> | |
| <div class="form-group" style="flex:1;"> | |
| <label for="exp-turns">Turns</label> | |
| <select id="exp-turns"> | |
| <option value="30">30 turns (quick)</option> | |
| <option value="50" selected>50 turns (standard)</option> | |
| <option value="75">75 turns (thorough)</option> | |
| </select> | |
| </div> | |
| </div> | |
| <button type="button" class="btn btn-primary" id="exp-start-btn">Start Interview</button> | |
| </div> | |
| <!-- Chat --> | |
| <div id="exp-chat" style="display:none;"> | |
| <div class="chat-container"> | |
| <div class="chat-header"> | |
| <h4>PICon Interview</h4> | |
| <span class="chat-progress" id="exp-progress"></span> | |
| </div> | |
| <div class="chat-messages" id="exp-messages"></div> | |
| <div class="chat-input"> | |
| <!-- The placeholder disappears on input and is not a reliable accessible name, so the field is named with aria-label. --> | |
| <input type="text" id="exp-input" placeholder="Type your response..." autocomplete="off" aria-label="Your response"> | |
| <!-- Send starts disabled; enabling it is presumably handled by /assets/js/demo.js (confirm there). --> | |
| <button type="button" id="exp-send" disabled>Send</button> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Results card (shown after completion) --> | |
| <div id="exp-results" style="display:none;"> | |
| <div class="results-card"> | |
| <h3>Your Consistency Report</h3> | |
| <div class="score-grid" id="exp-score-grid"></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ===== Agent Test Mode ===== --> | |
| <div class="demo-panel" id="panel-agent-test"> | |
| <div class="demo-description"> | |
| <strong>Test your own persona agent.</strong> | |
| PICon will run the full interrogation pipeline and return a detailed consistency report. | |
| Results are automatically added to the leaderboard. | |
| </div> | |
| <!-- Submode selector: two cards whose data-submode values (external / quick) correspond to the forms agent-form-external and agent-form-quick below. --> | |
| <div class="agent-submode-selector" id="agent-submode-selector"> | |
| <button type="button" class="agent-submode-card active" data-submode="external"> | |
| <!-- The emoji is decorative; aria-hidden keeps it out of the button's accessible name. --> | |
| <span class="submode-icon" aria-hidden="true">🔗</span> | |
| <span class="submode-title">Connect External Agent</span> | |
| <span class="submode-desc">Already have an agent hosted elsewhere? Just provide its endpoint URL — no API key or model config needed.</span> | |
| </button> | |
| <button type="button" class="agent-submode-card" data-submode="quick"> | |
| <span class="submode-icon" aria-hidden="true">⚡</span> | |
| <span class="submode-title">Quick Agent Setup</span> | |
| <span class="submode-desc">Pick an LLM, write a persona prompt, and PICon will build and evaluate the agent for you.</span> | |
| </button> | |
| </div> | |
| <!-- External Agent form: agent name, endpoint URL, turn count and session count. Visible by default (no display:none), matching the submode card marked active. --> | |
| <div class="agent-form" id="agent-form-external"> | |
| <div class="form-group"> | |
| <label for="ext-agent-name">Agent Name <span class="label-hint">(required)</span></label> | |
| <input type="text" id="ext-agent-name" placeholder="e.g. MyPersonaBot v2"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="ext-agent-endpoint">Agent API Endpoint <span class="label-hint">(required — OpenAI chat-completions compatible)</span></label> | |
| <input type="url" id="ext-agent-endpoint" placeholder="e.g. https://your-server.com/v1"> | |
| </div> | |
| <div class="form-row"> | |
| <div class="form-group" style="flex:1;"> | |
| <label for="ext-agent-turns">Interrogation Turns</label> | |
| <select id="ext-agent-turns"> | |
| <option value="30">30 turns (quick)</option> | |
| <option value="50" selected>50 turns (standard)</option> | |
| <option value="75">75 turns (thorough)</option> | |
| </select> | |
| </div> | |
| <div class="form-group" style="flex:1;"> | |
| <label for="ext-agent-sessions">Sessions</label> | |
| <select id="ext-agent-sessions"> | |
| <option value="1" selected>1 session</option> | |
| <option value="2">2 sessions</option> | |
| <option value="3">3 sessions</option> | |
| </select> | |
| </div> | |
| </div> | |
| <!-- There is no enclosing form element; type="button" makes explicit that this control never submits. --> | |
| <button type="button" class="btn btn-accent" id="agent-start-btn-external">Run PICon Evaluation</button> | |
| </div> | |
| <!-- Quick Agent form: agent name, model id, API key, persona prompt, turn count and session count. Starts hidden (display:none). --> | |
| <div class="agent-form" id="agent-form-quick" style="display:none;"> | |
| <div class="form-row"> | |
| <div class="form-group" style="flex:1;"> | |
| <label for="quick-agent-name">Agent Name <span class="label-hint">(required)</span></label> | |
| <input type="text" id="quick-agent-name" placeholder="e.g. MyPersonaBot v2"> | |
| </div> | |
| <div class="form-group" style="flex:1;"> | |
| <label for="quick-agent-model">Model <span class="label-hint">(required)</span></label> | |
| <input type="text" id="quick-agent-model" placeholder="e.g. gpt-4o, gemini/gemini-2.5-flash"> | |
| </div> | |
| </div> | |
| <div class="form-group"> | |
| <label for="quick-agent-api-key">API Key <span class="label-hint">(required — covers your agent's LLM inference cost only)</span></label> | |
| <!-- type="password" masks the key while it is typed. --> | |
| <input type="password" id="quick-agent-api-key" placeholder="Your API key (e.g. OpenAI, Gemini)"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="quick-agent-persona">Persona / System Prompt <span class="label-hint">(required)</span></label> | |
| <textarea id="quick-agent-persona" rows="5" placeholder="Paste the system prompt that defines your agent's persona. This is sent as the system message and also used by PICon for evaluation."></textarea> | |
| </div> | |
| <div class="form-row"> | |
| <div class="form-group" style="flex:1;"> | |
| <label for="quick-agent-turns">Interrogation Turns</label> | |
| <select id="quick-agent-turns"> | |
| <option value="30">30 turns (quick)</option> | |
| <option value="50" selected>50 turns (standard)</option> | |
| <option value="75">75 turns (thorough)</option> | |
| </select> | |
| </div> | |
| <div class="form-group" style="flex:1;"> | |
| <label for="quick-agent-sessions">Sessions</label> | |
| <select id="quick-agent-sessions"> | |
| <option value="1" selected>1 session</option> | |
| <option value="2">2 sessions</option> | |
| <option value="3">3 sessions</option> | |
| </select> | |
| </div> | |
| </div> | |
| <!-- There is no enclosing form element; type="button" makes explicit that this control never submits. --> | |
| <button type="button" class="btn btn-accent" id="agent-start-btn-quick">Run PICon Evaluation</button> | |
| </div> | |
| <!-- Agent test live log: terminal-styled output area with a progress label and a Cancel button. Starts hidden (display:none). --> | |
| <div id="agent-log" style="display:none;"> | |
| <div class="agent-terminal"> | |
| <div class="terminal-header"> | |
| <span class="terminal-title">PICon Evaluation</span> | |
| <span class="chat-progress" id="agent-progress">Initializing...</span> | |
| </div> | |
| <!-- Empty <pre>; its content is presumably written by /assets/js/demo.js (confirm there). --> | |
| <pre class="terminal-body" id="agent-terminal-body"></pre> | |
| </div> | |
| <button type="button" class="btn btn-secondary" id="agent-cancel-btn" style="margin-top:0.75rem;">Cancel</button> | |
| </div> | |
| <!-- Agent results: report card with an empty score grid and a button to test another agent. Starts hidden (display:none). --> | |
| <div id="agent-results" style="display:none;"> | |
| <div class="results-card"> | |
| <h3>Evaluation Report</h3> | |
| <div class="score-grid" id="agent-score-grid"></div> | |
| <p class="results-note">Your agent has been added to the leaderboard.</p> | |
| </div> | |
| <button type="button" class="btn btn-primary" id="agent-retry-btn" style="margin-top:0.75rem;">Test Another Agent</button> | |
| </div> | |
| </div> | |
| <!-- ===== Leaderboard ===== --> | |
| <!-- Panel for the "leaderboard" tab: sort/filter controls, a table whose body starts empty, and a footnote defining the score columns. --> | |
| <div class="demo-panel" id="panel-leaderboard"> | |
| <div class="demo-description"> | |
| <strong>PICon Consistency Leaderboard.</strong> | |
| Baseline scores from the paper's evaluation targets, plus community-submitted agents. | |
| All baselines are evaluated under the same interrogation protocol (50 turns, 2 sessions). | |
| </div> | |
| <div class="leaderboard-controls"> | |
| <!-- Each label is tied to its select through for/id so the control has an accessible name and the label is clickable. --> | |
| <div class="leaderboard-filter"> | |
| <label for="lb-sort">Sort by</label> | |
| <select id="lb-sort"> | |
| <option value="area" selected>Overall (Area)</option> | |
| <option value="ic">Internal Consistency</option> | |
| <option value="ec">External Consistency</option> | |
| <option value="rc">Retest Consistency</option> | |
| </select> | |
| </div> | |
| <div class="leaderboard-filter"> | |
| <label for="lb-type-filter">Type</label> | |
| <select id="lb-type-filter"> | |
| <option value="all" selected>All</option> | |
| <option value="baseline">Baseline (Paper)</option> | |
| <option value="community">Community</option> | |
| </select> | |
| </div> | |
| <div class="leaderboard-filter"> | |
| <label for="lb-turns-filter">Turns</label> | |
| <select id="lb-turns-filter"> | |
| <option value="all" selected>All</option> | |
| <option value="75">75</option> | |
| <option value="50">50</option> | |
| <option value="30">30</option> | |
| </select> | |
| </div> | |
| </div> | |
| <div class="leaderboard-table-wrap"> | |
| <table class="leaderboard-table" id="leaderboard-table"> | |
| <thead> | |
| <tr> | |
| <!-- scope="col" marks every header cell as a column header. The data-col values match the option values of the lb-sort select. --> | |
| <th scope="col" class="lb-rank">#</th> | |
| <th scope="col" class="lb-name">Agent</th> | |
| <th scope="col" class="lb-type">Type</th> | |
| <th scope="col" class="lb-turns">Turns</th> | |
| <th scope="col" class="lb-score sortable" data-col="ic">IC</th> | |
| <th scope="col" class="lb-score sortable" data-col="ec">EC</th> | |
| <th scope="col" class="lb-score sortable" data-col="rc">RC</th> | |
| <th scope="col" class="lb-score sortable active" data-col="area">Area</th> | |
| </tr> | |
| </thead> | |
| <!-- Empty body; rows are presumably rendered by /assets/js/demo.js (confirm there). --> | |
| <tbody id="leaderboard-body"></tbody> | |
| </table> | |
| </div> | |
| <p class="leaderboard-footnote"> | |
| <strong>IC</strong> = Internal Consistency (harmonic mean of non-contradiction &amp; cooperativeness). | |
| <strong>EC</strong> = External Consistency (harmonic mean of non-refutation &amp; coverage). | |
| <strong>RC</strong> = Retest Consistency (intra-session stability). | |
| <strong>Area</strong> = normalized triangle area on the IC–EC–RC radar chart. | |
| </p> | |
| </div> | |
| </div> | |
| <!-- ===== Use via Python ===== --> | |
| <div class="container" style="margin-top: 3rem;"> | |
| <div class="python-api-section"> | |
| <h2>Use via Python</h2> | |
| <p> | |
| Install the <code>picon</code> package to run evaluations programmatically — | |
| no web UI needed. | |
| </p> | |
| <div class="code-block"> | |
| <div class="code-header">Installation</div> | |
| <pre><code>pip install picon</code></pre> | |
| </div> | |
| <!-- Quick Start sample: calls picon.run with a persona, name, model, turn and session counts and do_eval=True, prints result.eval_scores, then saves the result to a JSON file. The text inside <pre> is shown verbatim, so whitespace there is significant. --> | |
| <div class="code-block"> | |
| <div class="code-header">Quick Start</div> | |
| <pre><code>import picon | |
| result = picon.run( | |
| persona="You are a 35-year-old software engineer named John...", | |
| name="John", | |
| model="gemini/gemini-2.5-flash", | |
| num_turns=20, | |
| num_sessions=2, | |
| do_eval=True, | |
| ) | |
| print(result.eval_scores) | |
| # { | |
| # "internal_harmonic_mean": 0.85, | |
| # "internal_responsiveness": 0.90, | |
| # "internal_consistency": 0.81, | |
| # "external_wilson": 0.72, | |
| # "inter_session_stability": 0.88, | |
| # "intra_session_stability": 0.91, | |
| # } | |
| result.save("results/john.json")</code></pre> | |
| </div> | |
| <!-- External-agent sample: only api_base and num_turns are passed. The endpoint uses https, matching the placeholder of the External Agent form's endpoint field. --> | |
| <div class="code-block"> | |
| <div class="code-header">External Agent (Blackbox API)</div> | |
| <pre><code># Only the endpoint URL is needed — persona is baked into the agent | |
| result = picon.run( | |
| api_base="https://your-server.com/v1", # OpenAI-compatible endpoint | |
| num_turns=30, | |
| )</code></pre> | |
| </div> | |
| <div class="code-block"> | |
| <div class="code-header">Evaluate Existing Results</div> | |
| <pre><code>scores = picon.evaluate("results/john.json") | |
| print(scores)</code></pre> | |
| </div> | |
| <p style="margin-top:1rem; font-size:0.875rem; color:var(--color-text-muted);"> | |
| See the <a href="https://github.com/willystumblr/picon">GitHub repository</a> | |
| for full documentation, CLI usage, and advanced configuration. | |
| </p> | |
| </div> | |
| </div> | |
| </main> | |
| <footer class="site-footer"> | |
| <div class="container"> | |
| <p>© 2026 PICon Authors. Built with Jekyll &amp; GitHub Pages.</p> | |
| </div> | |
| </footer> | |
| </div> | |
| <script src="/assets/js/demo.js"></script> | |
| </body> | |
| </html> | |