| """
|
| HallucinationGuard-Env v4.2 — Production FastAPI Server with Stunning 3D Documentation
|
|
|
| Features:
|
| - Animated 3D particle background
|
| - Floating geometric objects
|
| - Glassmorphism UI elements
|
| - Gradient text and buttons
|
| - Interactive playground with live testing
|
| - Smooth animations and transitions
|
|
|
| Endpoints:
|
| Standard : POST /reset POST /step GET /state GET /health
|
| Session : POST /session/reset POST /session/step DELETE /session
|
| Leaderboard: GET /leaderboard POST /leaderboard/submit
|
| OpenEnv : GET /tasks POST /grader POST /baseline
|
|
|
| """
|
|
|
| import sys, os, uuid, logging, dataclasses, enum, time, threading
|
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
| from fastapi import FastAPI, HTTPException, Header, Request
|
| from fastapi.responses import JSONResponse, RedirectResponse, HTMLResponse
|
| from fastapi.middleware.cors import CORSMiddleware
|
| from contextlib import asynccontextmanager
|
| from typing import Dict, Any, Optional, List
|
|
|
| from models import HallucinationAction, HallucinationObservation, HallucinationState
|
| from environment import HallucinationEnvironment
|
| from metrics import get_tracker
|
|
|
| from tasks import (
|
| ALL_TASKS, get_task, task_id_for_difficulty, compute_task_score, ACTION_SCHEMA,
|
| )
|
|
|
| logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
| logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
| STUNNING_DOCS_HTML = """
|
| <!DOCTYPE html>
|
| <html lang="en">
|
| <head>
|
| <meta charset="UTF-8">
|
| <meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| <title>HallucinationGuard-Env | Production RL Environment</title>
|
| <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet">
|
| <script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>
|
| <style>
|
| :root {
|
| --bg-deep: #030014;
|
| --bg-primary: #0a0518;
|
| --bg-secondary: #120826;
|
| --glass: rgba(255, 255, 255, 0.03);
|
| --glass-border: rgba(255, 255, 255, 0.08);
|
| --text-primary: #ffffff;
|
| --text-secondary: rgba(255, 255, 255, 0.7);
|
| --text-muted: rgba(255, 255, 255, 0.4);
|
| --accent-1: #7c3aed;
|
| --accent-2: #06b6d4;
|
| --accent-3: #f43f5e;
|
| --accent-4: #10b981;
|
| --gradient-1: linear-gradient(135deg, #7c3aed 0%, #06b6d4 50%, #10b981 100%);
|
| --gradient-2: linear-gradient(135deg, #f43f5e 0%, #7c3aed 100%);
|
| --gradient-3: linear-gradient(135deg, #06b6d4 0%, #10b981 100%);
|
| --glow-1: 0 0 40px rgba(124, 58, 237, 0.3);
|
| --glow-2: 0 0 60px rgba(6, 182, 212, 0.2);
|
| }
|
|
|
| * { margin: 0; padding: 0; box-sizing: border-box; }
|
|
|
| body {
|
| font-family: 'Space Grotesk', sans-serif;
|
| background: var(--bg-deep);
|
| color: var(--text-primary);
|
| overflow-x: hidden;
|
| min-height: 100vh;
|
| }
|
|
|
| /* Three.js Canvas Background */
|
| #bg-canvas {
|
| position: fixed;
|
| top: 0;
|
| left: 0;
|
| width: 100%;
|
| height: 100%;
|
| z-index: 0;
|
| }
|
|
|
| /* Animated Gradient Orbs */
|
| .orb {
|
| position: fixed;
|
| border-radius: 50%;
|
| filter: blur(80px);
|
| opacity: 0.4;
|
| animation: float 20s ease-in-out infinite;
|
| z-index: 1;
|
| pointer-events: none;
|
| }
|
|
|
| .orb-1 {
|
| width: 600px;
|
| height: 600px;
|
| background: var(--accent-1);
|
| top: -200px;
|
| right: -200px;
|
| animation-delay: 0s;
|
| }
|
|
|
| .orb-2 {
|
| width: 500px;
|
| height: 500px;
|
| background: var(--accent-2);
|
| bottom: -150px;
|
| left: -150px;
|
| animation-delay: -5s;
|
| }
|
|
|
| .orb-3 {
|
| width: 400px;
|
| height: 400px;
|
| background: var(--accent-3);
|
| top: 50%;
|
| left: 50%;
|
| transform: translate(-50%, -50%);
|
| animation-delay: -10s;
|
| }
|
|
|
| @keyframes float {
|
| 0%, 100% { transform: translate(0, 0) scale(1); }
|
| 25% { transform: translate(50px, -50px) scale(1.1); }
|
| 50% { transform: translate(-30px, 30px) scale(0.9); }
|
| 75% { transform: translate(-50px, -30px) scale(1.05); }
|
| }
|
|
|
| /* Grid Pattern Overlay */
|
| .grid-overlay {
|
| position: fixed;
|
| top: 0;
|
| left: 0;
|
| right: 0;
|
| bottom: 0;
|
| background-image:
|
| linear-gradient(rgba(255,255,255,0.02) 1px, transparent 1px),
|
| linear-gradient(90deg, rgba(255,255,255,0.02) 1px, transparent 1px);
|
| background-size: 50px 50px;
|
| z-index: 2;
|
| pointer-events: none;
|
| }
|
|
|
| /* Noise Texture */
|
| .noise {
|
| position: fixed;
|
| top: 0;
|
| left: 0;
|
| right: 0;
|
| bottom: 0;
|
| background: url("data:image/svg+xml,%3Csvg viewBox='0 0 200 200' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='noiseFilter'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23noiseFilter)'/%3E%3C/svg%3E");
|
| opacity: 0.03;
|
| z-index: 3;
|
| pointer-events: none;
|
| }
|
|
|
| /* Main Content Container */
|
| .content {
|
| position: relative;
|
| z-index: 10;
|
| }
|
|
|
| /* Navigation */
|
| nav {
|
| position: fixed;
|
| top: 0;
|
| left: 0;
|
| right: 0;
|
| z-index: 100;
|
| padding: 20px 40px;
|
| background: rgba(3, 0, 20, 0.6);
|
| backdrop-filter: blur(20px);
|
| border-bottom: 1px solid var(--glass-border);
|
| display: flex;
|
| align-items: center;
|
| justify-content: space-between;
|
| }
|
|
|
| .logo {
|
| display: flex;
|
| align-items: center;
|
| gap: 14px;
|
| }
|
|
|
| .logo-icon {
|
| width: 44px;
|
| height: 44px;
|
| background: var(--gradient-1);
|
| border-radius: 12px;
|
| display: flex;
|
| align-items: center;
|
| justify-content: center;
|
| font-size: 22px;
|
| box-shadow: var(--glow-1);
|
| animation: pulse-glow 3s ease-in-out infinite;
|
| }
|
|
|
| @keyframes pulse-glow {
|
| 0%, 100% { box-shadow: 0 0 20px rgba(124, 58, 237, 0.4); }
|
| 50% { box-shadow: 0 0 40px rgba(124, 58, 237, 0.6), 0 0 60px rgba(6, 182, 212, 0.3); }
|
| }
|
|
|
| .logo-text {
|
| font-size: 20px;
|
| font-weight: 600;
|
| background: var(--gradient-1);
|
| -webkit-background-clip: text;
|
| -webkit-text-fill-color: transparent;
|
| background-clip: text;
|
| }
|
|
|
| .nav-links {
|
| display: flex;
|
| gap: 8px;
|
| }
|
|
|
| .nav-link {
|
| padding: 10px 20px;
|
| border-radius: 10px;
|
| color: var(--text-secondary);
|
| text-decoration: none;
|
| font-size: 14px;
|
| font-weight: 500;
|
| transition: all 0.3s ease;
|
| border: 1px solid transparent;
|
| }
|
|
|
| .nav-link:hover {
|
| background: var(--glass);
|
| border-color: var(--glass-border);
|
| color: var(--text-primary);
|
| }
|
|
|
| .nav-link.active {
|
| background: var(--gradient-1);
|
| color: white;
|
| box-shadow: var(--glow-1);
|
| }
|
|
|
| .nav-btn {
|
| padding: 10px 24px;
|
| border-radius: 10px;
|
| background: var(--gradient-2);
|
| color: white;
|
| text-decoration: none;
|
| font-size: 14px;
|
| font-weight: 500;
|
| transition: all 0.3s ease;
|
| box-shadow: var(--glow-1);
|
| }
|
|
|
| .nav-btn:hover {
|
| transform: translateY(-2px);
|
| box-shadow: 0 0 30px rgba(244, 63, 94, 0.4);
|
| }
|
|
|
| /* Hero Section */
|
| .hero {
|
| min-height: 100vh;
|
| display: flex;
|
| flex-direction: column;
|
| align-items: center;
|
| justify-content: center;
|
| text-align: center;
|
| padding: 120px 40px 80px;
|
| }
|
|
|
| .hero-badge {
|
| display: inline-flex;
|
| align-items: center;
|
| gap: 10px;
|
| padding: 8px 20px;
|
| background: var(--glass);
|
| border: 1px solid var(--glass-border);
|
| border-radius: 50px;
|
| font-size: 13px;
|
| color: var(--text-secondary);
|
| margin-bottom: 32px;
|
| backdrop-filter: blur(10px);
|
| }
|
|
|
| .badge-dot {
|
| width: 8px;
|
| height: 8px;
|
| background: var(--accent-4);
|
| border-radius: 50%;
|
| animation: blink 2s ease-in-out infinite;
|
| }
|
|
|
| @keyframes blink {
|
| 0%, 100% { opacity: 1; box-shadow: 0 0 10px var(--accent-4); }
|
| 50% { opacity: 0.5; box-shadow: none; }
|
| }
|
|
|
| .hero h1 {
|
| font-size: 72px;
|
| font-weight: 700;
|
| line-height: 1.1;
|
| margin-bottom: 24px;
|
| background: linear-gradient(135deg, #fff 0%, rgba(255,255,255,0.7) 100%);
|
| -webkit-background-clip: text;
|
| -webkit-text-fill-color: transparent;
|
| background-clip: text;
|
| animation: fadeInUp 1s ease-out;
|
| }
|
|
|
| .hero h1 span {
|
| background: var(--gradient-1);
|
| -webkit-background-clip: text;
|
| -webkit-text-fill-color: transparent;
|
| background-clip: text;
|
| }
|
|
|
| @keyframes fadeInUp {
|
| from { opacity: 0; transform: translateY(30px); }
|
| to { opacity: 1; transform: translateY(0); }
|
| }
|
|
|
| .hero-subtitle {
|
| font-size: 22px;
|
| color: var(--text-secondary);
|
| max-width: 700px;
|
| margin-bottom: 48px;
|
| line-height: 1.6;
|
| animation: fadeInUp 1s ease-out 0.2s both;
|
| }
|
|
|
| .hero-buttons {
|
| display: flex;
|
| gap: 20px;
|
| margin-bottom: 80px;
|
| animation: fadeInUp 1s ease-out 0.4s both;
|
| }
|
|
|
| .btn {
|
| padding: 16px 36px;
|
| border-radius: 14px;
|
| font-size: 16px;
|
| font-weight: 600;
|
| text-decoration: none;
|
| display: inline-flex;
|
| align-items: center;
|
| gap: 10px;
|
| transition: all 0.3s ease;
|
| cursor: pointer;
|
| border: none;
|
| }
|
|
|
| .btn-primary {
|
| background: var(--gradient-1);
|
| color: white;
|
| box-shadow: var(--glow-1), var(--glow-2);
|
| }
|
|
|
| .btn-primary:hover {
|
| transform: translateY(-3px);
|
| box-shadow: 0 0 50px rgba(124, 58, 237, 0.5), 0 0 80px rgba(6, 182, 212, 0.3);
|
| }
|
|
|
| .btn-secondary {
|
| background: var(--glass);
|
| color: var(--text-primary);
|
| border: 1px solid var(--glass-border);
|
| backdrop-filter: blur(10px);
|
| }
|
|
|
| .btn-secondary:hover {
|
| background: rgba(255, 255, 255, 0.08);
|
| border-color: var(--accent-1);
|
| transform: translateY(-3px);
|
| }
|
|
|
| /* Stats Section */
|
| .stats-container {
|
| display: flex;
|
| justify-content: center;
|
| gap: 60px;
|
| flex-wrap: wrap;
|
| animation: fadeInUp 1s ease-out 0.6s both;
|
| }
|
|
|
| .stat-item {
|
| text-align: center;
|
| }
|
|
|
| .stat-value {
|
| font-size: 52px;
|
| font-weight: 700;
|
| background: var(--gradient-1);
|
| -webkit-background-clip: text;
|
| -webkit-text-fill-color: transparent;
|
| background-clip: text;
|
| line-height: 1;
|
| }
|
|
|
| .stat-label {
|
| font-size: 14px;
|
| color: var(--text-muted);
|
| margin-top: 8px;
|
| text-transform: uppercase;
|
| letter-spacing: 1px;
|
| }
|
|
|
| /* Floating Elements */
|
| .floating-shapes {
|
| position: fixed;
|
| top: 0;
|
| left: 0;
|
| right: 0;
|
| bottom: 0;
|
| pointer-events: none;
|
| z-index: 5;
|
| overflow: hidden;
|
| }
|
|
|
| .shape {
|
| position: absolute;
|
| opacity: 0.1;
|
| animation: shapeFloat 15s ease-in-out infinite;
|
| }
|
|
|
| .shape-1 { top: 20%; left: 10%; animation-delay: 0s; }
|
| .shape-2 { top: 60%; left: 80%; animation-delay: -3s; }
|
| .shape-3 { top: 80%; left: 20%; animation-delay: -6s; }
|
| .shape-4 { top: 30%; left: 70%; animation-delay: -9s; }
|
| .shape-5 { top: 70%; left: 50%; animation-delay: -12s; }
|
|
|
| @keyframes shapeFloat {
|
| 0%, 100% { transform: translateY(0) rotate(0deg); }
|
| 50% { transform: translateY(-30px) rotate(180deg); }
|
| }
|
|
|
| /* Section Container */
|
| .section {
|
| padding: 100px 40px;
|
| max-width: 1400px;
|
| margin: 0 auto;
|
| }
|
|
|
| .section-header {
|
| text-align: center;
|
| margin-bottom: 60px;
|
| }
|
|
|
| .section-title {
|
| font-size: 48px;
|
| font-weight: 700;
|
| margin-bottom: 16px;
|
| background: linear-gradient(135deg, #fff 0%, rgba(255,255,255,0.8) 100%);
|
| -webkit-background-clip: text;
|
| -webkit-text-fill-color: transparent;
|
| background-clip: text;
|
| }
|
|
|
| .section-subtitle {
|
| font-size: 18px;
|
| color: var(--text-secondary);
|
| }
|
|
|
| /* Glass Cards */
|
| .cards-grid {
|
| display: grid;
|
| grid-template-columns: repeat(auto-fit, minmax(350px, 1fr));
|
| gap: 24px;
|
| }
|
|
|
| .card {
|
| background: var(--glass);
|
| border: 1px solid var(--glass-border);
|
| border-radius: 20px;
|
| padding: 32px;
|
| backdrop-filter: blur(20px);
|
| transition: all 0.4s ease;
|
| position: relative;
|
| overflow: hidden;
|
| }
|
|
|
| .card::before {
|
| content: '';
|
| position: absolute;
|
| top: 0;
|
| left: 0;
|
| right: 0;
|
| height: 2px;
|
| background: var(--gradient-1);
|
| opacity: 0;
|
| transition: opacity 0.3s ease;
|
| }
|
|
|
| .card:hover {
|
| transform: translateY(-8px);
|
| border-color: var(--accent-1);
|
| box-shadow: var(--glow-1), 0 20px 40px rgba(0,0,0,0.3);
|
| }
|
|
|
| .card:hover::before {
|
| opacity: 1;
|
| }
|
|
|
| .card-icon {
|
| width: 56px;
|
| height: 56px;
|
| border-radius: 16px;
|
| display: flex;
|
| align-items: center;
|
| justify-content: center;
|
| font-size: 28px;
|
| margin-bottom: 20px;
|
| position: relative;
|
| }
|
|
|
| .card-icon.green {
|
| background: linear-gradient(135deg, rgba(16, 185, 129, 0.2) 0%, rgba(6, 182, 212, 0.2) 100%);
|
| box-shadow: 0 0 30px rgba(16, 185, 129, 0.2);
|
| }
|
|
|
| .card-icon.yellow {
|
| background: linear-gradient(135deg, rgba(251, 191, 36, 0.2) 0%, rgba(249, 115, 22, 0.2) 100%);
|
| box-shadow: 0 0 30px rgba(251, 191, 36, 0.2);
|
| }
|
|
|
| .card-icon.red {
|
| background: linear-gradient(135deg, rgba(244, 63, 94, 0.2) 0%, rgba(124, 58, 237, 0.2) 100%);
|
| box-shadow: 0 0 30px rgba(244, 63, 94, 0.2);
|
| }
|
|
|
| .card-title {
|
| font-size: 22px;
|
| font-weight: 600;
|
| margin-bottom: 12px;
|
| }
|
|
|
| .card-desc {
|
| color: var(--text-secondary);
|
| font-size: 15px;
|
| line-height: 1.6;
|
| margin-bottom: 20px;
|
| }
|
|
|
| .card-badge {
|
| display: inline-block;
|
| padding: 6px 14px;
|
| border-radius: 8px;
|
| font-size: 12px;
|
| font-weight: 600;
|
| text-transform: uppercase;
|
| letter-spacing: 0.5px;
|
| }
|
|
|
| .badge-beginner {
|
| background: rgba(16, 185, 129, 0.15);
|
| color: var(--accent-4);
|
| border: 1px solid rgba(16, 185, 129, 0.3);
|
| }
|
|
|
| .badge-intermediate {
|
| background: rgba(251, 191, 36, 0.15);
|
| color: #fbbf24;
|
| border: 1px solid rgba(251, 191, 36, 0.3);
|
| }
|
|
|
| .badge-advanced {
|
| background: rgba(244, 63, 94, 0.15);
|
| color: var(--accent-3);
|
| border: 1px solid rgba(244, 63, 94, 0.3);
|
| }
|
|
|
| /* Playground Section */
|
| .playground {
|
| background: var(--glass);
|
| border: 1px solid var(--glass-border);
|
| border-radius: 24px;
|
| overflow: hidden;
|
| backdrop-filter: blur(20px);
|
| }
|
|
|
| .playground-header {
|
| display: flex;
|
| background: rgba(255, 255, 255, 0.02);
|
| border-bottom: 1px solid var(--glass-border);
|
| }
|
|
|
| .playground-tab {
|
| padding: 18px 32px;
|
| font-size: 14px;
|
| font-weight: 500;
|
| color: var(--text-muted);
|
| cursor: pointer;
|
| border-bottom: 2px solid transparent;
|
| transition: all 0.3s ease;
|
| }
|
|
|
| .playground-tab:hover {
|
| color: var(--text-secondary);
|
| background: rgba(255, 255, 255, 0.02);
|
| }
|
|
|
| .playground-tab.active {
|
| color: var(--accent-1);
|
| border-bottom-color: var(--accent-1);
|
| background: rgba(124, 58, 237, 0.05);
|
| }
|
|
|
| .playground-body {
|
| display: grid;
|
| grid-template-columns: 1fr 1fr;
|
| min-height: 500px;
|
| }
|
|
|
| .playground-left, .playground-right {
|
| padding: 32px;
|
| }
|
|
|
| .playground-left {
|
| border-right: 1px solid var(--glass-border);
|
| }
|
|
|
| .playground-label {
|
| font-size: 11px;
|
| font-weight: 600;
|
| color: var(--text-muted);
|
| text-transform: uppercase;
|
| letter-spacing: 1px;
|
| margin-bottom: 16px;
|
| display: flex;
|
| align-items: center;
|
| gap: 8px;
|
| }
|
|
|
| .playground-label::before {
|
| content: '';
|
| width: 8px;
|
| height: 8px;
|
| background: var(--accent-1);
|
| border-radius: 2px;
|
| }
|
|
|
| .playground-textarea {
|
| width: 100%;
|
| height: 280px;
|
| background: rgba(0, 0, 0, 0.3);
|
| border: 1px solid var(--glass-border);
|
| border-radius: 12px;
|
| padding: 20px;
|
| font-family: 'Fira Code', monospace;
|
| font-size: 13px;
|
| color: var(--text-primary);
|
| resize: none;
|
| outline: none;
|
| transition: all 0.3s ease;
|
| }
|
|
|
| .playground-textarea:focus {
|
| border-color: var(--accent-1);
|
| box-shadow: 0 0 20px rgba(124, 58, 237, 0.2);
|
| }
|
|
|
| .btn-group {
|
| display: flex;
|
| gap: 16px;
|
| margin-top: 20px;
|
| }
|
|
|
| .result-box {
|
| width: 100%;
|
| height: 380px;
|
| background: rgba(0, 0, 0, 0.3);
|
| border: 1px solid var(--glass-border);
|
| border-radius: 12px;
|
| padding: 20px;
|
| font-family: 'Fira Code', monospace;
|
| font-size: 12px;
|
| color: var(--text-secondary);
|
| white-space: pre-wrap;
|
| overflow-y: auto;
|
| position: relative;
|
| }
|
|
|
| .result-box.success {
|
| border-color: var(--accent-4);
|
| box-shadow: 0 0 20px rgba(16, 185, 129, 0.1);
|
| }
|
|
|
| .result-box.error {
|
| border-color: var(--accent-3);
|
| box-shadow: 0 0 20px rgba(244, 63, 94, 0.1);
|
| }
|
|
|
| /* Endpoints Table */
|
| .endpoints-container {
|
| background: var(--glass);
|
| border: 1px solid var(--glass-border);
|
| border-radius: 20px;
|
| overflow: hidden;
|
| backdrop-filter: blur(20px);
|
| }
|
|
|
| .endpoint-row {
|
| display: grid;
|
| grid-template-columns: 100px 1fr 2fr;
|
| padding: 20px 32px;
|
| border-bottom: 1px solid var(--glass-border);
|
| transition: all 0.3s ease;
|
| align-items: center;
|
| }
|
|
|
| .endpoint-row:last-child {
|
| border-bottom: none;
|
| }
|
|
|
| .endpoint-row:hover {
|
| background: rgba(255, 255, 255, 0.02);
|
| }
|
|
|
| .method-badge {
|
| display: inline-flex;
|
| padding: 6px 12px;
|
| border-radius: 6px;
|
| font-size: 11px;
|
| font-weight: 700;
|
| font-family: 'Fira Code', monospace;
|
| letter-spacing: 0.5px;
|
| }
|
|
|
| .method-get {
|
| background: rgba(16, 185, 129, 0.15);
|
| color: var(--accent-4);
|
| border: 1px solid rgba(16, 185, 129, 0.3);
|
| }
|
|
|
| .method-post {
|
| background: rgba(124, 58, 237, 0.15);
|
| color: var(--accent-1);
|
| border: 1px solid rgba(124, 58, 237, 0.3);
|
| }
|
|
|
| .method-delete {
|
| background: rgba(244, 63, 94, 0.15);
|
| color: var(--accent-3);
|
| border: 1px solid rgba(244, 63, 94, 0.3);
|
| }
|
|
|
| .endpoint-path {
|
| font-family: 'Fira Code', monospace;
|
| font-size: 14px;
|
| color: var(--text-primary);
|
| padding-left: 20px;
|
| }
|
|
|
| .endpoint-desc {
|
| color: var(--text-secondary);
|
| font-size: 14px;
|
| padding-left: 20px;
|
| }
|
|
|
| /* Features Grid */
|
| .features-grid {
|
| display: grid;
|
| grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
| gap: 20px;
|
| margin-top: 40px;
|
| }
|
|
|
| .feature-item {
|
| display: flex;
|
| align-items: flex-start;
|
| gap: 16px;
|
| padding: 24px;
|
| background: var(--glass);
|
| border: 1px solid var(--glass-border);
|
| border-radius: 16px;
|
| transition: all 0.3s ease;
|
| }
|
|
|
| .feature-item:hover {
|
| border-color: var(--accent-1);
|
| transform: translateX(8px);
|
| }
|
|
|
| .feature-icon {
|
| width: 40px;
|
| height: 40px;
|
| background: var(--gradient-1);
|
| border-radius: 10px;
|
| display: flex;
|
| align-items: center;
|
| justify-content: center;
|
| font-size: 18px;
|
| flex-shrink: 0;
|
| }
|
|
|
| .feature-text h4 {
|
| font-size: 16px;
|
| font-weight: 600;
|
| margin-bottom: 4px;
|
| }
|
|
|
| .feature-text p {
|
| font-size: 13px;
|
| color: var(--text-secondary);
|
| }
|
|
|
| /* Footer */
|
| footer {
|
| padding: 60px 40px;
|
| border-top: 1px solid var(--glass-border);
|
| text-align: center;
|
| }
|
|
|
| .footer-text {
|
| color: var(--text-muted);
|
| font-size: 14px;
|
| margin-bottom: 20px;
|
| }
|
|
|
| .footer-links {
|
| display: flex;
|
| justify-content: center;
|
| gap: 32px;
|
| flex-wrap: wrap;
|
| }
|
|
|
| .footer-link {
|
| color: var(--text-secondary);
|
| text-decoration: none;
|
| font-size: 14px;
|
| transition: color 0.3s ease;
|
| display: flex;
|
| align-items: center;
|
| gap: 8px;
|
| }
|
|
|
| .footer-link:hover {
|
| color: var(--accent-1);
|
| }
|
|
|
| /* Responsive */
|
| @media (max-width: 900px) {
|
| .hero h1 { font-size: 48px; }
|
| .playground-body { grid-template-columns: 1fr; }
|
| .playground-left { border-right: none; border-bottom: 1px solid var(--glass-border); }
|
| .endpoint-row { grid-template-columns: 1fr; gap: 8px; }
|
| .nav-links { display: none; }
|
| nav { padding: 16px 20px; }
|
| .section { padding: 60px 20px; }
|
| }
|
|
|
| /* Scrollbar */
|
| ::-webkit-scrollbar { width: 8px; height: 8px; }
|
| ::-webkit-scrollbar-track { background: var(--bg-secondary); }
|
| ::-webkit-scrollbar-thumb { background: var(--glass-border); border-radius: 4px; }
|
| ::-webkit-scrollbar-thumb:hover { background: var(--accent-1); }
|
|
|
| /* Code syntax highlighting */
|
| .json-key { color: #7c3aed; }
|
| .json-string { color: #10b981; }
|
| .json-number { color: #06b6d4; }
|
| </style>
|
| </head>
|
| <body>
|
| <!-- Three.js Canvas -->
|
| <canvas id="bg-canvas"></canvas>
|
|
|
| <!-- Animated Orbs -->
|
| <div class="orb orb-1"></div>
|
| <div class="orb orb-2"></div>
|
| <div class="orb orb-3"></div>
|
|
|
| <!-- Grid Overlay -->
|
| <div class="grid-overlay"></div>
|
|
|
| <!-- Noise Texture -->
|
| <div class="noise"></div>
|
|
|
| <!-- Floating Shapes -->
|
| <div class="floating-shapes">
|
| <svg class="shape shape-1" width="60" height="60" viewBox="0 0 60 60">
|
| <polygon points="30,0 60,60 0,60" fill="none" stroke="rgba(124,58,237,0.3)" stroke-width="1"/>
|
| </svg>
|
| <svg class="shape shape-2" width="80" height="80" viewBox="0 0 80 80">
|
| <circle cx="40" cy="40" r="38" fill="none" stroke="rgba(6,182,212,0.3)" stroke-width="1"/>
|
| </svg>
|
| <svg class="shape shape-3" width="70" height="70" viewBox="0 0 70 70">
|
| <rect x="5" y="5" width="60" height="60" fill="none" stroke="rgba(244,63,94,0.3)" stroke-width="1" transform="rotate(45 35 35)"/>
|
| </svg>
|
| <svg class="shape shape-4" width="50" height="50" viewBox="0 0 50 50">
|
| <polygon points="25,0 50,25 25,50 0,25" fill="none" stroke="rgba(16,185,129,0.3)" stroke-width="1"/>
|
| </svg>
|
| <svg class="shape shape-5" width="60" height="60" viewBox="0 0 60 60">
|
| <polygon points="30,0 60,30 30,60 0,30" fill="none" stroke="rgba(124,58,237,0.3)" stroke-width="1"/>
|
| </svg>
|
| </div>
|
|
|
| <!-- Content -->
|
| <div class="content">
|
| <!-- Navigation -->
|
| <nav>
|
| <div class="logo">
|
| <div class="logo-icon">🛡️</div>
|
| <span class="logo-text">HallucinationGuard</span>
|
| </div>
|
| <div class="nav-links">
|
| <a href="#overview" class="nav-link">Overview</a>
|
| <a href="#tasks" class="nav-link">Tasks</a>
|
| <a href="#playground" class="nav-link active">Playground</a>
|
| <a href="#endpoints" class="nav-link">Endpoints</a>
|
| </div>
|
| <a href="/redoc" class="nav-btn">API Docs →</a>
|
| </nav>
|
|
|
| <!-- Hero Section -->
|
| <section class="hero">
|
| <div class="hero-badge">
|
| <span class="badge-dot"></span>
|
| <span>v4.2.0 • OpenEnv Compatible • Production Ready</span>
|
| </div>
|
| <h1>Train AI to Stop<br/><span>Hallucinating</span></h1>
|
| <p class="hero-subtitle">The production-grade RL environment for training and evaluating LLMs on hallucination avoidance. Built on 1M+ real-world examples across 38 benchmark datasets.</p>
|
| <div class="hero-buttons">
|
| <a href="#playground" class="btn btn-primary">
|
| <span>⚡</span> Try Interactive Demo
|
| </a>
|
| <a href="/redoc" class="btn btn-secondary">
|
| <span>📖</span> Full API Reference
|
| </a>
|
| </div>
|
| <div class="stats-container">
|
| <div class="stat-item">
|
| <div class="stat-value" data-count="1090163">0</div>
|
| <div class="stat-label">Examples</div>
|
| </div>
|
| <div class="stat-item">
|
| <div class="stat-value" data-count="38">0</div>
|
| <div class="stat-label">Datasets</div>
|
| </div>
|
| <div class="stat-item">
|
| <div class="stat-value" data-count="9">0</div>
|
| <div class="stat-label">Reward Components</div>
|
| </div>
|
| <div class="stat-item">
|
| <div class="stat-value" data-count="3">0</div>
|
| <div class="stat-label">Task Levels</div>
|
| </div>
|
| </div>
|
| </section>
|
|
|
| <!-- Features Section -->
|
| <section class="section" id="overview">
|
| <div class="section-header">
|
| <h2 class="section-title">Why HallucinationGuard?</h2>
|
| <p class="section-subtitle">Research-grade evaluation for grounded AI systems</p>
|
| </div>
|
| <div class="features-grid">
|
| <div class="feature-item">
|
| <div class="feature-icon">🎯</div>
|
| <div class="feature-text">
|
| <h4>Factual Grounding</h4>
|
| <p>Rewards answers derived strictly from provided context</p>
|
| </div>
|
| </div>
|
| <div class="feature-item">
|
| <div class="feature-icon">🔬</div>
|
| <div class="feature-text">
|
| <h4>9-Component Reward</h4>
|
| <p>Factual correctness, grounding, calibration, NLI, BERTScore...</p>
|
| </div>
|
| </div>
|
| <div class="feature-item">
|
| <div class="feature-icon">📊</div>
|
| <div class="feature-text">
|
| <h4>Real-World Datasets</h4>
|
| <p>SQuAD, HotpotQA, HaluEval, TruthfulQA, FEVER, and 33 more</p>
|
| </div>
|
| </div>
|
| <div class="feature-item">
|
| <div class="feature-icon">⚡</div>
|
| <div class="feature-text">
|
| <h4>Fast API</h4>
|
| <p>RESTful endpoints with OpenEnv compliance</p>
|
| </div>
|
| </div>
|
| <div class="feature-item">
|
| <div class="feature-icon">🧠</div>
|
| <div class="feature-text">
|
| <h4>NLI-Powered</h4>
|
| <p>Detects entailment and contradiction semantically</p>
|
| </div>
|
| </div>
|
| <div class="feature-item">
|
| <div class="feature-icon">🏆</div>
|
| <div class="feature-text">
|
| <h4>Leaderboard</h4>
|
| <p>Compare model performance across tasks</p>
|
| </div>
|
| </div>
|
| </div>
|
| </section>
|
|
|
| <!-- Tasks Section -->
|
| <section class="section" id="tasks">
|
| <div class="section-header">
|
| <h2 class="section-title">Three Difficulty Levels</h2>
|
| <p class="section-subtitle">Progressive curriculum from basic to adversarial</p>
|
| </div>
|
| <div class="cards-grid">
|
| <div class="card">
|
| <div class="card-icon green">🟢</div>
|
| <h3 class="card-title">Task 1: Factual Grounding</h3>
|
| <p class="card-desc">Answer straightforward factual questions from a short context passage. Single-hop retrieval with unambiguous ground truth. Perfect for initial training.</p>
|
| <span class="card-badge badge-beginner">Beginner</span>
|
| <div style="margin-top: 16px; font-size: 12px; color: var(--text-muted);">Datasets: SQuAD, BoolQ, ARC, OpenBookQA</div>
|
| </div>
|
| <div class="card">
|
| <div class="card-icon yellow">🟡</div>
|
| <h3 class="card-title">Task 2: Multi-Hop Synthesis</h3>
|
| <p class="card-desc">Synthesize evidence from multiple sentences. Connect disparate facts without fabricating bridging information. Requires reasoning chains.</p>
|
| <span class="card-badge badge-intermediate">Intermediate</span>
|
| <div style="margin-top: 16px; font-size: 12px; color: var(--text-muted);">Datasets: HotpotQA, CoQA, NQ-Open, MS-MARCO</div>
|
| </div>
|
| <div class="card">
|
| <div class="card-icon red">🔴</div>
|
| <h3 class="card-title">Task 3: Adversarial Resistance</h3>
|
| <p class="card-desc">Resist adversarial prompts designed to elicit hallucinations. Many questions are unanswerable — confident refusals are rewarded.</p>
|
| <span class="card-badge badge-advanced">Advanced</span>
|
| <div style="margin-top: 16px; font-size: 12px; color: var(--text-muted);">Datasets: HaluEval, TruthfulQA, FEVER, AdversarialQA</div>
|
| </div>
|
| </div>
|
| </section>
|
|
|
| <!-- Playground Section -->
|
| <section class="section" id="playground">
|
| <div class="section-header">
|
| <h2 class="section-title">Interactive Playground</h2>
|
| <p class="section-subtitle">Test the API directly in your browser</p>
|
| </div>
|
| <div class="playground">
|
| <div class="playground-header">
|
| <div class="playground-tab active" onclick="switchTab('reset')">🔄 Reset Episode</div>
|
| <div class="playground-tab" onclick="switchTab('step')">📝 Submit Answer</div>
|
| <div class="playground-tab" onclick="switchTab('batch')">📦 Batch Evaluate</div>
|
| <div class="playground-tab" onclick="switchTab('baseline')">🤖 Run Baseline</div>
|
| </div>
|
| <div class="playground-body">
|
| <div class="playground-left">
|
| <div class="playground-label">REQUEST BODY</div>
|
| <textarea id="request-body" class="playground-textarea" placeholder="Enter JSON request...">{
|
| "difficulty": "beginner",
|
| "seed": 42
|
| }</textarea>
|
| <div class="btn-group">
|
| <button class="btn btn-primary" onclick="sendRequest()">
|
| ▶ Send Request
|
| </button>
|
| <button class="btn btn-secondary" onclick="clearAll()">
|
| Clear
|
| </button>
|
| </div>
|
| </div>
|
| <div class="playground-right">
|
| <div class="playground-label">RESPONSE</div>
|
| <div id="result-box" class="result-box">
|
| <span style="color: var(--text-muted);">// Response will appear here...
|
| //
|
| // Click "Send Request" to test the API</span>
|
| </div>
|
| </div>
|
| </div>
|
| </div>
|
| </section>
|
|
|
| <!-- Endpoints Section -->
|
| <section class="section" id="endpoints">
|
| <div class="section-header">
|
| <h2 class="section-title">All Endpoints</h2>
|
| <p class="section-subtitle">Complete API reference at a glance</p>
|
| </div>
|
| <div class="endpoints-container">
|
| <div class="endpoint-row">
|
| <span class="method-badge method-post">POST</span>
|
| <span class="endpoint-path">/reset</span>
|
| <span class="endpoint-desc">Start a new episode with optional difficulty and seed</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-post">POST</span>
|
| <span class="endpoint-path">/step</span>
|
| <span class="endpoint-desc">Submit an answer with confidence and source citation</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-get">GET</span>
|
| <span class="endpoint-path">/state</span>
|
| <span class="endpoint-desc">Get current episode state, accuracy, and skill rating</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-get">GET</span>
|
| <span class="endpoint-path">/tasks</span>
|
| <span class="endpoint-desc">List all 3 tasks with complete action schema</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-post">POST</span>
|
| <span class="endpoint-path">/grader</span>
|
| <span class="endpoint-desc">Score a completed episode (returns 0.0–1.0)</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-post">POST</span>
|
| <span class="endpoint-path">/baseline</span>
|
| <span class="endpoint-desc">Run built-in heuristic baseline agent</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-post">POST</span>
|
| <span class="endpoint-path">/batch/evaluate</span>
|
| <span class="endpoint-desc">Evaluate multiple Q&A pairs in one request</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-get">GET</span>
|
| <span class="endpoint-path">/leaderboard</span>
|
| <span class="endpoint-desc">View ranked model performance</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-get">GET</span>
|
| <span class="endpoint-path">/health</span>
|
| <span class="endpoint-desc">Service health check</span>
|
| </div>
|
| <div class="endpoint-row">
|
| <span class="method-badge method-get">GET</span>
|
| <span class="endpoint-path">/datasets</span>
|
| <span class="endpoint-desc">Dataset statistics and distribution</span>
|
| </div>
|
| </div>
|
| </section>
|
|
|
| <!-- Footer -->
|
| <footer>
|
| <p class="footer-text">HallucinationGuard-Env — OpenEnv RL Environment for Hallucination Detection</p>
|
| <div class="footer-links">
|
| <a href="https://huggingface.co/spaces/SamSankar/hallucination-guard-env" class="footer-link">🤗 HuggingFace Space</a>
|
| <a href="https://pypi.org/project/openenv-halluguard/" class="footer-link">📦 PyPI Package</a>
|
| <a href="/redoc" class="footer-link">📖 API Reference</a>
|
| <a href="https://github.com/meta-pytorch/OpenEnv" class="footer-link">🔗 OpenEnv</a>
|
| </div>
|
| </footer>
|
| </div>
|
|
|
| <script>
|
| // ═══════════════════════════════════════════════════════════════════════════════
|
| // THREE.JS 3D BACKGROUND
|
| // ═══════════════════════════════════════════════════════════════════════════════
|
|
|
| const canvas = document.getElementById('bg-canvas');
|
| const renderer = new THREE.WebGLRenderer({ canvas, antialias: true, alpha: true });
|
| renderer.setSize(window.innerWidth, window.innerHeight);
|
| renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
|
|
|
| const scene = new THREE.Scene();
|
| const camera = new THREE.PerspectiveCamera(75, window.innerWidth / window.innerHeight, 0.1, 1000);
|
| camera.position.z = 30;
|
|
|
| // Particle system
|
| const particlesGeometry = new THREE.BufferGeometry();
|
| const particlesCount = 2000;
|
| const posArray = new Float32Array(particlesCount * 3);
|
|
|
| for(let i = 0; i < particlesCount * 3; i++) {
|
| posArray[i] = (Math.random() - 0.5) * 100;
|
| }
|
|
|
| particlesGeometry.setAttribute('position', new THREE.BufferAttribute(posArray, 3));
|
|
|
| const particlesMaterial = new THREE.PointsMaterial({
|
| size: 0.1,
|
| color: 0x7c3aed,
|
| transparent: true,
|
| opacity: 0.6,
|
| blending: THREE.AdditiveBlending
|
| });
|
|
|
| const particlesMesh = new THREE.Points(particlesGeometry, particlesMaterial);
|
| scene.add(particlesMesh);
|
|
|
| // Floating geometric objects
|
| const geometries = [
|
| new THREE.IcosahedronGeometry(2, 0),
|
| new THREE.OctahedronGeometry(2, 0),
|
| new THREE.TetrahedronGeometry(2, 0),
|
| new THREE.TorusGeometry(1.5, 0.5, 8, 16),
|
| ];
|
|
|
| const objects = [];
|
| const colors = [0x7c3aed, 0x06b6d4, 0xf43f5e, 0x10b981];
|
|
|
| geometries.forEach((geo, i) => {
|
| const material = new THREE.MeshBasicMaterial({
|
| color: colors[i],
|
| wireframe: true,
|
| transparent: true,
|
| opacity: 0.3
|
| });
|
| const mesh = new THREE.Mesh(geo, material);
|
| mesh.position.set(
|
| (Math.random() - 0.5) * 40,
|
| (Math.random() - 0.5) * 40,
|
| (Math.random() - 0.5) * 20 - 10
|
| );
|
| mesh.userData = {
|
| rotationSpeed: { x: Math.random() * 0.01, y: Math.random() * 0.01 },
|
| floatSpeed: Math.random() * 0.02 + 0.01,
|
| floatOffset: Math.random() * Math.PI * 2
|
| };
|
| objects.push(mesh);
|
| scene.add(mesh);
|
| });
|
|
|
| // Mouse movement effect
|
| let mouseX = 0, mouseY = 0;
|
| document.addEventListener('mousemove', (e) => {
|
| mouseX = (e.clientX / window.innerWidth) * 2 - 1;
|
| mouseY = -(e.clientY / window.innerHeight) * 2 + 1;
|
| });
|
|
|
| // Animation loop
|
| let time = 0;
|
| function animate() {
|
| requestAnimationFrame(animate);
|
| time += 0.01;
|
|
|
| particlesMesh.rotation.y += 0.001;
|
| particlesMesh.rotation.x += 0.0005;
|
|
|
| // Camera follows mouse slightly
|
| camera.position.x += (mouseX * 3 - camera.position.x) * 0.02;
|
| camera.position.y += (mouseY * 3 - camera.position.y) * 0.02;
|
| camera.lookAt(scene.position);
|
|
|
| // Animate floating objects
|
| objects.forEach((obj, i) => {
|
| obj.rotation.x += obj.userData.rotationSpeed.x;
|
| obj.rotation.y += obj.userData.rotationSpeed.y;
|
| obj.position.y += Math.sin(time + obj.userData.floatOffset) * 0.02;
|
| });
|
|
|
| renderer.render(scene, camera);
|
| }
|
| animate();
|
|
|
| // Resize handler
|
| window.addEventListener('resize', () => {
|
| camera.aspect = window.innerWidth / window.innerHeight;
|
| camera.updateProjectionMatrix();
|
| renderer.setSize(window.innerWidth, window.innerHeight);
|
| });
|
|
|
| // ═══════════════════════════════════════════════════════════════════════════════
|
| // PLAYGROUND FUNCTIONALITY
|
| // ═══════════════════════════════════════════════════════════════════════════════
|
|
|
| let currentTab = 'reset';
|
| const endpoints = {
|
| reset: '/reset',
|
| step: '/step',
|
| batch: '/batch/evaluate',
|
| baseline: '/baseline'
|
| };
|
|
|
| const placeholders = {
|
| reset: `{
|
| "difficulty": "beginner",
|
| "seed": 42
|
| }`,
|
| step: `{
|
| "answer": "Your answer derived from context",
|
| "confidence": 0.85,
|
| "source_quote": "Exact quote from context"
|
| }`,
|
| batch: `{
|
| "items": [
|
| {
|
| "question": "What is the capital of France?",
|
| "context": "The capital of France is Paris.",
|
| "answer": "Paris",
|
| "confidence": 0.9,
|
| "ground_truth": "Paris"
|
| }
|
| ],
|
| "task_id": "task_1_factual_grounding"
|
| }`,
|
| baseline: `{
|
| "steps_per_task": 5,
|
| "seed": 42
|
| }`
|
| };
|
|
|
| function switchTab(tab) {
|
| currentTab = tab;
|
| document.querySelectorAll('.playground-tab').forEach(t => {
|
| t.classList.toggle('active', t.textContent.toLowerCase().includes(tab));
|
| });
|
| document.getElementById('request-body').value = placeholders[tab];
|
| document.getElementById('result-box').innerHTML = '<span style="color: var(--text-muted);">// Response will appear here...</span>';
|
| document.getElementById('result-box').className = 'result-box';
|
| }
|
|
|
| async function sendRequest() {
|
| const body = document.getElementById('request-body').value;
|
| const resultBox = document.getElementById('result-box');
|
|
|
| try {
|
| resultBox.innerHTML = '<span style="color: var(--accent-2);">⏳ Sending request...</span>';
|
|
|
| const response = await fetch(endpoints[currentTab], {
|
| method: 'POST',
|
| headers: { 'Content-Type': 'application/json' },
|
| body: body
|
| });
|
|
|
| const data = await response.json();
|
| resultBox.className = 'result-box success';
|
| resultBox.textContent = JSON.stringify(data, null, 2);
|
| } catch (error) {
|
| resultBox.className = 'result-box error';
|
| resultBox.textContent = 'Error: ' + error.message;
|
| }
|
| }
|
|
|
| function clearAll() {
|
| document.getElementById('request-body').value = placeholders[currentTab];
|
| document.getElementById('result-box').innerHTML = '<span style="color: var(--text-muted);">// Response will appear here...</span>';
|
| document.getElementById('result-box').className = 'result-box';
|
| }
|
|
|
| // ════════════════════���══════════════════════════════════════════════════════════
|
| // ANIMATED COUNTERS
|
| // ═══════════════════════════════════════════════════════════════════════════════
|
|
|
| function animateCounters() {
|
| const counters = document.querySelectorAll('.stat-value[data-count]');
|
| counters.forEach(counter => {
|
| const target = parseInt(counter.getAttribute('data-count'));
|
| const duration = 2000;
|
| const start = performance.now();
|
|
|
| function update(currentTime) {
|
| const elapsed = currentTime - start;
|
| const progress = Math.min(elapsed / duration, 1);
|
| const easeOut = 1 - Math.pow(1 - progress, 3);
|
| const current = Math.floor(easeOut * target);
|
|
|
| counter.textContent = current.toLocaleString();
|
|
|
| if (progress < 1) {
|
| requestAnimationFrame(update);
|
| } else {
|
| counter.textContent = target >= 1000000 ? '1M+' : target.toLocaleString();
|
| }
|
| }
|
|
|
| requestAnimationFrame(update);
|
| });
|
| }
|
|
|
| // Intersection Observer for counter animation
|
| const statsObserver = new IntersectionObserver((entries) => {
|
| entries.forEach(entry => {
|
| if (entry.isIntersecting) {
|
| animateCounters();
|
| statsObserver.disconnect();
|
| }
|
| });
|
| }, { threshold: 0.5 });
|
|
|
| const statsContainer = document.querySelector('.stats-container');
|
| if (statsContainer) {
|
| statsObserver.observe(statsContainer);
|
| }
|
|
|
| // Smooth scroll for navigation
|
| document.querySelectorAll('a[href^="#"]').forEach(anchor => {
|
| anchor.addEventListener('click', function(e) {
|
| e.preventDefault();
|
| const target = document.querySelector(this.getAttribute('href'));
|
| if (target) {
|
| target.scrollIntoView({ behavior: 'smooth', block: 'start' });
|
| }
|
| });
|
| });
|
| </script>
|
| </body>
|
| </html>
|
| """
|
|
|
|
|
|
|
|
|
|
|
| _default_env: Optional[HallucinationEnvironment] = None
|
| _env_loading = False
|
| _env_lock = threading.Lock()
|
|
|
| def _get_default_env() -> HallucinationEnvironment:
|
| global _default_env, _env_loading
|
| if _default_env is not None:
|
| return _default_env
|
| with _env_lock:
|
| if _default_env is not None:
|
| return _default_env
|
| _env_loading = True
|
| try:
|
| logger.info("Creating HallucinationEnvironment...")
|
| _default_env = HallucinationEnvironment()
|
| logger.info(f"Environment ready — {_default_env.dataset_loader.get_total_examples():,} examples loaded.")
|
| return _default_env
|
| except Exception as e:
|
| logger.error(f"Failed to create environment: {e}")
|
|
|
| from dataset_loader import DatasetLoader
|
| class MinimalEnv:
|
| def __init__(self):
|
| self.dataset_loader = DatasetLoader()
|
| self.dataset_loader.examples = []
|
| def reset(self, **kwargs):
|
| return type('Obs', (), {'question': 'Placeholder', 'context': 'Context', 'reward': 0.0, 'done': False, 'info': {}})()
|
| def step(self, action):
|
| return type('Obs', (), {'reward': 0.0, 'done': False, 'is_hallucination': False, 'info': {}})()
|
| def state(self): return {}
|
| def close(self): pass
|
| _default_env = MinimalEnv()
|
| return _default_env
|
| finally:
|
| _env_loading = False
|
|
|
| @asynccontextmanager
|
| async def lifespan(app: FastAPI):
|
| global _default_env
|
|
|
| def preload_models():
|
| try:
|
| logger.info("Preloading ML models...")
|
| from sentence_transformers import SentenceTransformer, CrossEncoder
|
| SentenceTransformer('all-MiniLM-L6-v2')
|
| CrossEncoder('cross-encoder/nli-deberta-v3-small')
|
| from rouge_score import rouge_scorer
|
| rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
|
| try:
|
| from bert_score import BERTScorer
|
| BERTScorer(model_type='microsoft/deberta-v3-base', lang='en', device='cpu')
|
| except: pass
|
| logger.info("All ML models preloaded!")
|
| except Exception as e:
|
| logger.error(f"Model preload failed: {e}")
|
|
|
| threading.Thread(target=preload_models, daemon=True).start()
|
|
|
| def background_load():
|
| try:
|
| logger.info("Background dataset loading...")
|
| env = _get_default_env()
|
| logger.info(f"Loaded {env.dataset_loader.get_total_examples():,} examples.")
|
| except Exception as e:
|
| logger.error(f"Background loading failed: {e}")
|
|
|
| threading.Thread(target=background_load, daemon=True).start()
|
| yield
|
| if _default_env:
|
| try: _default_env.close()
|
| except: pass
|
|
|
| app = FastAPI(
|
| lifespan=lifespan,
|
| title="HallucinationGuard-Env",
|
| version="4.2.0",
|
| docs_url="/swagger",
|
| redoc_url="/redoc",
|
| )
|
|
|
| app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
|
|
| _sessions: Dict[str, HallucinationEnvironment] = {}
|
| import json as _json
|
| _LEADERBOARD_FILE = "/tmp/hallucination_guard_leaderboard.json"
|
|
|
| def _load_leaderboard():
|
| if os.path.exists(_LEADERBOARD_FILE):
|
| try: return _json.load(open(_LEADERBOARD_FILE))
|
| except: pass
|
| return {}
|
|
|
| def _save_leaderboard(lb):
|
| try: _json.dump(lb, open(_LEADERBOARD_FILE, "w"), indent=2)
|
| except: pass
|
|
|
| _leaderboard: Dict[str, Dict[str, Any]] = _load_leaderboard()
|
|
|
| def _safe_dict(obj):
|
| if hasattr(obj, 'model_dump'): return _safe_dict(obj.model_dump())
|
| if hasattr(obj, 'dict'): return _safe_dict(obj.dict())
|
| if dataclasses.is_dataclass(obj): return {f.name: _safe_dict(getattr(obj, f.name)) for f in dataclasses.fields(obj)}
|
| if isinstance(obj, enum.Enum): return obj.value
|
| if isinstance(obj, dict): return {k: _safe_dict(v) for k, v in obj.items()}
|
| if isinstance(obj, list): return [_safe_dict(i) for i in obj]
|
| if isinstance(obj, (str, int, float, bool, type(None))): return obj
|
| return str(obj)
|
|
|
|
|
|
|
|
|
|
|
| @app.get("/", include_in_schema=False, response_class=HTMLResponse)
|
| async def root(): return STUNNING_DOCS_HTML
|
|
|
| @app.get("/docs", include_in_schema=False, response_class=HTMLResponse)
|
| async def docs(): return STUNNING_DOCS_HTML
|
|
|
| @app.post("/reset", tags=["Environment"])
|
| async def reset(body: Dict[str, Any] = {}):
|
| try:
|
| env = _get_default_env()
|
| obs = env.reset(**{k: v for k, v in body.items() if k in ("seed", "episode_id", "difficulty")})
|
| return JSONResponse(content=_safe_dict(obs))
|
| except Exception as e:
|
| import traceback
|
| logger.error(f"Reset error: {e}\n{traceback.format_exc()}")
|
| raise HTTPException(500, str(e))
|
|
|
| @app.post("/step", tags=["Environment"])
|
| async def step(action_data: Dict[str, Any]):
|
| try:
|
| env = _get_default_env()
|
| valid = set(HallucinationAction.model_fields.keys()) if hasattr(HallucinationAction, 'model_fields') else set(HallucinationAction.__fields__.keys())
|
| action = HallucinationAction(**{k: v for k, v in action_data.items() if k in valid})
|
| return JSONResponse(content=_safe_dict(env.step(action)))
|
| except Exception as e:
|
| raise HTTPException(500, str(e))
|
|
|
| @app.get("/state", tags=["Environment"])
|
| async def get_state():
|
| try:
|
| return JSONResponse(content=_safe_dict(_get_default_env().state()))
|
| except Exception as e:
|
| raise HTTPException(500, str(e))
|
|
|
| @app.get("/tasks", tags=["OpenEnv"])
|
| async def list_tasks():
|
| ordered = ["task_1_factual_grounding", "task_2_multi_hop_synthesis", "task_3_adversarial_resistance"]
|
| return {"tasks": [ALL_TASKS[t].to_dict() for t in ordered if t in ALL_TASKS], "action_schema": ACTION_SCHEMA}
|
|
|
| @app.post("/grader", tags=["OpenEnv"])
|
| async def grade_episode(body: Dict[str, Any]):
|
| task_id = body.get("task_id")
|
| if not task_id: raise HTTPException(422, "'task_id' required")
|
| task = get_task(task_id)
|
| if not task: raise HTTPException(404, f"task_id '{task_id}' not found")
|
| rewards, infos = body.get("step_rewards", []), body.get("step_infos", [])
|
| if not infos and rewards: return {"task_id": task_id, "score": round(sum(rewards)/len(rewards), 4)}
|
| return compute_task_score(task, rewards, infos)
|
|
|
| @app.post("/baseline", tags=["OpenEnv"])
|
| async def run_baseline(body: Dict[str, Any] = {}):
|
| steps = max(3, min(10, int(body.get("steps_per_task", 5))))
|
| seed = int(body.get("seed", 42))
|
| results = []
|
| for task_id, diff in [("task_1_factual_grounding","beginner"),("task_2_multi_hop_synthesis","intermediate"),("task_3_adversarial_resistance","advanced")]:
|
| task = get_task(task_id)
|
| if not task: continue
|
| sid = f"bl_{task_id}_{seed}"
|
| if sid in _sessions: _sessions[sid].close()
|
| _sessions[sid] = HallucinationEnvironment(session_id=sid)
|
| obs = _safe_dict(_sessions[sid].reset(seed=seed, difficulty=diff))
|
| rewards, infos = [], []
|
| for _ in range(steps):
|
| if obs.get("done"): break
|
| ctx = obs.get("context", "")
|
| action = HallucinationAction(answer=ctx[:100], confidence=0.6, source_quote=ctx[:80])
|
| obs = _safe_dict(_sessions[sid].step(action))
|
| rewards.append(float(obs.get("reward") or 0))
|
| infos.append({"correctness": obs.get("grounding_score", 0), "is_hallucination": obs.get("is_hallucination", False)})
|
| results.append(compute_task_score(task, rewards, infos))
|
| try: _sessions[sid].close(); del _sessions[sid]
|
| except: pass
|
| return {"tasks": results, "summary": {"overall_score": round(sum(r["score"] for r in results)/max(len(results),1), 4)}}
|
|
|
| @app.post("/batch/evaluate", tags=["Evaluation"])
|
| async def batch_evaluate(body: Dict[str, Any]):
|
| items = body.get("items", [])
|
| if not items: raise HTTPException(422, "'items' required")
|
| from server.grader import calculate_reward
|
| results = []
|
| for i, item in enumerate(items):
|
| r, info = calculate_reward(item.get("answer",""), item.get("confidence",0.5), item.get("source_quote",""), item.get("context",""), item.get("ground_truth",""))
|
| results.append({"index": i, "reward": round(r,4), "is_hallucination": info.get("is_hallucination", False)})
|
| return {"total_items": len(results), "results": results}
|
|
|
| @app.get("/leaderboard", tags=["Leaderboard"])
|
| async def leaderboard():
|
| if not _leaderboard: return {"leaderboard": [], "message": "No submissions"}
|
| ranked = sorted(_leaderboard.values(), key=lambda x: x.get("avg_reward",0), reverse=True)
|
| for i, e in enumerate(ranked): e["rank"] = i+1
|
| return {"leaderboard": ranked}
|
|
|
| @app.post("/leaderboard/submit", tags=["Leaderboard"])
|
| async def submit_leaderboard(data: Dict[str, Any]):
|
| required = ["model_name", "avg_reward", "avg_accuracy", "hallucination_rate", "total_episodes", "total_steps"]
|
| if missing := [f for f in required if f not in data]: raise HTTPException(422, f"Missing: {missing}")
|
| _leaderboard[data["model_name"]] = {**data, "submitted_at": time.time()}
|
| _save_leaderboard(_leaderboard)
|
| return {"status": "submitted", "model_name": data["model_name"]}
|
|
|
| @app.get("/health", tags=["Info"])
|
| async def health(): return {"status": "healthy", "version": "4.2.0"}
|
|
|
| @app.get("/metadata", tags=["OpenEnv"])
|
| async def metadata(): return {"name": "hallucination-guard-env", "version": "4.2.0", "license": "MIT"}
|
|
|
| @app.get("/schema", tags=["OpenEnv"])
|
| async def schema(): return {"action": {"type": "object", "required": ["answer"]}, "observation": {"type": "object"}}
|
|
|
| @app.get("/datasets", tags=["Info"])
|
| async def datasets():
|
| try: return {"total_examples": _get_default_env().dataset_loader.get_total_examples()}
|
| except: return {"total_examples": 0}
|
|
|
| @app.post("/mcp", tags=["OpenEnv"])
|
| async def mcp(body: Dict[str, Any]):
|
| if body.get("method") == "tools/list":
|
| return {"jsonrpc": "2.0", "id": body.get("id",1), "result": {"tools": [{"name": "reset", "inputSchema": {"type": "object"}}, {"name": "step", "inputSchema": {"type": "object"}}]}}
|
| return {"jsonrpc": "2.0", "id": body.get("id",1), "result": {"name": "hallucination-guard-env", "version": "4.2.0"}}
|
|
|
| @app.middleware("http")
|
| async def log_req(request, call_next):
|
| resp = await call_next(request)
|
| logger.info(f"{request.method} {request.url.path} → {resp.status_code}")
|
| return resp
|
|
|
| def main():
|
| import uvicorn
|
| uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
| if __name__ == "__main__":
|
| main()
|
|
|