Spaces:

SamSankar
/

hallucination-guard-env

Running

App Files Files Community

hallucination-guard-env / server /app.py

SamSankar

Upload app.py

9131831 verified 7 days ago

raw

history blame contribute delete

63.6 kB

	"""
	HallucinationGuard-Env v4.2 — Production FastAPI Server with Stunning 3D Documentation

	Features:
	- Animated 3D particle background
	- Floating geometric objects
	- Glassmorphism UI elements
	- Gradient text and buttons
	- Interactive playground with live testing
	- Smooth animations and transitions

	Endpoints:
	Standard   : POST /reset, POST /step, GET /state, GET /health
	Session    : POST /session/reset, POST /session/step, DELETE /session
	Leaderboard: GET /leaderboard, POST /leaderboard/submit
	OpenEnv    : GET /tasks, POST /grader, POST /baseline
	Docs page  : also lists POST /batch/evaluate and GET /datasets

	"""

	import sys, os, uuid, logging, dataclasses, enum, time, threading
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from fastapi import FastAPI, HTTPException, Header, Request
	from fastapi.responses import JSONResponse, RedirectResponse, HTMLResponse
	from fastapi.middleware.cors import CORSMiddleware
	from contextlib import asynccontextmanager
	from typing import Dict, Any, Optional, List

	from models import HallucinationAction, HallucinationObservation, HallucinationState
	from environment import HallucinationEnvironment
	from metrics import get_tracker

	from tasks import (
	ALL_TASKS, get_task, task_id_for_difficulty, compute_task_score, ACTION_SCHEMA,
	)

	# Configure root logging at import time: INFO level, with timestamp, logger
	# name, and severity prefixed to every message.
	logging.basicConfig(
		format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
		level=logging.INFO,
	)
	# Logger named after this module.
	logger = logging.getLogger(__name__)

	# ═══════════════════════════════════════════════════════════════════════════════
	# STUNNING 3D ANIMATED DOCUMENTATION
	# ═══════════════════════════════════════════════════════════════════════════════

	STUNNING_DOCS_HTML = """
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>HallucinationGuard-Env | Production RL Environment</title>
	<link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet">
	<script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>
	<style>
	:root {
	--bg-deep: #030014;
	--bg-primary: #0a0518;
	--bg-secondary: #120826;
	--glass: rgba(255, 255, 255, 0.03);
	--glass-border: rgba(255, 255, 255, 0.08);
	--text-primary: #ffffff;
	--text-secondary: rgba(255, 255, 255, 0.7);
	--text-muted: rgba(255, 255, 255, 0.4);
	--accent-1: #7c3aed;
	--accent-2: #06b6d4;
	--accent-3: #f43f5e;
	--accent-4: #10b981;
	--gradient-1: linear-gradient(135deg, #7c3aed 0%, #06b6d4 50%, #10b981 100%);
	--gradient-2: linear-gradient(135deg, #f43f5e 0%, #7c3aed 100%);
	--gradient-3: linear-gradient(135deg, #06b6d4 0%, #10b981 100%);
	--glow-1: 0 0 40px rgba(124, 58, 237, 0.3);
	--glow-2: 0 0 60px rgba(6, 182, 212, 0.2);
	}

	* { margin: 0; padding: 0; box-sizing: border-box; }

	body {
	font-family: 'Space Grotesk', sans-serif;
	background: var(--bg-deep);
	color: var(--text-primary);
	overflow-x: hidden;
	min-height: 100vh;
	}

	/* Three.js Canvas Background */
	#bg-canvas {
	position: fixed;
	top: 0;
	left: 0;
	width: 100%;
	height: 100%;
	z-index: 0;
	}

	/* Animated Gradient Orbs */
	.orb {
	position: fixed;
	border-radius: 50%;
	filter: blur(80px);
	opacity: 0.4;
	animation: float 20s ease-in-out infinite;
	z-index: 1;
	pointer-events: none;
	}

	.orb-1 {
	width: 600px;
	height: 600px;
	background: var(--accent-1);
	top: -200px;
	right: -200px;
	animation-delay: 0s;
	}

	.orb-2 {
	width: 500px;
	height: 500px;
	background: var(--accent-2);
	bottom: -150px;
	left: -150px;
	animation-delay: -5s;
	}

	.orb-3 {
	width: 400px;
	height: 400px;
	background: var(--accent-3);
	top: 50%;
	left: 50%;
	transform: translate(-50%, -50%);
	animation-delay: -10s;
	}

	@keyframes float {
	0%, 100% { transform: translate(0, 0) scale(1); }
	25% { transform: translate(50px, -50px) scale(1.1); }
	50% { transform: translate(-30px, 30px) scale(0.9); }
	75% { transform: translate(-50px, -30px) scale(1.05); }
	}

	/* Grid Pattern Overlay */
	.grid-overlay {
	position: fixed;
	top: 0;
	left: 0;
	right: 0;
	bottom: 0;
	background-image:
	linear-gradient(rgba(255,255,255,0.02) 1px, transparent 1px),
	linear-gradient(90deg, rgba(255,255,255,0.02) 1px, transparent 1px);
	background-size: 50px 50px;
	z-index: 2;
	pointer-events: none;
	}

	/* Noise Texture */
	.noise {
	position: fixed;
	top: 0;
	left: 0;
	right: 0;
	bottom: 0;
	background: url("data:image/svg+xml,%3Csvg viewBox='0 0 200 200' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='noiseFilter'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23noiseFilter)'/%3E%3C/svg%3E");
	opacity: 0.03;
	z-index: 3;
	pointer-events: none;
	}

	/* Main Content Container */
	.content {
	position: relative;
	z-index: 10;
	}

	/* Navigation */
	nav {
	position: fixed;
	top: 0;
	left: 0;
	right: 0;
	z-index: 100;
	padding: 20px 40px;
	background: rgba(3, 0, 20, 0.6);
	backdrop-filter: blur(20px);
	border-bottom: 1px solid var(--glass-border);
	display: flex;
	align-items: center;
	justify-content: space-between;
	}

	.logo {
	display: flex;
	align-items: center;
	gap: 14px;
	}

	.logo-icon {
	width: 44px;
	height: 44px;
	background: var(--gradient-1);
	border-radius: 12px;
	display: flex;
	align-items: center;
	justify-content: center;
	font-size: 22px;
	box-shadow: var(--glow-1);
	animation: pulse-glow 3s ease-in-out infinite;
	}

	@keyframes pulse-glow {
	0%, 100% { box-shadow: 0 0 20px rgba(124, 58, 237, 0.4); }
	50% { box-shadow: 0 0 40px rgba(124, 58, 237, 0.6), 0 0 60px rgba(6, 182, 212, 0.3); }
	}

	.logo-text {
	font-size: 20px;
	font-weight: 600;
	background: var(--gradient-1);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	}

	.nav-links {
	display: flex;
	gap: 8px;
	}

	.nav-link {
	padding: 10px 20px;
	border-radius: 10px;
	color: var(--text-secondary);
	text-decoration: none;
	font-size: 14px;
	font-weight: 500;
	transition: all 0.3s ease;
	border: 1px solid transparent;
	}

	.nav-link:hover {
	background: var(--glass);
	border-color: var(--glass-border);
	color: var(--text-primary);
	}

	.nav-link.active {
	background: var(--gradient-1);
	color: white;
	box-shadow: var(--glow-1);
	}

	.nav-btn {
	padding: 10px 24px;
	border-radius: 10px;
	background: var(--gradient-2);
	color: white;
	text-decoration: none;
	font-size: 14px;
	font-weight: 500;
	transition: all 0.3s ease;
	box-shadow: var(--glow-1);
	}

	.nav-btn:hover {
	transform: translateY(-2px);
	box-shadow: 0 0 30px rgba(244, 63, 94, 0.4);
	}

	/* Hero Section */
	.hero {
	min-height: 100vh;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	text-align: center;
	padding: 120px 40px 80px;
	}

	.hero-badge {
	display: inline-flex;
	align-items: center;
	gap: 10px;
	padding: 8px 20px;
	background: var(--glass);
	border: 1px solid var(--glass-border);
	border-radius: 50px;
	font-size: 13px;
	color: var(--text-secondary);
	margin-bottom: 32px;
	backdrop-filter: blur(10px);
	}

	.badge-dot {
	width: 8px;
	height: 8px;
	background: var(--accent-4);
	border-radius: 50%;
	animation: blink 2s ease-in-out infinite;
	}

	@keyframes blink {
	0%, 100% { opacity: 1; box-shadow: 0 0 10px var(--accent-4); }
	50% { opacity: 0.5; box-shadow: none; }
	}

	.hero h1 {
	font-size: 72px;
	font-weight: 700;
	line-height: 1.1;
	margin-bottom: 24px;
	background: linear-gradient(135deg, #fff 0%, rgba(255,255,255,0.7) 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	animation: fadeInUp 1s ease-out;
	}

	.hero h1 span {
	background: var(--gradient-1);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	}

	@keyframes fadeInUp {
	from { opacity: 0; transform: translateY(30px); }
	to { opacity: 1; transform: translateY(0); }
	}

	.hero-subtitle {
	font-size: 22px;
	color: var(--text-secondary);
	max-width: 700px;
	margin-bottom: 48px;
	line-height: 1.6;
	animation: fadeInUp 1s ease-out 0.2s both;
	}

	.hero-buttons {
	display: flex;
	gap: 20px;
	margin-bottom: 80px;
	animation: fadeInUp 1s ease-out 0.4s both;
	}

	.btn {
	padding: 16px 36px;
	border-radius: 14px;
	font-size: 16px;
	font-weight: 600;
	text-decoration: none;
	display: inline-flex;
	align-items: center;
	gap: 10px;
	transition: all 0.3s ease;
	cursor: pointer;
	border: none;
	}

	.btn-primary {
	background: var(--gradient-1);
	color: white;
	box-shadow: var(--glow-1), var(--glow-2);
	}

	.btn-primary:hover {
	transform: translateY(-3px);
	box-shadow: 0 0 50px rgba(124, 58, 237, 0.5), 0 0 80px rgba(6, 182, 212, 0.3);
	}

	.btn-secondary {
	background: var(--glass);
	color: var(--text-primary);
	border: 1px solid var(--glass-border);
	backdrop-filter: blur(10px);
	}

	.btn-secondary:hover {
	background: rgba(255, 255, 255, 0.08);
	border-color: var(--accent-1);
	transform: translateY(-3px);
	}

	/* Stats Section */
	.stats-container {
	display: flex;
	justify-content: center;
	gap: 60px;
	flex-wrap: wrap;
	animation: fadeInUp 1s ease-out 0.6s both;
	}

	.stat-item {
	text-align: center;
	}

	.stat-value {
	font-size: 52px;
	font-weight: 700;
	background: var(--gradient-1);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	line-height: 1;
	}

	.stat-label {
	font-size: 14px;
	color: var(--text-muted);
	margin-top: 8px;
	text-transform: uppercase;
	letter-spacing: 1px;
	}

	/* Floating Elements */
	.floating-shapes {
	position: fixed;
	top: 0;
	left: 0;
	right: 0;
	bottom: 0;
	pointer-events: none;
	z-index: 5;
	overflow: hidden;
	}

	.shape {
	position: absolute;
	opacity: 0.1;
	animation: shapeFloat 15s ease-in-out infinite;
	}

	.shape-1 { top: 20%; left: 10%; animation-delay: 0s; }
	.shape-2 { top: 60%; left: 80%; animation-delay: -3s; }
	.shape-3 { top: 80%; left: 20%; animation-delay: -6s; }
	.shape-4 { top: 30%; left: 70%; animation-delay: -9s; }
	.shape-5 { top: 70%; left: 50%; animation-delay: -12s; }

	@keyframes shapeFloat {
	0%, 100% { transform: translateY(0) rotate(0deg); }
	50% { transform: translateY(-30px) rotate(180deg); }
	}

	/* Section Container */
	.section {
	padding: 100px 40px;
	max-width: 1400px;
	margin: 0 auto;
	}

	.section-header {
	text-align: center;
	margin-bottom: 60px;
	}

	.section-title {
	font-size: 48px;
	font-weight: 700;
	margin-bottom: 16px;
	background: linear-gradient(135deg, #fff 0%, rgba(255,255,255,0.8) 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	}

	.section-subtitle {
	font-size: 18px;
	color: var(--text-secondary);
	}

	/* Glass Cards */
	.cards-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(350px, 1fr));
	gap: 24px;
	}

	.card {
	background: var(--glass);
	border: 1px solid var(--glass-border);
	border-radius: 20px;
	padding: 32px;
	backdrop-filter: blur(20px);
	transition: all 0.4s ease;
	position: relative;
	overflow: hidden;
	}

	.card::before {
	content: '';
	position: absolute;
	top: 0;
	left: 0;
	right: 0;
	height: 2px;
	background: var(--gradient-1);
	opacity: 0;
	transition: opacity 0.3s ease;
	}

	.card:hover {
	transform: translateY(-8px);
	border-color: var(--accent-1);
	box-shadow: var(--glow-1), 0 20px 40px rgba(0,0,0,0.3);
	}

	.card:hover::before {
	opacity: 1;
	}

	.card-icon {
	width: 56px;
	height: 56px;
	border-radius: 16px;
	display: flex;
	align-items: center;
	justify-content: center;
	font-size: 28px;
	margin-bottom: 20px;
	position: relative;
	}

	.card-icon.green {
	background: linear-gradient(135deg, rgba(16, 185, 129, 0.2) 0%, rgba(6, 182, 212, 0.2) 100%);
	box-shadow: 0 0 30px rgba(16, 185, 129, 0.2);
	}

	.card-icon.yellow {
	background: linear-gradient(135deg, rgba(251, 191, 36, 0.2) 0%, rgba(249, 115, 22, 0.2) 100%);
	box-shadow: 0 0 30px rgba(251, 191, 36, 0.2);
	}

	.card-icon.red {
	background: linear-gradient(135deg, rgba(244, 63, 94, 0.2) 0%, rgba(124, 58, 237, 0.2) 100%);
	box-shadow: 0 0 30px rgba(244, 63, 94, 0.2);
	}

	.card-title {
	font-size: 22px;
	font-weight: 600;
	margin-bottom: 12px;
	}

	.card-desc {
	color: var(--text-secondary);
	font-size: 15px;
	line-height: 1.6;
	margin-bottom: 20px;
	}

	.card-badge {
	display: inline-block;
	padding: 6px 14px;
	border-radius: 8px;
	font-size: 12px;
	font-weight: 600;
	text-transform: uppercase;
	letter-spacing: 0.5px;
	}

	.badge-beginner {
	background: rgba(16, 185, 129, 0.15);
	color: var(--accent-4);
	border: 1px solid rgba(16, 185, 129, 0.3);
	}

	.badge-intermediate {
	background: rgba(251, 191, 36, 0.15);
	color: #fbbf24;
	border: 1px solid rgba(251, 191, 36, 0.3);
	}

	.badge-advanced {
	background: rgba(244, 63, 94, 0.15);
	color: var(--accent-3);
	border: 1px solid rgba(244, 63, 94, 0.3);
	}

	/* Playground Section */
	.playground {
	background: var(--glass);
	border: 1px solid var(--glass-border);
	border-radius: 24px;
	overflow: hidden;
	backdrop-filter: blur(20px);
	}

	.playground-header {
	display: flex;
	background: rgba(255, 255, 255, 0.02);
	border-bottom: 1px solid var(--glass-border);
	}

	.playground-tab {
	padding: 18px 32px;
	font-size: 14px;
	font-weight: 500;
	color: var(--text-muted);
	cursor: pointer;
	border-bottom: 2px solid transparent;
	transition: all 0.3s ease;
	}

	.playground-tab:hover {
	color: var(--text-secondary);
	background: rgba(255, 255, 255, 0.02);
	}

	.playground-tab.active {
	color: var(--accent-1);
	border-bottom-color: var(--accent-1);
	background: rgba(124, 58, 237, 0.05);
	}

	.playground-body {
	display: grid;
	grid-template-columns: 1fr 1fr;
	min-height: 500px;
	}

	.playground-left, .playground-right {
	padding: 32px;
	}

	.playground-left {
	border-right: 1px solid var(--glass-border);
	}

	.playground-label {
	font-size: 11px;
	font-weight: 600;
	color: var(--text-muted);
	text-transform: uppercase;
	letter-spacing: 1px;
	margin-bottom: 16px;
	display: flex;
	align-items: center;
	gap: 8px;
	}

	.playground-label::before {
	content: '';
	width: 8px;
	height: 8px;
	background: var(--accent-1);
	border-radius: 2px;
	}

	.playground-textarea {
	width: 100%;
	height: 280px;
	background: rgba(0, 0, 0, 0.3);
	border: 1px solid var(--glass-border);
	border-radius: 12px;
	padding: 20px;
	font-family: 'Fira Code', monospace;
	font-size: 13px;
	color: var(--text-primary);
	resize: none;
	outline: none;
	transition: all 0.3s ease;
	}

	.playground-textarea:focus {
	border-color: var(--accent-1);
	box-shadow: 0 0 20px rgba(124, 58, 237, 0.2);
	}

	.btn-group {
	display: flex;
	gap: 16px;
	margin-top: 20px;
	}

	.result-box {
	width: 100%;
	height: 380px;
	background: rgba(0, 0, 0, 0.3);
	border: 1px solid var(--glass-border);
	border-radius: 12px;
	padding: 20px;
	font-family: 'Fira Code', monospace;
	font-size: 12px;
	color: var(--text-secondary);
	white-space: pre-wrap;
	overflow-y: auto;
	position: relative;
	}

	.result-box.success {
	border-color: var(--accent-4);
	box-shadow: 0 0 20px rgba(16, 185, 129, 0.1);
	}

	.result-box.error {
	border-color: var(--accent-3);
	box-shadow: 0 0 20px rgba(244, 63, 94, 0.1);
	}

	/* Endpoints Table */
	.endpoints-container {
	background: var(--glass);
	border: 1px solid var(--glass-border);
	border-radius: 20px;
	overflow: hidden;
	backdrop-filter: blur(20px);
	}

	.endpoint-row {
	display: grid;
	grid-template-columns: 100px 1fr 2fr;
	padding: 20px 32px;
	border-bottom: 1px solid var(--glass-border);
	transition: all 0.3s ease;
	align-items: center;
	}

	.endpoint-row:last-child {
	border-bottom: none;
	}

	.endpoint-row:hover {
	background: rgba(255, 255, 255, 0.02);
	}

	.method-badge {
	display: inline-flex;
	padding: 6px 12px;
	border-radius: 6px;
	font-size: 11px;
	font-weight: 700;
	font-family: 'Fira Code', monospace;
	letter-spacing: 0.5px;
	}

	.method-get {
	background: rgba(16, 185, 129, 0.15);
	color: var(--accent-4);
	border: 1px solid rgba(16, 185, 129, 0.3);
	}

	.method-post {
	background: rgba(124, 58, 237, 0.15);
	color: var(--accent-1);
	border: 1px solid rgba(124, 58, 237, 0.3);
	}

	.method-delete {
	background: rgba(244, 63, 94, 0.15);
	color: var(--accent-3);
	border: 1px solid rgba(244, 63, 94, 0.3);
	}

	.endpoint-path {
	font-family: 'Fira Code', monospace;
	font-size: 14px;
	color: var(--text-primary);
	padding-left: 20px;
	}

	.endpoint-desc {
	color: var(--text-secondary);
	font-size: 14px;
	padding-left: 20px;
	}

	/* Features Grid */
	.features-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
	gap: 20px;
	margin-top: 40px;
	}

	.feature-item {
	display: flex;
	align-items: flex-start;
	gap: 16px;
	padding: 24px;
	background: var(--glass);
	border: 1px solid var(--glass-border);
	border-radius: 16px;
	transition: all 0.3s ease;
	}

	.feature-item:hover {
	border-color: var(--accent-1);
	transform: translateX(8px);
	}

	.feature-icon {
	width: 40px;
	height: 40px;
	background: var(--gradient-1);
	border-radius: 10px;
	display: flex;
	align-items: center;
	justify-content: center;
	font-size: 18px;
	flex-shrink: 0;
	}

	.feature-text h4 {
	font-size: 16px;
	font-weight: 600;
	margin-bottom: 4px;
	}

	.feature-text p {
	font-size: 13px;
	color: var(--text-secondary);
	}

	/* Footer */
	footer {
	padding: 60px 40px;
	border-top: 1px solid var(--glass-border);
	text-align: center;
	}

	.footer-text {
	color: var(--text-muted);
	font-size: 14px;
	margin-bottom: 20px;
	}

	.footer-links {
	display: flex;
	justify-content: center;
	gap: 32px;
	flex-wrap: wrap;
	}

	.footer-link {
	color: var(--text-secondary);
	text-decoration: none;
	font-size: 14px;
	transition: color 0.3s ease;
	display: flex;
	align-items: center;
	gap: 8px;
	}

	.footer-link:hover {
	color: var(--accent-1);
	}

	/* Responsive */
	@media (max-width: 900px) {
	.hero h1 { font-size: 48px; }
	.playground-body { grid-template-columns: 1fr; }
	.playground-left { border-right: none; border-bottom: 1px solid var(--glass-border); }
	.endpoint-row { grid-template-columns: 1fr; gap: 8px; }
	.nav-links { display: none; }
	nav { padding: 16px 20px; }
	.section { padding: 60px 20px; }
	}

	/* Scrollbar */
	::-webkit-scrollbar { width: 8px; height: 8px; }
	::-webkit-scrollbar-track { background: var(--bg-secondary); }
	::-webkit-scrollbar-thumb { background: var(--glass-border); border-radius: 4px; }
	::-webkit-scrollbar-thumb:hover { background: var(--accent-1); }

	/* Code syntax highlighting */
	.json-key { color: #7c3aed; }
	.json-string { color: #10b981; }
	.json-number { color: #06b6d4; }
	</style>
	</head>
	<body>
	<!-- Three.js Canvas -->
	<canvas id="bg-canvas"></canvas>

	<!-- Animated Orbs -->
	<div class="orb orb-1"></div>
	<div class="orb orb-2"></div>
	<div class="orb orb-3"></div>

	<!-- Grid Overlay -->
	<div class="grid-overlay"></div>

	<!-- Noise Texture -->
	<div class="noise"></div>

	<!-- Floating Shapes -->
	<div class="floating-shapes">
	<svg class="shape shape-1" width="60" height="60" viewBox="0 0 60 60">
	<polygon points="30,0 60,60 0,60" fill="none" stroke="rgba(124,58,237,0.3)" stroke-width="1"/>
	</svg>
	<svg class="shape shape-2" width="80" height="80" viewBox="0 0 80 80">
	<circle cx="40" cy="40" r="38" fill="none" stroke="rgba(6,182,212,0.3)" stroke-width="1"/>
	</svg>
	<svg class="shape shape-3" width="70" height="70" viewBox="0 0 70 70">
	<rect x="5" y="5" width="60" height="60" fill="none" stroke="rgba(244,63,94,0.3)" stroke-width="1" transform="rotate(45 35 35)"/>
	</svg>
	<svg class="shape shape-4" width="50" height="50" viewBox="0 0 50 50">
	<polygon points="25,0 50,25 25,50 0,25" fill="none" stroke="rgba(16,185,129,0.3)" stroke-width="1"/>
	</svg>
	<svg class="shape shape-5" width="60" height="60" viewBox="0 0 60 60">
	<polygon points="30,0 60,30 30,60 0,30" fill="none" stroke="rgba(124,58,237,0.3)" stroke-width="1"/>
	</svg>
	</div>

	<!-- Content -->
	<div class="content">
	<!-- Navigation -->
	<nav>
	<div class="logo">
	<div class="logo-icon">🛡️</div>
	<span class="logo-text">HallucinationGuard</span>
	</div>
	<div class="nav-links">
	<a href="#overview" class="nav-link">Overview</a>
	<a href="#tasks" class="nav-link">Tasks</a>
	<a href="#playground" class="nav-link active">Playground</a>
	<a href="#endpoints" class="nav-link">Endpoints</a>
	</div>
	<a href="/redoc" class="nav-btn">API Docs →</a>
	</nav>

	<!-- Hero Section -->
	<section class="hero">
	<div class="hero-badge">
	<span class="badge-dot"></span>
	<span>v4.2.0 • OpenEnv Compatible • Production Ready</span>
	</div>
	<h1>Train AI to Stop<br/><span>Hallucinating</span></h1>
	<p class="hero-subtitle">The production-grade RL environment for training and evaluating LLMs on hallucination avoidance. Built on 1M+ real-world examples across 38 benchmark datasets.</p>
	<div class="hero-buttons">
	<a href="#playground" class="btn btn-primary">
	<span>⚡</span> Try Interactive Demo
	</a>
	<a href="/redoc" class="btn btn-secondary">
	<span>📖</span> Full API Reference
	</a>
	</div>
	<div class="stats-container">
	<div class="stat-item">
	<div class="stat-value" data-count="1090163">0</div>
	<div class="stat-label">Examples</div>
	</div>
	<div class="stat-item">
	<div class="stat-value" data-count="38">0</div>
	<div class="stat-label">Datasets</div>
	</div>
	<div class="stat-item">
	<div class="stat-value" data-count="9">0</div>
	<div class="stat-label">Reward Components</div>
	</div>
	<div class="stat-item">
	<div class="stat-value" data-count="3">0</div>
	<div class="stat-label">Task Levels</div>
	</div>
	</div>
	</section>

	<!-- Features Section -->
	<section class="section" id="overview">
	<div class="section-header">
	<h2 class="section-title">Why HallucinationGuard?</h2>
	<p class="section-subtitle">Research-grade evaluation for grounded AI systems</p>
	</div>
	<div class="features-grid">
	<div class="feature-item">
	<div class="feature-icon">🎯</div>
	<div class="feature-text">
	<h4>Factual Grounding</h4>
	<p>Rewards answers derived strictly from provided context</p>
	</div>
	</div>
	<div class="feature-item">
	<div class="feature-icon">🔬</div>
	<div class="feature-text">
	<h4>9-Component Reward</h4>
	<p>Factual correctness, grounding, calibration, NLI, BERTScore...</p>
	</div>
	</div>
	<div class="feature-item">
	<div class="feature-icon">📊</div>
	<div class="feature-text">
	<h4>Real-World Datasets</h4>
	<p>SQuAD, HotpotQA, HaluEval, TruthfulQA, FEVER, and 33 more</p>
	</div>
	</div>
	<div class="feature-item">
	<div class="feature-icon">⚡</div>
	<div class="feature-text">
	<h4>Fast API</h4>
	<p>RESTful endpoints with OpenEnv compliance</p>
	</div>
	</div>
	<div class="feature-item">
	<div class="feature-icon">🧠</div>
	<div class="feature-text">
	<h4>NLI-Powered</h4>
	<p>Detects entailment and contradiction semantically</p>
	</div>
	</div>
	<div class="feature-item">
	<div class="feature-icon">🏆</div>
	<div class="feature-text">
	<h4>Leaderboard</h4>
	<p>Compare model performance across tasks</p>
	</div>
	</div>
	</div>
	</section>

	<!-- Tasks Section -->
	<section class="section" id="tasks">
	<div class="section-header">
	<h2 class="section-title">Three Difficulty Levels</h2>
	<p class="section-subtitle">Progressive curriculum from basic to adversarial</p>
	</div>
	<div class="cards-grid">
	<div class="card">
	<div class="card-icon green">🟢</div>
	<h3 class="card-title">Task 1: Factual Grounding</h3>
	<p class="card-desc">Answer straightforward factual questions from a short context passage. Single-hop retrieval with unambiguous ground truth. Perfect for initial training.</p>
	<span class="card-badge badge-beginner">Beginner</span>
	<div style="margin-top: 16px; font-size: 12px; color: var(--text-muted);">Datasets: SQuAD, BoolQ, ARC, OpenBookQA</div>
	</div>
	<div class="card">
	<div class="card-icon yellow">🟡</div>
	<h3 class="card-title">Task 2: Multi-Hop Synthesis</h3>
	<p class="card-desc">Synthesize evidence from multiple sentences. Connect disparate facts without fabricating bridging information. Requires reasoning chains.</p>
	<span class="card-badge badge-intermediate">Intermediate</span>
	<div style="margin-top: 16px; font-size: 12px; color: var(--text-muted);">Datasets: HotpotQA, CoQA, NQ-Open, MS-MARCO</div>
	</div>
	<div class="card">
	<div class="card-icon red">🔴</div>
	<h3 class="card-title">Task 3: Adversarial Resistance</h3>
	<p class="card-desc">Resist adversarial prompts designed to elicit hallucinations. Many questions are unanswerable — confident refusals are rewarded.</p>
	<span class="card-badge badge-advanced">Advanced</span>
	<div style="margin-top: 16px; font-size: 12px; color: var(--text-muted);">Datasets: HaluEval, TruthfulQA, FEVER, AdversarialQA</div>
	</div>
	</div>
	</section>

	<!-- Playground Section -->
	<section class="section" id="playground">
	<div class="section-header">
	<h2 class="section-title">Interactive Playground</h2>
	<p class="section-subtitle">Test the API directly in your browser</p>
	</div>
	<div class="playground">
	<div class="playground-header">
	<div class="playground-tab active" onclick="switchTab('reset')">🔄 Reset Episode</div>
	<div class="playground-tab" onclick="switchTab('step')">📝 Submit Answer</div>
	<div class="playground-tab" onclick="switchTab('batch')">📦 Batch Evaluate</div>
	<div class="playground-tab" onclick="switchTab('baseline')">🤖 Run Baseline</div>
	</div>
	<div class="playground-body">
	<div class="playground-left">
	<div class="playground-label">REQUEST BODY</div>
	<textarea id="request-body" class="playground-textarea" placeholder="Enter JSON request...">{
	"difficulty": "beginner",
	"seed": 42
	}</textarea>
	<div class="btn-group">
	<button class="btn btn-primary" onclick="sendRequest()">
	▶ Send Request
	</button>
	<button class="btn btn-secondary" onclick="clearAll()">
	Clear
	</button>
	</div>
	</div>
	<div class="playground-right">
	<div class="playground-label">RESPONSE</div>
	<div id="result-box" class="result-box">
	<span style="color: var(--text-muted);">// Response will appear here...
	//
	// Click "Send Request" to test the API</span>
	</div>
	</div>
	</div>
	</div>
	</section>

	<!-- Endpoints Section -->
	<section class="section" id="endpoints">
	<div class="section-header">
	<h2 class="section-title">All Endpoints</h2>
	<p class="section-subtitle">Complete API reference at a glance</p>
	</div>
	<div class="endpoints-container">
	<div class="endpoint-row">
	<span class="method-badge method-post">POST</span>
	<span class="endpoint-path">/reset</span>
	<span class="endpoint-desc">Start a new episode with optional difficulty and seed</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-post">POST</span>
	<span class="endpoint-path">/step</span>
	<span class="endpoint-desc">Submit an answer with confidence and source citation</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-get">GET</span>
	<span class="endpoint-path">/state</span>
	<span class="endpoint-desc">Get current episode state, accuracy, and skill rating</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-get">GET</span>
	<span class="endpoint-path">/tasks</span>
	<span class="endpoint-desc">List all 3 tasks with complete action schema</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-post">POST</span>
	<span class="endpoint-path">/grader</span>
	<span class="endpoint-desc">Score a completed episode (returns 0.0–1.0)</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-post">POST</span>
	<span class="endpoint-path">/baseline</span>
	<span class="endpoint-desc">Run built-in heuristic baseline agent</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-post">POST</span>
	<span class="endpoint-path">/batch/evaluate</span>
	<span class="endpoint-desc">Evaluate multiple Q&A pairs in one request</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-get">GET</span>
	<span class="endpoint-path">/leaderboard</span>
	<span class="endpoint-desc">View ranked model performance</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-get">GET</span>
	<span class="endpoint-path">/health</span>
	<span class="endpoint-desc">Service health check</span>
	</div>
	<div class="endpoint-row">
	<span class="method-badge method-get">GET</span>
	<span class="endpoint-path">/datasets</span>
	<span class="endpoint-desc">Dataset statistics and distribution</span>
	</div>
	</div>
	</section>

	<!-- Footer -->
	<footer>
	<p class="footer-text">HallucinationGuard-Env — OpenEnv RL Environment for Hallucination Detection</p>
	<div class="footer-links">
	<a href="https://huggingface.co/spaces/SamSankar/hallucination-guard-env" class="footer-link">🤗 HuggingFace Space</a>
	<a href="https://pypi.org/project/openenv-halluguard/" class="footer-link">📦 PyPI Package</a>
	<a href="/redoc" class="footer-link">📖 API Reference</a>
	<a href="https://github.com/meta-pytorch/OpenEnv" class="footer-link">🔗 OpenEnv</a>
	</div>
	</footer>
	</div>

	<script>
	// ═══════════════════════════════════════════════════════════════════════════════
	// THREE.JS 3D BACKGROUND
	// ═══════════════════════════════════════════════════════════════════════════════

	// Renderer draws into the #bg-canvas element declared at the top of <body>.
	// NOTE(review): THREE comes from the CDN <script> in <head>; if that load
	// fails, the next statement throws and the rest of this script (including
	// the playground setup further down) does not run. Consider guarding.
	const canvas = document.getElementById('bg-canvas');
	const renderer = new THREE.WebGLRenderer({ canvas, antialias: true, alpha: true });
	renderer.setSize(window.innerWidth, window.innerHeight);
	// Cap the pixel ratio at 2 regardless of what the device reports.
	renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));

	// Scene plus a 75-degree perspective camera placed 30 units out on +z.
	const scene = new THREE.Scene();
	const camera = new THREE.PerspectiveCamera(75, window.innerWidth / window.innerHeight, 0.1, 1000);
	camera.position.z = 30;

	// Particle field: 2000 points, each coordinate drawn uniformly from [-50, 50).
	const particlesCount = 2000;
	const posArray = Float32Array.from(
	    { length: particlesCount * 3 },
	    () => (Math.random() - 0.5) * 100
	);

	// Three consecutive floats in posArray form one point position.
	const particlesGeometry = new THREE.BufferGeometry();
	particlesGeometry.setAttribute('position', new THREE.BufferAttribute(posArray, 3));

	const particlesMaterial = new THREE.PointsMaterial({
	    color: 0x7c3aed,
	    size: 0.1,
	    opacity: 0.6,
	    transparent: true,
	    blending: THREE.AdditiveBlending
	});

	const particlesMesh = new THREE.Points(particlesGeometry, particlesMaterial);
	scene.add(particlesMesh);

	// Floating wireframe solids: one mesh per geometry, coloured by index.
	const colors = [0x7c3aed, 0x06b6d4, 0xf43f5e, 0x10b981];

	const geometries = [
	    new THREE.IcosahedronGeometry(2, 0),
	    new THREE.OctahedronGeometry(2, 0),
	    new THREE.TetrahedronGeometry(2, 0),
	    new THREE.TorusGeometry(1.5, 0.5, 8, 16),
	];

	const objects = geometries.map((geometry, index) => {
	    const wireframe = new THREE.MeshBasicMaterial({
	        color: colors[index],
	        wireframe: true,
	        transparent: true,
	        opacity: 0.3
	    });
	    const solid = new THREE.Mesh(geometry, wireframe);

	    // Random start position: x and y in [-20, 20), z in [-20, 0).
	    solid.position.set(
	        (Math.random() - 0.5) * 40,
	        (Math.random() - 0.5) * 40,
	        (Math.random() - 0.5) * 20 - 10
	    );

	    // Per-mesh motion parameters; the animation loop reads rotationSpeed
	    // and floatOffset.
	    solid.userData = {
	        rotationSpeed: { x: Math.random() * 0.01, y: Math.random() * 0.01 },
	        floatSpeed: Math.random() * 0.02 + 0.01,
	        floatOffset: Math.random() * Math.PI * 2
	    };

	    scene.add(solid);
	    return solid;
	});

	// Track the pointer in normalised coordinates: x and y each span [-1, 1],
	// with +y toward the top of the window.
	let mouseX = 0;
	let mouseY = 0;
	document.addEventListener('mousemove', ({ clientX, clientY }) => {
	    mouseX = 2 * (clientX / window.innerWidth) - 1;
	    mouseY = 1 - 2 * (clientY / window.innerHeight);
	});

	// Animation loop: advances the scene by one tick per display frame.
	let time = 0;
	function animate() {
	    // Queue the next frame, then advance the shared clock.
	    requestAnimationFrame(animate);
	    time += 0.01;

	    // Constant slow drift of the whole particle field.
	    particlesMesh.rotation.y += 0.001;
	    particlesMesh.rotation.x += 0.0005;

	    // Ease the camera 2% of the remaining distance toward the
	    // pointer-driven target, then re-aim it at the scene origin.
	    const targetX = mouseX * 3;
	    const targetY = mouseY * 3;
	    camera.position.x += (targetX - camera.position.x) * 0.02;
	    camera.position.y += (targetY - camera.position.y) * 0.02;
	    camera.lookAt(scene.position);

	    // Spin each solid and bob it vertically on its own sine phase.
	    for (const solid of objects) {
	        const { rotationSpeed, floatOffset } = solid.userData;
	        solid.rotation.x += rotationSpeed.x;
	        solid.rotation.y += rotationSpeed.y;
	        solid.position.y += Math.sin(time + floatOffset) * 0.02;
	    }

	    renderer.render(scene, camera);
	}
	animate();

	// Resize handler
	window.addEventListener('resize', () => {
	camera.aspect = window.innerWidth / window.innerHeight;
	camera.updateProjectionMatrix();
	renderer.setSize(window.innerWidth, window.innerHeight);
	});

	// ═══════════════════════════════════════════════════════════════════════════════
	// PLAYGROUND FUNCTIONALITY
	// ═══════════════════════════════════════════════════════════════════════════════

	let currentTab = 'reset';
	const endpoints = {
	reset: '/reset',
	step: '/step',
	batch: '/batch/evaluate',
	baseline: '/baseline'
	};

	const placeholders = {
	reset: `{
	"difficulty": "beginner",
	"seed": 42
	}`,
	step: `{
	"answer": "Your answer derived from context",
	"confidence": 0.85,
	"source_quote": "Exact quote from context"
	}`,
	batch: `{
	"items": [
	{
	"question": "What is the capital of France?",
	"context": "The capital of France is Paris.",
	"answer": "Paris",
	"confidence": 0.9,
	"ground_truth": "Paris"
	}
	],
	"task_id": "task_1_factual_grounding"
	}`,
	baseline: `{
	"steps_per_task": 5,
	"seed": 42
	}`
	};

	function switchTab(tab) {
	currentTab = tab;
	document.querySelectorAll('.playground-tab').forEach(t => {
	t.classList.toggle('active', t.textContent.toLowerCase().includes(tab));
	});
	document.getElementById('request-body').value = placeholders[tab];
	document.getElementById('result-box').innerHTML = '<span style="color: var(--text-muted);">// Response will appear here...</span>';
	document.getElementById('result-box').className = 'result-box';
	}

	async function sendRequest() {
	const body = document.getElementById('request-body').value;
	const resultBox = document.getElementById('result-box');

	try {
	resultBox.innerHTML = '<span style="color: var(--accent-2);">⏳ Sending request...</span>';

	const response = await fetch(endpoints[currentTab], {
	method: 'POST',
	headers: { 'Content-Type': 'application/json' },
	body: body
	});

	const data = await response.json();
	resultBox.className = 'result-box success';
	resultBox.textContent = JSON.stringify(data, null, 2);
	} catch (error) {
	resultBox.className = 'result-box error';
	resultBox.textContent = 'Error: ' + error.message;
	}
	}

	function clearAll() {
	document.getElementById('request-body').value = placeholders[currentTab];
	document.getElementById('result-box').innerHTML = '<span style="color: var(--text-muted);">// Response will appear here...</span>';
	document.getElementById('result-box').className = 'result-box';
	}

	// ═══════════════════════════════════════════════════════════════════════════════
	// ANIMATED COUNTERS
	// ═══════════════════════════════════════════════════════════════════════════════

	function animateCounters() {
	const counters = document.querySelectorAll('.stat-value[data-count]');
	counters.forEach(counter => {
	const target = parseInt(counter.getAttribute('data-count'));
	const duration = 2000;
	const start = performance.now();

	function update(currentTime) {
	const elapsed = currentTime - start;
	const progress = Math.min(elapsed / duration, 1);
	const easeOut = 1 - Math.pow(1 - progress, 3);
	const current = Math.floor(easeOut * target);

	counter.textContent = current.toLocaleString();

	if (progress < 1) {
	requestAnimationFrame(update);
	} else {
	counter.textContent = target >= 1000000 ? '1M+' : target.toLocaleString();
	}
	}

	requestAnimationFrame(update);
	});
	}

	// Intersection Observer for counter animation
	const statsObserver = new IntersectionObserver((entries) => {
	entries.forEach(entry => {
	if (entry.isIntersecting) {
	animateCounters();
	statsObserver.disconnect();
	}
	});
	}, { threshold: 0.5 });

	const statsContainer = document.querySelector('.stats-container');
	if (statsContainer) {
	statsObserver.observe(statsContainer);
	}

	// Smooth scroll for navigation
	document.querySelectorAll('a[href^="#"]').forEach(anchor => {
	anchor.addEventListener('click', function(e) {
	e.preventDefault();
	const target = document.querySelector(this.getAttribute('href'));
	if (target) {
	target.scrollIntoView({ behavior: 'smooth', block: 'start' });
	}
	});
	});
	</script>
	</body>
	</html>
	"""

	# ═══════════════════════════════════════════════════════════════════════════════
	# FASTAPI APP
	# ═══════════════════════════════════════════════════════════════════════════════

	# Shared default environment; stays None until _get_default_env() builds it.
	_default_env: Optional[HallucinationEnvironment] = None
	# True only while _get_default_env() is in the middle of constructing the environment.
	_env_loading: bool = False
	# Held by _get_default_env() around construction so only one thread builds the environment.
	_env_lock = threading.Lock()

	def _get_default_env() -> HallucinationEnvironment:
	    """Return the shared default environment, building it on first use.

	    The environment is created under ``_env_lock``; ``_default_env`` is
	    checked once before taking the lock and again inside it, so concurrent
	    first callers end up with a single instance.

	    If constructing ``HallucinationEnvironment`` raises, the failure is
	    logged with its traceback and a ``MinimalEnv`` stand-in is installed
	    instead. Its ``reset``/``step`` return fixed placeholder observations.

	    Returns:
	        The real environment, or the ``MinimalEnv`` fallback.

	    Raises:
	        Whatever importing or instantiating ``DatasetLoader`` raises while
	        building the fallback.
	    """
	    global _default_env, _env_loading
	    # Fast path: already built, no need to take the lock.
	    if _default_env is not None:
	        return _default_env
	    with _env_lock:
	        # Another thread may have finished building while we waited.
	        if _default_env is not None:
	            return _default_env
	        _env_loading = True
	        try:
	            logger.info("Creating HallucinationEnvironment...")
	            _default_env = HallucinationEnvironment()
	            logger.info(f"Environment ready — {_default_env.dataset_loader.get_total_examples():,} examples loaded.")
	            return _default_env
	        except Exception as e:
	            # logger.exception keeps the traceback, which a plain error line loses.
	            logger.exception("Failed to create environment: %s", e)
	            # Minimal fallback environment
	            from dataset_loader import DatasetLoader

	            # Placeholder observations are dataclasses so that _safe_dict()
	            # serializes them field by field; an ad-hoc class instance would
	            # fall through to str(obj) and reach the client as a repr string.
	            @dataclasses.dataclass
	            class _ResetObs:
	                question: str = 'Placeholder'
	                context: str = 'Context'
	                reward: float = 0.0
	                done: bool = False
	                info: Dict[str, Any] = dataclasses.field(default_factory=dict)

	            @dataclasses.dataclass
	            class _StepObs:
	                reward: float = 0.0
	                done: bool = False
	                is_hallucination: bool = False
	                info: Dict[str, Any] = dataclasses.field(default_factory=dict)

	            class MinimalEnv:
	                def __init__(self):
	                    self.dataset_loader = DatasetLoader()
	                    self.dataset_loader.examples = []

	                def reset(self, **kwargs):
	                    return _ResetObs()

	                def step(self, action):
	                    return _StepObs()

	                def state(self):
	                    return {}

	                def close(self):
	                    pass

	            _default_env = MinimalEnv()
	            return _default_env
	        finally:
	            _env_loading = False

	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """Start background warm-up on startup and close the environment on shutdown.

	    On startup two daemon threads are launched so the server can begin
	    accepting requests immediately:

	    * ``preload_models`` instantiates the sentence-transformer, cross-encoder,
	      ROUGE and BERTScore objects (presumably to warm their caches — TODO
	      confirm). A BERTScore failure is tolerated and only logged.
	    * ``background_load`` builds the default environment via
	      ``_get_default_env()``.

	    After the ``yield`` (shutdown), the default environment is closed if one
	    was created; errors while closing are logged, not raised.
	    """

	    def preload_models():
	        try:
	            logger.info("Preloading ML models...")
	            from sentence_transformers import SentenceTransformer, CrossEncoder
	            SentenceTransformer('all-MiniLM-L6-v2')
	            CrossEncoder('cross-encoder/nli-deberta-v3-small')
	            from rouge_score import rouge_scorer
	            rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
	            try:
	                from bert_score import BERTScorer
	                BERTScorer(model_type='microsoft/deberta-v3-base', lang='en', device='cpu')
	            except Exception as e:
	                # Best-effort step: keep going, but leave a trace of why it was skipped.
	                logger.warning("BERTScore preload skipped: %s", e)
	            logger.info("All ML models preloaded!")
	        except Exception as e:
	            logger.error(f"Model preload failed: {e}")

	    threading.Thread(target=preload_models, daemon=True).start()

	    def background_load():
	        try:
	            logger.info("Background dataset loading...")
	            env = _get_default_env()
	            logger.info(f"Loaded {env.dataset_loader.get_total_examples():,} examples.")
	        except Exception as e:
	            logger.error(f"Background loading failed: {e}")

	    threading.Thread(target=background_load, daemon=True).start()
	    yield
	    if _default_env is not None:
	        try:
	            _default_env.close()
	        except Exception as e:
	            # Shutdown must not fail because of a misbehaving close().
	            logger.warning("Error while closing default environment: %s", e)

	# The built-in Swagger UI is served at /swagger; the /docs path is used by a
	# custom route that returns the HTML documentation page.
	app = FastAPI(
	    lifespan=lifespan,
	    title="HallucinationGuard-Env",
	    version="4.2.0",
	    docs_url="/swagger",
	    redoc_url="/redoc",
	)

	# CORS: allow any origin, method and header. An empty-string entry matches no
	# real origin or method, so the wildcard "*" must be spelled out in each list.
	app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

	# Environments keyed by session id; run_baseline() registers its temporary environments here.
	_sessions: Dict[str, HallucinationEnvironment] = {}
	import json as _json
	# Path of the JSON file the leaderboard is loaded from and saved to.
	_LEADERBOARD_FILE = "/tmp/hallucination_guard_leaderboard.json"

	def _load_leaderboard():
	    """Load the persisted leaderboard from ``_LEADERBOARD_FILE``.

	    Returns:
	        The decoded JSON object, or an empty dict when the file is missing,
	        unreadable, not valid JSON, or does not contain a JSON object.
	    """
	    try:
	        # Context manager closes the handle even if decoding fails.
	        with open(_LEADERBOARD_FILE, encoding="utf-8") as fh:
	            data = _json.load(fh)
	    except FileNotFoundError:
	        # No leaderboard saved yet: start empty.
	        return {}
	    except (OSError, ValueError) as e:
	        # ValueError covers both JSON and text-decoding errors.
	        logger.warning("Could not load leaderboard from %s: %s", _LEADERBOARD_FILE, e)
	        return {}
	    # Callers index the result by model name, so anything but a dict is unusable.
	    return data if isinstance(data, dict) else {}

	def _save_leaderboard(lb):
	    """Write *lb* to ``_LEADERBOARD_FILE`` as indented JSON (best effort).

	    Failures are logged and swallowed so that a read-only or full disk does
	    not break the request that triggered the save.

	    Args:
	        lb: JSON-serializable leaderboard mapping.
	    """
	    try:
	        # Context manager guarantees the data is flushed and the handle closed.
	        with open(_LEADERBOARD_FILE, "w", encoding="utf-8") as fh:
	            _json.dump(lb, fh, indent=2)
	    except (OSError, TypeError, ValueError) as e:
	        # TypeError/ValueError: *lb* contained something JSON cannot encode.
	        logger.warning("Could not save leaderboard to %s: %s", _LEADERBOARD_FILE, e)

	# In-memory leaderboard, seeded from the JSON file at import time; submit_leaderboard() keys it by model name.
	_leaderboard: Dict[str, Dict[str, Any]] = _load_leaderboard()

	def _safe_dict(obj):
	    """Recursively convert *obj* into plain builtins for a JSON response.

	    Conversion rules, tried in order:

	    * objects with ``model_dump()`` or ``dict()`` are dumped through that
	      method and the result converted again;
	    * dataclass *instances* become ``{field_name: converted_value}``;
	    * ``enum.Enum`` members become their ``.value``;
	    * dicts keep their keys and have their values converted;
	    * lists and tuples become lists of converted items;
	    * ``str``/``int``/``float``/``bool``/``None`` pass through unchanged;
	    * anything else is replaced by ``str(obj)``.

	    Args:
	        obj: Any value.

	    Returns:
	        A structure made of dicts, lists and scalars (or strings as a last resort).
	    """
	    if hasattr(obj, 'model_dump'):
	        return _safe_dict(obj.model_dump())
	    if hasattr(obj, 'dict'):
	        return _safe_dict(obj.dict())
	    # is_dataclass() is also true for the dataclass type itself, which has no
	    # field values to read; only instances are expanded.
	    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
	        return {f.name: _safe_dict(getattr(obj, f.name)) for f in dataclasses.fields(obj)}
	    if isinstance(obj, enum.Enum):
	        return obj.value
	    if isinstance(obj, dict):
	        return {k: _safe_dict(v) for k, v in obj.items()}
	    # Tuples map to JSON arrays just like lists; stringifying them loses structure.
	    if isinstance(obj, (list, tuple)):
	        return [_safe_dict(i) for i in obj]
	    if isinstance(obj, (str, int, float, bool, type(None))):
	        return obj
	    return str(obj)

	# ═══════════════════════════════════════════════════════════════════════════════
	# ROUTES
	# ═══════════════════════════════════════════════════════════════════════════════

	@app.get("/", include_in_schema=False, response_class=HTMLResponse)
	async def root():
	    """Serve the custom HTML documentation page at the site root."""
	    page = STUNNING_DOCS_HTML
	    return page

	@app.get("/docs", include_in_schema=False, response_class=HTMLResponse)
	async def docs():
	    """Serve the same custom HTML documentation page under /docs."""
	    page = STUNNING_DOCS_HTML
	    return page

	@app.post("/reset", tags=["Environment"])
	async def reset(body: Dict[str, Any] = {}):
	    """Reset the default environment and return the first observation.

	    Only the ``seed``, ``episode_id`` and ``difficulty`` keys of *body* are
	    forwarded to ``env.reset``; any other keys are ignored.

	    Args:
	        body: Optional JSON object. It is only read, never mutated, so the
	            shared ``{}`` default is safe here.

	    Returns:
	        JSON response with the observation converted by ``_safe_dict``.

	    Raises:
	        HTTPException: 500 with the error text if anything fails.
	    """
	    forwarded_keys = ("seed", "episode_id", "difficulty")
	    try:
	        env = _get_default_env()
	        obs = env.reset(**{k: v for k, v in body.items() if k in forwarded_keys})
	        return JSONResponse(content=_safe_dict(obs))
	    except Exception as e:
	        # logger.exception appends the active traceback to the message.
	        logger.exception("Reset error: %s", e)
	        raise HTTPException(500, str(e)) from e

	@app.post("/step", tags=["Environment"])
	async def step(action_data: Dict[str, Any]):
	    """Apply one action to the default environment and return the result.

	    Keys of *action_data* that are not fields of ``HallucinationAction`` are
	    dropped before the action object is built.

	    Args:
	        action_data: JSON object with the action fields.

	    Returns:
	        JSON response with the step result converted by ``_safe_dict``.

	    Raises:
	        HTTPException: 500 with the error text if building the action or
	            stepping the environment fails.
	    """
	    try:
	        env = _get_default_env()
	        # Field registry is `model_fields` when present, else `__fields__`
	        # (presumably pydantic v2 vs v1 — TODO confirm against models.py).
	        if hasattr(HallucinationAction, 'model_fields'):
	            valid = set(HallucinationAction.model_fields.keys())
	        else:
	            valid = set(HallucinationAction.__fields__.keys())
	        action = HallucinationAction(**{k: v for k, v in action_data.items() if k in valid})
	        return JSONResponse(content=_safe_dict(env.step(action)))
	    except Exception as e:
	        # Log with traceback, as the /reset handler does, so 500s can be diagnosed.
	        logger.exception("Step error: %s", e)
	        raise HTTPException(500, str(e)) from e

	@app.get("/state", tags=["Environment"])
	async def get_state():
	    """Return the default environment's current state as JSON.

	    Raises:
	        HTTPException: 500 with the error text if the state cannot be read.
	    """
	    try:
	        return JSONResponse(content=_safe_dict(_get_default_env().state()))
	    except Exception as e:
	        # Log with traceback, as the /reset handler does, so 500s can be diagnosed.
	        logger.exception("State error: %s", e)
	        raise HTTPException(500, str(e)) from e

	@app.get("/tasks", tags=["OpenEnv"])
	async def list_tasks():
	    """List the known tasks in a fixed order, together with the action schema.

	    Task ids missing from ``ALL_TASKS`` are left out of the listing.
	    """
	    task_order = (
	        "task_1_factual_grounding",
	        "task_2_multi_hop_synthesis",
	        "task_3_adversarial_resistance",
	    )
	    listed = []
	    for task_id in task_order:
	        if task_id in ALL_TASKS:
	            listed.append(ALL_TASKS[task_id].to_dict())
	    return {"tasks": listed, "action_schema": ACTION_SCHEMA}

	@app.post("/grader", tags=["OpenEnv"])
	async def grade_episode(body: Dict[str, Any]):
	    """Score an episode for the task named in *body*.

	    Reads ``task_id``, ``step_rewards`` and ``step_infos`` from *body*. When
	    rewards are given without infos, the score is the mean reward rounded to
	    four decimals; otherwise scoring is delegated to ``compute_task_score``.

	    Raises:
	        HTTPException: 422 if ``task_id`` is missing, 404 if it is unknown.
	    """
	    task_id = body.get("task_id")
	    if not task_id:
	        raise HTTPException(422, "'task_id' required")

	    task = get_task(task_id)
	    if not task:
	        raise HTTPException(404, f"task_id '{task_id}' not found")

	    step_rewards = body.get("step_rewards", [])
	    step_infos = body.get("step_infos", [])
	    if step_infos or not step_rewards:
	        return compute_task_score(task, step_rewards, step_infos)

	    # Rewards only: fall back to the plain average.
	    mean_reward = sum(step_rewards) / len(step_rewards)
	    return {"task_id": task_id, "score": round(mean_reward, 4)}

	@app.post("/baseline", tags=["OpenEnv"])
	async def run_baseline(body: Dict[str, Any] = {}):
	    """Run a context-echo baseline on each task and report the scores.

	    For every task a temporary environment is created, reset with the given
	    seed and difficulty, and stepped with an action whose answer and source
	    quote are prefixes of the observation's context. The collected rewards
	    and infos are scored with ``compute_task_score``.

	    Args:
	        body: Optional JSON object with ``steps_per_task`` (clamped to 3..10,
	            default 5) and ``seed`` (default 42). Only read, never mutated.

	    Returns:
	        ``{"tasks": [...], "summary": {"overall_score": float}}``.

	    Raises:
	        HTTPException: 422 if ``steps_per_task`` or ``seed`` is not an integer.
	    """
	    try:
	        steps = max(3, min(10, int(body.get("steps_per_task", 5))))
	        seed = int(body.get("seed", 42))
	    except (TypeError, ValueError) as e:
	        raise HTTPException(422, "'steps_per_task' and 'seed' must be integers") from e

	    plan = [
	        ("task_1_factual_grounding", "beginner"),
	        ("task_2_multi_hop_synthesis", "intermediate"),
	        ("task_3_adversarial_resistance", "advanced"),
	    ]
	    results = []
	    for task_id, diff in plan:
	        task = get_task(task_id)
	        if not task:
	            continue
	        sid = f"bl_{task_id}_{seed}"
	        if sid in _sessions:
	            _sessions[sid].close()
	        env = HallucinationEnvironment(session_id=sid)
	        _sessions[sid] = env
	        try:
	            obs = _safe_dict(env.reset(seed=seed, difficulty=diff))
	            rewards, infos = [], []
	            for _ in range(steps):
	                if obs.get("done"):
	                    break
	                # A present-but-null context must not break the slicing below.
	                ctx = obs.get("context") or ""
	                action = HallucinationAction(answer=ctx[:100], confidence=0.6, source_quote=ctx[:80])
	                obs = _safe_dict(env.step(action))
	                rewards.append(float(obs.get("reward") or 0))
	                infos.append({"correctness": obs.get("grounding_score", 0), "is_hallucination": obs.get("is_hallucination", False)})
	            results.append(compute_task_score(task, rewards, infos))
	        finally:
	            # Always release the temporary session, even when a step fails,
	            # and only unregister the entry if it is still ours.
	            if _sessions.get(sid) is env:
	                del _sessions[sid]
	            try:
	                env.close()
	            except Exception as e:
	                logger.warning("Error closing baseline session %s: %s", sid, e)
	    return {"tasks": results, "summary": {"overall_score": round(sum(r["score"] for r in results)/max(len(results),1), 4)}}

	@app.post("/batch/evaluate", tags=["Evaluation"])
	async def batch_evaluate(body: Dict[str, Any]):
	    """Grade a batch of answers with ``calculate_reward``.

	    Each item may supply ``answer``, ``confidence`` (default 0.5),
	    ``source_quote``, ``context`` and ``ground_truth``; missing text fields
	    default to the empty string.

	    Args:
	        body: JSON object whose ``items`` key holds a non-empty list of objects.

	    Returns:
	        ``{"total_items": int, "results": [{"index", "reward", "is_hallucination"}, ...]}``.

	    Raises:
	        HTTPException: 422 if ``items`` is missing/empty or is not a list of objects.
	    """
	    items = body.get("items", [])
	    if not items:
	        raise HTTPException(422, "'items' required")
	    # Reject malformed payloads up front instead of failing on item.get() below.
	    if not isinstance(items, list) or not all(isinstance(item, dict) for item in items):
	        raise HTTPException(422, "'items' must be a list of objects")
	    from server.grader import calculate_reward
	    results = []
	    for i, item in enumerate(items):
	        r, info = calculate_reward(item.get("answer",""), item.get("confidence",0.5), item.get("source_quote",""), item.get("context",""), item.get("ground_truth",""))
	        results.append({"index": i, "reward": round(r,4), "is_hallucination": info.get("is_hallucination", False)})
	    return {"total_items": len(results), "results": results}

	@app.get("/leaderboard", tags=["Leaderboard"])
	async def leaderboard():
	    """Return all submissions ordered by ``avg_reward``, highest first.

	    Each returned entry carries a 1-based ``rank``. The rank is attached to a
	    copy of the entry, so the stored leaderboard (and the file it is saved
	    to) is not modified by reading it.
	    """
	    if not _leaderboard:
	        return {"leaderboard": [], "message": "No submissions"}
	    ordered = sorted(_leaderboard.values(), key=lambda x: x.get("avg_reward", 0), reverse=True)
	    ranked = [{**entry, "rank": position} for position, entry in enumerate(ordered, start=1)]
	    return {"leaderboard": ranked}

	@app.post("/leaderboard/submit", tags=["Leaderboard"])
	async def submit_leaderboard(data: Dict[str, Any]):
	    """Store (or replace) a model's leaderboard entry and persist the board.

	    Args:
	        data: JSON object containing at least ``model_name``, ``avg_reward``,
	            ``avg_accuracy``, ``hallucination_rate``, ``total_episodes`` and
	            ``total_steps``. Extra keys are stored as given.

	    Returns:
	        ``{"status": "submitted", "model_name": ...}``.

	    Raises:
	        HTTPException: 422 if a required field is missing, ``model_name`` is
	            not a string, or ``avg_reward`` is not a number.
	    """
	    required = ["model_name", "avg_reward", "avg_accuracy", "hallucination_rate", "total_episodes", "total_steps"]
	    if missing := [f for f in required if f not in data]:
	        raise HTTPException(422, f"Missing: {missing}")
	    model_name = data["model_name"]
	    # model_name is the dict key; a list or object here would be unhashable.
	    if not isinstance(model_name, str):
	        raise HTTPException(422, "'model_name' must be a string")
	    avg_reward = data["avg_reward"]
	    # The leaderboard is sorted by avg_reward, so it must be comparable as a number.
	    if isinstance(avg_reward, bool) or not isinstance(avg_reward, (int, float)):
	        raise HTTPException(422, "'avg_reward' must be a number")
	    _leaderboard[model_name] = {**data, "submitted_at": time.time()}
	    _save_leaderboard(_leaderboard)
	    return {"status": "submitted", "model_name": model_name}

	@app.get("/health", tags=["Info"])
	async def health():
	    """Return a static health payload with the service version."""
	    payload = {"status": "healthy", "version": "4.2.0"}
	    return payload

	@app.get("/metadata", tags=["OpenEnv"])
	async def metadata():
	    """Return the static name, version and license of this environment."""
	    info = {
	        "name": "hallucination-guard-env",
	        "version": "4.2.0",
	        "license": "MIT",
	    }
	    return info

	@app.get("/schema", tags=["OpenEnv"])
	async def schema():
	    """Return the minimal JSON-schema stubs for actions and observations."""
	    action_schema = {"type": "object", "required": ["answer"]}
	    observation_schema = {"type": "object"}
	    return {"action": action_schema, "observation": observation_schema}

	@app.get("/datasets", tags=["Info"])
	async def datasets():
	    """Return the number of examples loaded in the default environment.

	    Returns:
	        ``{"total_examples": int}``; the count is 0 if it cannot be read.
	    """
	    try:
	        return {"total_examples": _get_default_env().dataset_loader.get_total_examples()}
	    except Exception as e:
	        # Keep the endpoint answering, but record why the count is unavailable.
	        logger.warning("Could not read dataset size: %s", e)
	        return {"total_examples": 0}

	@app.post("/mcp", tags=["OpenEnv"])
	async def mcp(body: Dict[str, Any]):
	    """Answer a JSON-RPC 2.0 style request.

	    ``tools/list`` returns the ``reset`` and ``step`` tool descriptors; any
	    other method returns the server name and version. The request ``id`` is
	    echoed back, defaulting to 1.
	    """
	    request_id = body.get("id", 1)
	    if body.get("method") == "tools/list":
	        tools = [{"name": tool_name, "inputSchema": {"type": "object"}} for tool_name in ("reset", "step")]
	        result = {"tools": tools}
	    else:
	        result = {"name": "hallucination-guard-env", "version": "4.2.0"}
	    return {"jsonrpc": "2.0", "id": request_id, "result": result}

	@app.middleware("http")
	async def log_req(request, call_next):
	    """Log method, path and response status for every HTTP request."""
	    response = await call_next(request)
	    method, path, status = request.method, request.url.path, response.status_code
	    logger.info(f"{method} {path} → {status}")
	    return response

	def main():
	    """Serve the app with uvicorn on 0.0.0.0:7860."""
	    from uvicorn import run as serve
	    serve(app, host="0.0.0.0", port=7860)

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    main()