# Source: Hugging Face Space "FINAL-Bench/worldmodel-bench" (status: Running)
# File: worldmodel-bench/wm_bench_spec.py (37.2 kB)
# Last commit: ee97e7d (verified), "Upload 9 files" by SeaWolf-AI, 3 days ago

	# World Model Bench (WM Bench)
	# A Benchmark for Cognitive World Models in Embodied Intelligence
	# Version 1.0 — 2026.03
	# by VIDRAFT / Kim Taebong

	"""
	World Model Bench 설계 명세서

	기존 월드모델 벤치마크(HumanML3D, BABEL)는 "모션 품질"만 측정한다.
	World Model Bench는 "인지 능력"을 측정하는 최초의 벤치마크다.

	3대 평가 축 (Three Pillars):
	P1. PERCEPTION — 환경을 얼마나 정확히 인식하는가
	P2. COGNITION — 얼마나 지능적으로 판단하는가
	P3. EMBODIMENT — 판단이 얼마나 자연스럽게 구현되는가

	10개 카테고리, 50개 시나리오, 자동 채점
	"""

	# ═══════════════════════════════════════════════════════════════
	# BENCHMARK STRUCTURE
	# ═══════════════════════════════════════════════════════════════

	# Identity and publication metadata for the benchmark: display names,
	# version, authorship, license and the Hugging Face repositories it uses.
	BENCHMARK_META = {
	    "name": "World Model Bench",
	    "short_name": "WM Bench",
	    "version": "1.0",
	    "date": "2026-03",
	    "authors": ["Kim Taebong (VIDRAFT)"],
	    "license": "CC-BY-SA-4.0",
	    "hf_dataset": "VIDraft/WorldModelBench",
	    "hf_leaderboard": "VIDraft/WorldModelBench-Leaderboard",
	    "paper_title": (
	        "World Model Bench: A Benchmark for Cognitive World Models "
	        "in Embodied Intelligence"
	    ),
	    "tagline": "Beyond FID — Measuring Intelligence, Not Just Motion",
	    "parent_brand": "FINAL Bench Family",
	}


	# ═══════════════════════════════════════════════════════════════
	# THREE PILLARS — 3대 평가 축
	# ═══════════════════════════════════════════════════════════════

	# The three evaluation pillars. Each entry carries its display name, its
	# share of the total score ("weight"), presentation hints (icon, color)
	# and the ids of the categories that belong to it.
	PILLARS = {
	    "P1_PERCEPTION": {
	        "name": "Perception (인식)",
	        "weight": 0.25,
	        "description": "환경을 얼마나 정확하고 풍부하게 인식하는가",
	        "icon": "👁",
	        "color": "#7B8FD4",
	        "categories": ["C01", "C02"],
	    },
	    "P2_COGNITION": {
	        "name": "Cognition (인지)",
	        "weight": 0.45,
	        "description": "인식한 정보로 얼마나 지능적으로 판단하는가",
	        "icon": "🧠",
	        "color": "#E8593C",
	        "categories": ["C03", "C04", "C05", "C06", "C07"],
	    },
	    "P3_EMBODIMENT": {
	        "name": "Embodiment (구현)",
	        "weight": 0.30,
	        "description": "판단이 얼마나 자연스럽고 풍부하게 신체로 표현되는가",
	        "icon": "🔥",
	        "color": "#D4A044",
	        "categories": ["C08", "C09", "C10"],
	    },
	}


	# ═══════════════════════════════════════════════════════════════
	# 10 CATEGORIES — 평가 카테고리
	# ═══════════════════════════════════════════════════════════════

	# The ten evaluation categories, keyed by id (C01..C10). Every entry names
	# its parent pillar, an English and a Korean display name, the maximum
	# score, the number of scenarios and the key of the scoring method used.
	CATEGORIES = {
	    # ─── P1. PERCEPTION ───
	    "C01": {
	        "pillar": "P1_PERCEPTION",
	        "name": "Environmental Awareness",
	        "name_kr": "환경 인식 정확도",
	        "description": "주변 환경(벽, 장애물, 지형)을 정확히 파악하는 능력",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "spatial_accuracy",
	        "what_measures": "scene_context의 정확도와 풍부함",
	        "existing_benchmark": "없음 (기존은 점유격자 해상도만 측정)",
	    },
	    "C02": {
	        "pillar": "P1_PERCEPTION",
	        "name": "Entity Recognition",
	        "name_kr": "개체 인식 및 분류",
	        "description": "NPC, 위협, 중립 개체를 정확히 식별하고 분류하는 능력",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "classification_accuracy",
	        "what_measures": "위협(맹수) vs 중립(사람) vs 환경(벽) 구분",
	        "existing_benchmark": "없음",
	    },

	    # ─── P2. COGNITION — the key differentiating area ───
	    "C03": {
	        "pillar": "P2_COGNITION",
	        "name": "Predictive Reasoning",
	        "name_kr": "예측 기반 추론",
	        "description": "각 방향의 미래 결과를 예측하고 최선을 선택하는 능력",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "prediction_decision_match",
	        "what_measures": "PREDICT 줄의 정확도 + 행동 선택의 합리성",
	        "existing_benchmark": "없음 (기존 월드모델은 예측을 평가하지 않음)",
	        "example": "앞=맹수, 왼=벽 → 오른쪽 선택이 정답",
	    },
	    "C04": {
	        "pillar": "P2_COGNITION",
	        "name": "Threat Differentiation",
	        "name_kr": "위협 유형별 차별 반응",
	        "description": "서로 다른 위협(맹수/사람/환경)에 서로 다른 반응을 보이는 능력",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "response_differentiation",
	        "what_measures": "맹수→전력질주 vs 사람→걸어서 회피 vs 벽→방향전환",
	        "existing_benchmark": "없음",
	        "example": "같은 거리 3m에서 맹수 접근 vs 여성 접근 → 반응 강도 차이",
	    },
	    "C05": {
	        "pillar": "P2_COGNITION",
	        "name": "Emotional Escalation",
	        "name_kr": "자율 감정 에스컬레이션",
	        "description": "같은 위협이 지속될 때 감정이 자율적으로 격화되는 능력",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "escalation_gradient",
	        "what_measures": "시간에 따른 감정 강도 변화 (공포→절박→필사적)",
	        "existing_benchmark": "없음 (세계 최초 평가 기준)",
	        "example": "맹수 돌진 지속 → 1차:전력질주 → 2차:공포 → 3차:절박 → 4차:필사적",
	    },
	    "C06": {
	        "pillar": "P2_COGNITION",
	        "name": "Contextual Memory",
	        "name_kr": "맥락 기억 및 활용",
	        "description": "이전 판단을 기억하고 다음 판단에 반영하는 능력",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "memory_utilization",
	        "what_measures": "이전에 벽에 막힌 경험 → 다음에 같은 방향 회피",
	        "existing_benchmark": "없음",
	        "example": "1차: 오른쪽 도망→벽 충돌 → 2차: 같은 상황에서 왼쪽 선택",
	    },
	    "C07": {
	        "pillar": "P2_COGNITION",
	        "name": "Threat Resolution Adaptation",
	        "name_kr": "위협 해제 후 적응",
	        "description": "위협이 사라진 후 행동을 정상화하되 경계를 유지하는 능력",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "recovery_quality",
	        "what_measures": "맹수 해제 → 즉시 정상(나쁨) vs 서서히 진정+경계(좋음)",
	        "existing_benchmark": "없음",
	        "example": "맹수 despawn → '경계하며 주변을 살피며 천천히 걷는' 전환 행동",
	    },

	    # ─── P3. EMBODIMENT ───
	    "C08": {
	        "pillar": "P3_EMBODIMENT",
	        "name": "Motion Expressiveness",
	        "name_kr": "모션 감정 표현력",
	        "description": "판단의 감정과 뉘앙스가 모션에 반영되는 정도",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "expression_richness",
	        "what_measures": "'도망'과 '공포에 찬 전력질주'의 차이",
	        "existing_benchmark": "FID는 품질만 측정, 감정 표현력 미측정",
	    },
	    "C09": {
	        "pillar": "P3_EMBODIMENT",
	        "name": "Realtime Performance",
	        "name_kr": "실시간 인지-행동 성능",
	        "description": "인지 루프(감각→판단→모션)의 지연시간과 처리량",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "latency_throughput",
	        "what_measures": "FPS, 판단 지연시간, 프레임 드롭률",
	        "existing_benchmark": "모션 FPS만 측정, 인지 루프 지연 미측정",
	    },
	    "C10": {
	        "pillar": "P3_EMBODIMENT",
	        "name": "Cross-body Transferability",
	        "name_kr": "신체 교체 확장성",
	        "description": "동일 인지 루프로 다른 신체(3D/로봇/드론)에 적용 가능한 정도",
	        "max_score": 100,
	        "num_scenarios": 5,
	        "scoring": "transfer_success_rate",
	        "what_measures": "두뇌를 교체 없이 신체만 바꿔 동일 판단이 나오는가",
	        "existing_benchmark": "없음 (세계 최초)",
	    },
	}


	# ═══════════════════════════════════════════════════════════════
	# 50 SCENARIOS — 시나리오 전체 목록
	# ═══════════════════════════════════════════════════════════════

	# The full scenario list: 50 entries, five per category, in id order
	# (S01..S50). Every scenario has an id, a category id, English and Korean
	# names, a setup description and a difficulty; the "expected_*" key and
	# any optional fields (correct/incorrect actions, scoring, note) vary by
	# category.
	# NOTE(review): "expected_escalation" is a one-element list in S21–S23 but
	# a plain string in S24–S25 — confirm which shape consumers expect.
	SCENARIOS = [
	    # ─── C01: Environmental Awareness ───
	    {
	        "id": "S01",
	        "category": "C01",
	        "name": "Wall Detection Front",
	        "name_kr": "전방 벽 감지",
	        "setup": "캐릭터 전방 3m에 벽",
	        "expected_perception": "fwd=danger(wall)",
	        "difficulty": "easy",
	    },
	    {
	        "id": "S02",
	        "category": "C01",
	        "name": "Multi-wall Corner",
	        "name_kr": "코너 다중 벽 감지",
	        "setup": "전방+왼쪽 벽, 오른쪽만 열림",
	        "expected_perception": "fwd=danger(wall), left=danger(wall), right=safe",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S03",
	        "category": "C01",
	        "name": "Narrow Corridor",
	        "name_kr": "좁은 복도 인식",
	        "setup": "양쪽 벽, 전방만 열린 복도",
	        "expected_perception": "left=danger(wall), right=danger(wall), fwd=safe",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S04",
	        "category": "C01",
	        "name": "Open Field",
	        "name_kr": "열린 공간 인식",
	        "setup": "사방에 벽 없음, 평지",
	        "expected_perception": "all=safe(open)",
	        "difficulty": "easy",
	    },
	    {
	        "id": "S05",
	        "category": "C01",
	        "name": "Enclosed Room",
	        "name_kr": "밀폐 공간 인식",
	        "setup": "사방에 벽, 출구 1개",
	        "expected_perception": "3방향 danger(wall), 1방향 safe(exit)",
	        "difficulty": "hard",
	    },

	    # ─── C02: Entity Recognition ───
	    {
	        "id": "S06",
	        "category": "C02",
	        "name": "Beast Identification",
	        "name_kr": "맹수 식별",
	        "setup": "전방 5m에 맹수 NPC 정지",
	        "expected_recognition": "entity=beast, behavior=stop, distance=5m",
	        "difficulty": "easy",
	    },
	    {
	        "id": "S07",
	        "category": "C02",
	        "name": "Human vs Beast",
	        "name_kr": "사람과 맹수 구분",
	        "setup": "전방에 여성 NPC + 측면에 맹수 NPC 동시 존재",
	        "expected_recognition": "fwd=woman(neutral), side=beast(threat)",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S08",
	        "category": "C02",
	        "name": "Approaching Entity Speed",
	        "name_kr": "접근 개체 속도 판별",
	        "setup": "맹수 approach(1.2) vs charge(5.0) 행동 변화",
	        "expected_recognition": "behavior_change: approach→charge, threat_level↑",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S09",
	        "category": "C02",
	        "name": "Entity at Distance",
	        "name_kr": "원거리 개체 인식",
	        "setup": "맹수 8m 거리 (감지 범위 경계)",
	        "expected_recognition": "npc_nearby=true/false boundary",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S10",
	        "category": "C02",
	        "name": "Entity Disappearance",
	        "name_kr": "개체 소멸 인식",
	        "setup": "맹수 despawn 후 scene_context 업데이트",
	        "expected_recognition": "npc_nearby=false, threat=cleared",
	        "difficulty": "easy",
	    },

	    # ─── C03: Predictive Reasoning ───
	    {
	        "id": "S11",
	        "category": "C03",
	        "name": "Single Threat Avoidance",
	        "name_kr": "단일 위협 회피 예측",
	        "setup": "전방에 맹수, 나머지 3방향 열림",
	        "expected_prediction": "fwd=danger(beast), others=safe → back 또는 left/right 선택",
	        "correct_actions": ["turn around", "sprint backward", "move left", "move right"],
	        "incorrect_actions": ["walk forward", "stand still"],
	        "difficulty": "easy",
	    },
	    {
	        "id": "S12",
	        "category": "C03",
	        "name": "Constrained Escape",
	        "name_kr": "제약 조건 탈출 예측",
	        "setup": "전방 맹수 + 왼쪽 벽 → 오른쪽만 열림",
	        "expected_prediction": "fwd=danger(beast), left=danger(wall), right=safe",
	        "correct_actions": ["sprint right", "move right", "flank right"],
	        "incorrect_actions": ["sprint left", "walk forward", "stand still"],
	        "difficulty": "medium",
	    },
	    {
	        "id": "S13",
	        "category": "C03",
	        "name": "Mirror Constraint",
	        "name_kr": "거울 대칭 제약 테스트",
	        "setup": "S12와 동일하되 벽이 오른쪽 → 왼쪽만 열림",
	        "expected_prediction": "fwd=danger(beast), right=danger(wall), left=safe",
	        "correct_actions": ["sprint left", "move left", "flank left"],
	        "incorrect_actions": ["sprint right", "walk forward"],
	        "note": "S12와 S13의 행동이 대칭적으로 반전되어야 월드모델",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S14",
	        "category": "C03",
	        "name": "Dead End Recognition",
	        "name_kr": "막다른 길 인식 및 판단",
	        "setup": "전방+왼쪽+오른쪽 벽, 후방만 열림, 맹수 후방에서 접근",
	        "expected_prediction": "3방향 wall + back=beast → 최선 선택",
	        "correct_actions": ["squeeze past", "climb", "freeze and assess"],
	        "note": "모든 방향이 위험할 때의 창발적 판단 능력",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S15",
	        "category": "C03",
	        "name": "Dynamic Threat Trajectory",
	        "name_kr": "동적 위협 경로 예측",
	        "setup": "맹수가 좌측에서 우측으로 이동 중 → 경로 예측",
	        "expected_prediction": "현재 좌=danger → 시간 후 fwd=danger 전이 예측",
	        "correct_actions": ["wait then move left", "preemptive right dodge"],
	        "difficulty": "hard",
	    },

	    # ─── C04: Threat Differentiation ───
	    {
	        "id": "S16",
	        "category": "C04",
	        "name": "Beast vs Human Response",
	        "name_kr": "맹수 vs 사람 반응 차이",
	        "setup": "동일 거리 3m에서 (A)맹수 접근 (B)여성 접근",
	        "expected_diff": "맹수→sprint/run, 여성→walk away/step back",
	        "scoring": "반응 강도(속도 키워드) 차이가 클수록 높은 점수",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S17",
	        "category": "C04",
	        "name": "Charge vs Approach Response",
	        "name_kr": "돌진 vs 접근 반응 차이",
	        "setup": "맹수 (A)approach 1.2m/s (B)charge 5.0m/s",
	        "expected_diff": "approach→cautious retreat, charge→desperate sprint",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S18",
	        "category": "C04",
	        "name": "Wall vs Beast Priority",
	        "name_kr": "벽 vs 맹수 위험도 우선순위",
	        "setup": "전방 벽 + 측면 맹수 → 어떤 위협을 우선 회피?",
	        "expected_diff": "맹수(동적 위협) 우선 회피 > 벽(정적 장애물)",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S19",
	        "category": "C04",
	        "name": "Multiple Entity Triage",
	        "name_kr": "다중 개체 위험도 분류",
	        "setup": "맹수 5m + 여성 2m + 벽 1m → 종합 판단",
	        "expected_diff": "거리+위협도 종합하여 최적 경로 선택",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S20",
	        "category": "C04",
	        "name": "Neutral Entity Non-reaction",
	        "name_kr": "중립 개체 비반응",
	        "setup": "여성 NPC 정지 상태, 5m 거리",
	        "expected_diff": "위협 반응 없이 정상 행동 유지",
	        "difficulty": "easy",
	    },

	    # ─── C05: Emotional Escalation ───
	    {
	        "id": "S21",
	        "category": "C05",
	        "name": "Sustained Threat Escalation",
	        "name_kr": "지속 위협 감정 격화",
	        "setup": "맹수 charge 10초 지속",
	        "expected_escalation": ["sprint→desperate sprint→frantic escape"],
	        "scoring": "감정 강도 키워드가 시간에 따라 증가하면 점수",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S22",
	        "category": "C05",
	        "name": "Approach-to-Charge Escalation",
	        "name_kr": "접근→돌진 전환 시 감정 점프",
	        "setup": "맹수 approach 5초 → charge 전환",
	        "expected_escalation": ["cautious→sprint 급격한 전환"],
	        "difficulty": "medium",
	    },
	    {
	        "id": "S23",
	        "category": "C05",
	        "name": "De-escalation After Threat",
	        "name_kr": "위협 해제 후 감정 안정화",
	        "setup": "맹수 charge → stop → despawn",
	        "expected_escalation": ["desperate→cautious→relieved→normal"],
	        "scoring": "즉시 정상화(나쁨) vs 서서히 진정(좋음)",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S24",
	        "category": "C05",
	        "name": "Repeated Threat Sensitization",
	        "name_kr": "반복 위협 민감화",
	        "setup": "맹수 출현→해제→재출현 3회 반복",
	        "expected_escalation": "재출현 시 이전보다 빠른 공포 반응",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S25",
	        "category": "C05",
	        "name": "Low Threat Calm Maintenance",
	        "name_kr": "낮은 위협 시 평정 유지",
	        "setup": "여성 NPC approach → stop 반복",
	        "expected_escalation": "감정 에스컬레이션 없이 평정 유지",
	        "difficulty": "easy",
	    },

	    # ─── C06: Contextual Memory ───
	    {
	        "id": "S26",
	        "category": "C06",
	        "name": "Wall Memory Avoidance",
	        "name_kr": "벽 기억 회피",
	        "setup": "1차: 오른쪽 도주→벽 충돌 / 2차: 동일 위치 동일 위협",
	        "expected_memory": "2차에서 오른쪽 회피, 왼쪽 선택",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S27",
	        "category": "C06",
	        "name": "Safe Route Memory",
	        "name_kr": "안전 경로 기억",
	        "setup": "이전에 왼쪽 탈출 성공 → 유사 상황 재발",
	        "expected_memory": "왼쪽 우선 선택 경향",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S28",
	        "category": "C06",
	        "name": "Decision Consistency",
	        "name_kr": "판단 일관성",
	        "setup": "동일 scene_context 3회 반복 입력",
	        "expected_memory": "유사한 행동 일관되게 출력 (동일할 필요 없음)",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S29",
	        "category": "C06",
	        "name": "Threat History Reference",
	        "name_kr": "위협 이력 참조",
	        "setup": "recent_decisions에 '맹수 도주 기록' 포함",
	        "expected_memory": "현재 판단에 이전 도주 경험 반영",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S30",
	        "category": "C06",
	        "name": "Clean Slate Test",
	        "name_kr": "기억 초기화 테스트",
	        "setup": "기억 없는 상태에서 동일 시나리오",
	        "expected_memory": "기억 있을 때와 다른 (더 일반적인) 반응",
	        "difficulty": "easy",
	    },

	    # ─── C07: Threat Resolution Adaptation ───
	    {
	        "id": "S31",
	        "category": "C07",
	        "name": "Post-beast Vigilance",
	        "name_kr": "맹수 해제 후 경계 유지",
	        "setup": "맹수 despawn 직후",
	        "expected_adaptation": "'경계하며 주변 살핌' — 즉시 정상화 아님",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S32",
	        "category": "C07",
	        "name": "Gradual Normalization",
	        "name_kr": "점진적 정상화",
	        "setup": "맹수 해제 후 10초 경과",
	        "expected_adaptation": "sprint→walk→normal 순차 전환",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S33",
	        "category": "C07",
	        "name": "Obstacle Navigation After Threat",
	        "name_kr": "위협 해제 후 장애물 탐색 전환",
	        "setup": "맹수 해제 → 벽 앞 도달",
	        "expected_adaptation": "공포 행동→일반 장애물 회피로 전환",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S34",
	        "category": "C07",
	        "name": "New Threat Re-activation",
	        "name_kr": "새 위협 시 재활성화",
	        "setup": "정상화 중 새 맹수 출현",
	        "expected_adaptation": "즉각적 위협 반응 재활성화",
	        "difficulty": "easy",
	    },
	    {
	        "id": "S35",
	        "category": "C07",
	        "name": "Human Approach After Beast",
	        "name_kr": "맹수 후 사람 접근 시 과잉 반응 여부",
	        "setup": "맹수 해제 직후 여성 NPC 접근",
	        "expected_adaptation": "과잉 반응(나쁨) vs 적절 경계(좋음)",
	        "difficulty": "hard",
	    },

	    # ─── C08: Motion Expressiveness ───
	    {
	        "id": "S36",
	        "category": "C08",
	        "name": "Fear Expression in Sprint",
	        "name_kr": "전력질주 시 공포 표현",
	        "setup": "맹수 charge → 캐릭터 sprint",
	        "expected_expression": "단순 달리기 vs 공포가 담긴 전력질주 차이",
	        "scoring": "모션 프롬프트의 감정 키워드 풍부함",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S37",
	        "category": "C08",
	        "name": "Cautious Walk Expression",
	        "name_kr": "경계 보행 표현",
	        "setup": "위협 해제 직후 이동",
	        "expected_expression": "'경계하며 천천히' — 일반 걷기와 다른 뉘앙스",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S38",
	        "category": "C08",
	        "name": "Freezing Response",
	        "name_kr": "정지 반응 표현",
	        "setup": "맹수 최초 감지 순간",
	        "expected_expression": "'얼어붙음' — 정지 + 긴장 표현",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S39",
	        "category": "C08",
	        "name": "Relief Expression",
	        "name_kr": "안도 표현",
	        "setup": "맹수 해제 후 안전 확인",
	        "expected_expression": "'안도하며 숨을 고르는' 전환 모션",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S40",
	        "category": "C08",
	        "name": "Defensive Posture",
	        "name_kr": "방어 자세 표현",
	        "setup": "미지의 NPC(여성) 접근",
	        "expected_expression": "'방어 자세 + 뒷걸음' — 공격적이지 않은 경계",
	        "difficulty": "medium",
	    },

	    # ─── C09: Realtime Performance ───
	    {
	        "id": "S41",
	        "category": "C09",
	        "name": "Frame Generation Rate",
	        "name_kr": "프레임 생성 속도",
	        "setup": "일반 보행 상태에서 FPS 측정",
	        "expected_performance": "≥30 FPS 합격, ≥45 FPS 우수",
	        "scoring": "FPS 수치 직접 측정",
	        "difficulty": "easy",
	    },
	    {
	        "id": "S42",
	        "category": "C09",
	        "name": "Cognitive Loop Latency",
	        "name_kr": "인지 루프 지연시간",
	        "setup": "자극 입력 → 행동 변화까지의 시간",
	        "expected_performance": "≤5초 합격, ≤3초 우수",
	        "scoring": "scene_context 변경 → 모션 프롬프트 변경 시간",
	        "difficulty": "easy",
	    },
	    {
	        "id": "S43",
	        "category": "C09",
	        "name": "Dual Stream Performance",
	        "name_kr": "듀얼 스트림 성능",
	        "setup": "주인공 + NPC 동시 모션 생성",
	        "expected_performance": "주인공 ≥30 FPS 유지",
	        "scoring": "NPC 추가 시 주인공 FPS 하락률",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S44",
	        "category": "C09",
	        "name": "Stress Test Throughput",
	        "name_kr": "스트레스 테스트",
	        "setup": "빠른 연속 자극 (매 1초마다 scene 변경)",
	        "expected_performance": "프레임 드롭 없이 지속",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S45",
	        "category": "C09",
	        "name": "GPU Memory Stability",
	        "name_kr": "GPU 메모리 안정성",
	        "setup": "NPC 3회 spawn/despawn 반복",
	        "expected_performance": "메모리 누수 없이 안정 유지",
	        "difficulty": "medium",
	    },

	    # ─── C10: Cross-body Transferability ───
	    {
	        "id": "S46",
	        "category": "C10",
	        "name": "Brain-Body Decoupling",
	        "name_kr": "두뇌-신체 분리 가능성",
	        "setup": "동일 인지 출력(PREDICT+MOTION)으로 다른 모션 모델 구동",
	        "expected_transfer": "두뇌 코드 수정 없이 모션 모델만 교체 가능",
	        "difficulty": "medium",
	    },
	    {
	        "id": "S47",
	        "category": "C10",
	        "name": "Joint Format Universality",
	        "name_kr": "관절 포맷 범용성",
	        "setup": "263dim → 22joints 변환이 다른 스켈레톤에도 적용 가능",
	        "expected_transfer": "SMPL, SMPL-X, 커스텀 리그 호환",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S48",
	        "category": "C10",
	        "name": "Robot Servo Mapping Readiness",
	        "name_kr": "로봇 서보 매핑 준비도",
	        "setup": "22 joints → 서보 각도 변환 레이어 존재 여부",
	        "expected_transfer": "변환 인터페이스 정의 + 시뮬레이션 가능",
	        "difficulty": "hard",
	    },
	    {
	        "id": "S49",
	        "category": "C10",
	        "name": "Prompt Universality",
	        "name_kr": "모션 프롬프트 범용성",
	        "setup": "MOTION 출력이 다른 모션 모델에서도 해석 가능",
	        "expected_transfer": "자연어 모션 프롬프트는 모델 독립적",
	        "difficulty": "easy",
	    },
	    {
	        "id": "S50",
	        "category": "C10",
	        "name": "Multi-embodiment Consistency",
	        "name_kr": "다중 신체 일관성",
	        "setup": "같은 두뇌 판단이 3D 캐릭터/로봇/드론에서 동일 의도 표현",
	        "expected_transfer": "신체는 달라도 '도망'이라는 의도가 보존",
	        "difficulty": "hard",
	    },
	]


	# ═══════════════════════════════════════════════════════════════
	# SCORING SYSTEM — 채점 체계
	# ═══════════════════════════════════════════════════════════════

	# Scoring specification: the overall score and its formula, the per-pillar
	# maxima and weights, the rubric for each automatic scoring method and the
	# letter-grade thresholds (each grade's "min" is its lowest total score).
	# NOTE(review): CATEGORIES also names the scoring methods
	# "memory_utilization", "recovery_quality" and "transfer_success_rate",
	# none of which has a rubric under "auto_scoring_methods" — confirm
	# whether they are intentionally unspecified. "response_differentiation"
	# likewise has a keyword table but no "scores" mapping.
	SCORING = {
	    "total_score": {
	        "name": "WM Score",
	        "max": 1000,
	        "formula": "P1(250) + P2(450) + P3(300)",
	    },
	    "pillar_scores": {
	        "P1_PERCEPTION": {"max": 250, "weight": 0.25},
	        "P2_COGNITION": {"max": 450, "weight": 0.45},
	        "P3_EMBODIMENT": {"max": 300, "weight": 0.30},
	    },
	    "auto_scoring_methods": {
	        "spatial_accuracy": {
	            "description": "PREDICT 출력과 실제 환경 비교",
	            "method": "scene_context vs PREDICT line 키워드 매칭",
	            "scores": {"exact_match": 20, "partial_match": 10, "miss": 0},
	        },
	        "classification_accuracy": {
	            "description": "개체 분류 정확도",
	            "method": "NPC 유형 + 행동 + 거리 정확도",
	            "scores": {
	                "all_correct": 20,
	                "type_correct": 15,
	                "partial": 10,
	                "wrong": 0,
	            },
	        },
	        "prediction_decision_match": {
	            "description": "예측→행동 논리적 일관성",
	            "method": "danger 방향 회피 + safe 방향 선택 여부",
	            "scores": {
	                "optimal": 20,
	                "reasonable": 15,
	                "suboptimal": 5,
	                "contradictory": 0,
	            },
	        },
	        "response_differentiation": {
	            "description": "위협 유형별 반응 차이",
	            "method": "모션 프롬프트의 강도 키워드 비교",
	            # Keywords grouped by the intensity level they signal.
	            "keyword_intensity": {
	                "high": ["sprint", "run", "desperate", "frantic", "terror", "flee"],
	                "medium": ["walk quickly", "step back", "retreat", "cautious"],
	                "low": ["walk", "turn", "move", "stand"],
	            },
	        },
	        "escalation_gradient": {
	            "description": "시간에 따른 감정 강도 증가",
	            "method": "연속 판단에서 강도 키워드 레벨 변화 측정",
	            "scores": {
	                "increasing": 20,
	                "stable_high": 10,
	                "decreasing": 5,
	                "flat_low": 0,
	            },
	        },
	        "expression_richness": {
	            "description": "모션 프롬프트의 감정/부사 풍부함",
	            "method": "감정 키워드 수 + 부사/형용사 수 카운트",
	            "scores": {
	                "rich_3plus": 20,
	                "moderate_2": 15,
	                "basic_1": 10,
	                "none_0": 0,
	            },
	        },
	        "latency_throughput": {
	            "description": "실시간 성능 직접 측정",
	            "method": "FPS 측정 + 인지 루프 지연시간 측정",
	            "scores": {
	                # Frame-rate bands.
	                "fps_45plus": 20,
	                "fps_30_45": 15,
	                "fps_15_30": 5,
	                "fps_below_15": 0,
	                # Cognitive-loop latency bands.
	                "latency_3s": 20,
	                "latency_5s": 15,
	                "latency_10s": 5,
	                "latency_above": 0,
	            },
	        },
	    },
	    "grades": {
	        "S": {"min": 900, "label": "Superhuman", "description": "인간 수준 이상의 인지 월드모델"},
	        "A": {"min": 750, "label": "Advanced", "description": "고급 인지 월드모델"},
	        "B": {"min": 600, "label": "Baseline", "description": "기본 월드모델 수준"},
	        "C": {"min": 400, "label": "Capable", "description": "제한적 인지 능력"},
	        "D": {"min": 200, "label": "Developing", "description": "초기 단계"},
	        "F": {"min": 0, "label": "Failing", "description": "월드모델로 분류 불가"},
	    },
	}


	# ═══════════════════════════════════════════════════════════════
	# LEADERBOARD SCHEMA — 리더보드 구조
	# ═══════════════════════════════════════════════════════════════

	# Leaderboard layout. "entry" maps each submission field to a
	# "type — description" string; "columns_display_order" lists the columns
	# shown in the table, left to right.
	LEADERBOARD_SCHEMA = {
	    "entry": {
	        "model_name": "str — 모델명",
	        "organization": "str — 제출 조직",
	        "submission_date": "str — 제출일",
	        "wm_score": "int — 총점 (0~1000)",
	        "grade": "str — S/A/B/C/D/F",
	        "p1_perception": "int — 인식 점수 (0~250)",
	        "p2_cognition": "int — 인지 점수 (0~450)",
	        "p3_embodiment": "int — 구현 점수 (0~300)",
	        "c01_to_c10": "dict — 10개 카테고리 개별 점수",
	        "fps": "float — 평균 FPS",
	        "cognitive_latency_ms": "int — 인지 루프 지연시간",
	        "gpu": "str — 사용 GPU",
	        "brain_model": "str — 인지 모델 (LLM 등)",
	        "motion_model": "str — 모션 생성 모델",
	        "paper_url": "str — 논문 링크 (선택)",
	        "demo_url": "str — 데모 링크 (선택)",
	    },
	    "columns_display_order": [
	        "rank",
	        "model_name",
	        "wm_score",
	        "grade",
	        "p1_perception",
	        "p2_cognition",
	        "p3_embodiment",
	        "fps",
	        "cognitive_latency_ms",
	    ],
	}


	# ═══════════════════════════════════════════════════════════════
	# HF DATASET STRUCTURE
	# ═══════════════════════════════════════════════════════════════

	# Planned layout of the Hugging Face dataset repository. Keys ending in
	# "/" are directories whose value is a nested mapping; every other key is
	# a file whose value is a short description of its contents.
	HF_DATASET_STRUCTURE = {
	    "repo": "VIDraft/WorldModelBench",
	    "files": {
	        "README.md": "벤치마크 설명 + 사용법 + 인용",
	        "benchmark_spec.json": "이 파일의 JSON 변환 (전체 명세)",
	        # One scenario file per category, five scenarios each.
	        "scenarios/": {
	            "c01_environmental_awareness.json": "S01~S05",
	            "c02_entity_recognition.json": "S06~S10",
	            "c03_predictive_reasoning.json": "S11~S15",
	            "c04_threat_differentiation.json": "S16~S20",
	            "c05_emotional_escalation.json": "S21~S25",
	            "c06_contextual_memory.json": "S26~S30",
	            "c07_threat_resolution.json": "S31~S35",
	            "c08_motion_expressiveness.json": "S36~S40",
	            "c09_realtime_performance.json": "S41~S45",
	            "c10_cross_body_transfer.json": "S46~S50",
	        },
	        "scoring/": {
	            "auto_scorer.py": "자동 채점 코드",
	            "keyword_banks.json": "감정 키워드 사전",
	            "intensity_scale.json": "강도 수준 정의",
	        },
	        "leaderboard/": {
	            "results.json": "전체 제출 결과",
	            "baselines.json": "VIDRAFT PROMETHEUS 기준점",
	        },
	        "examples/": {
	            "vidraft_prometheus_submission.json": "제출 예시",
	            "sample_evaluation_log.json": "채점 로그 예시",
	        },
	    },
	}


	# ═══════════════════════════════════════════════════════════════
	# BASELINE SCORES — VIDRAFT PROMETHEUS 기준점
	# ═══════════════════════════════════════════════════════════════

	# Reference submission for the VIDRAFT PROMETHEUS system: the models and
	# GPU used, the per-category scores grouped by pillar, and free-form notes.
	# "wm_score" equals the sum of the three pillar subtotals
	# (140 + 390 + 200 = 730).
	# The grade is "B": 730 falls in the 600–749 band of SCORING["grades"],
	# and that table (like LEADERBOARD_SCHEMA's "S/A/B/C/D/F") defines no "+"
	# grades, so the former "B+" was not a valid value.
	# NOTE(review): each subtotal is the raw sum of 0–100 category scores, so
	# the achievable maxima are 200 / 500 / 300, not the 250 / 450 / 300
	# pillar maxima declared in SCORING["pillar_scores"] — confirm whether
	# pillar weighting should be applied before publishing these numbers.
	VIDRAFT_BASELINE = {
	    "model_name": "VIDRAFT PROMETHEUS v1.0",
	    "organization": "VIDRAFT",
	    "brain_model": "Kimi K2.5 (Fireworks)",
	    "motion_model": "FloodDiffusion Tiny (ShandaAI)",
	    "gpu": "NVIDIA L40S 48GB",
	    "scores": {
	        "wm_score": 730,
	        "grade": "B",
	        "P1_PERCEPTION": {
	            "C01_environmental_awareness": 65,
	            "C02_entity_recognition": 75,
	            "subtotal": 140,  # 65 + 75 (raw sum of two categories)
	        },
	        "P2_COGNITION": {
	            "C03_predictive_reasoning": 85,
	            "C04_threat_differentiation": 90,
	            "C05_emotional_escalation": 85,
	            "C06_contextual_memory": 60,
	            "C07_threat_resolution": 70,
	            "subtotal": 390,  # raw sum of five categories
	        },
	        "P3_EMBODIMENT": {
	            "C08_motion_expressiveness": 80,
	            "C09_realtime_performance": 85,
	            "C10_cross_body_transfer": 35,
	            "subtotal": 200,  # raw sum of three categories
	        },
	    },
	    "notes": [
	        "P2 Cognition이 가장 강력 — 예측+차별+감정 영역 압도",
	        "P1 Perception은 레이캐스트 3방향 한계 → Phase 2에서 강화",
	        "P3 중 C10(교체 확장성)은 아직 미구현 → Phase 3에서 강화",
	        "AETHER 통합 시 C06(기억), C05(에스컬레이션) 대폭 상승 예상",
	    ],
	}


	# ═══════════════════════════════════════════════════════════════
	# PRINT SUMMARY
	# ═══════════════════════════════════════════════════════════════

	if __name__ == "__main__":
	    # Script entry point: print a human-readable summary of the spec.
	    # The version, counts, maximum score and Hugging Face repo ids are read
	    # from the module-level tables instead of being repeated as literals,
	    # so the summary cannot drift from the data it describes.
	    # NOTE(review): the nesting below was reconstructed because the
	    # original indentation was lost — confirm that the blank line is meant
	    # to be printed once per pillar.
	    rule = "=" * 60
	    total = SCORING["total_score"]
	    baseline = VIDRAFT_BASELINE["scores"]

	    print(rule)
	    print(f" {BENCHMARK_META['name']} v{BENCHMARK_META['version']}")
	    print(" A Benchmark for Cognitive World Models")
	    print(rule)
	    print()
	    print(
	        f" {len(PILLARS)} Pillars / {len(CATEGORIES)} Categories"
	        f" / {len(SCENARIOS)} Scenarios"
	    )
	    print(f" Max Score: {total['max']} ({total['name']})")
	    print()

	    # One section per pillar, listing the categories that belong to it.
	    for pillar in PILLARS.values():
	        print(f" {pillar['icon']} {pillar['name']} — weight {pillar['weight']}")
	        for cid in pillar["categories"]:
	            category = CATEGORIES[cid]
	            print(
	                f" {cid}: {category['name_kr']}"
	                f" ({category['num_scenarios']} scenarios)"
	            )
	        print()

	    print(f" Total scenarios: {len(SCENARIOS)}")
	    print()
	    print(
	        f" VIDRAFT PROMETHEUS Baseline: {baseline['wm_score']}/{total['max']}"
	        f" (Grade {baseline['grade']})"
	    )
	    print()
	    print(f" HF Dataset: {BENCHMARK_META['hf_dataset']}")
	    print(f" HF Leaderboard: {BENCHMARK_META['hf_leaderboard']}")
	    print(rule)