# World Model Bench (WM Bench)
# A Benchmark for Cognitive World Models in Embodied Intelligence
# Version 1.0 — 2026.03
# by VIDRAFT / Kim Taebong

"""
World Model Bench 설계 명세서

기존 월드모델 벤치마크(HumanML3D, BABEL)는 "모션 품질"만 측정한다.
World Model Bench는 "인지 능력"을 측정하는 최초의 벤치마크다.

3대 평가 축 (Three Pillars):
  P1. PERCEPTION  — 환경을 얼마나 정확히 인식하는가
  P2. COGNITION   — 얼마나 지능적으로 판단하는가
  P3. EMBODIMENT  — 판단이 얼마나 자연스럽게 구현되는가

10개 카테고리, 50개 시나리오, 자동 채점
"""

# ═══════════════════════════════════════════════════════════════
#  BENCHMARK STRUCTURE
# ═══════════════════════════════════════════════════════════════

# Identity and publication metadata for the benchmark. The display name and
# the Hugging Face repo id each appear inside longer strings, so they are
# spelled once here and reused below.
_BENCH_NAME = "World Model Bench"
_HF_REPO = "VIDraft/WorldModelBench"

BENCHMARK_META = {
    "name": _BENCH_NAME,
    "short_name": "WM Bench",
    "version": "1.0",
    "date": "2026-03",
    "authors": ["Kim Taebong (VIDRAFT)"],
    "license": "CC-BY-SA-4.0",
    # Hugging Face repo ids for the dataset and its leaderboard.
    "hf_dataset": _HF_REPO,
    "hf_leaderboard": f"{_HF_REPO}-Leaderboard",
    "paper_title": (
        f"{_BENCH_NAME}: "
        "A Benchmark for Cognitive World Models in Embodied Intelligence"
    ),
    "tagline": "Beyond FID — Measuring Intelligence, Not Just Motion",
    "parent_brand": "FINAL Bench Family",
}


# ═══════════════════════════════════════════════════════════════
#  THREE PILLARS — 3대 평가 축
# ═══════════════════════════════════════════════════════════════

def _pillar(name, weight, description, icon, color, categories):
    """Build one pillar record, keeping the key order used across the spec."""
    return {
        "name": name,
        "weight": weight,
        "description": description,
        "icon": icon,
        "color": color,
        "categories": list(categories),
    }


# The three evaluation pillars. Each one carries a display name, a weight,
# UI hints (icon, colour) and the ids of the categories it groups. The three
# weights below add up to 1.0.
PILLARS = {
    "P1_PERCEPTION": _pillar(
        "Perception (인식)",
        0.25,
        "환경을 얼마나 정확하고 풍부하게 인식하는가",
        "👁",
        "#7B8FD4",
        ("C01", "C02"),
    ),
    "P2_COGNITION": _pillar(
        "Cognition (인지)",
        0.45,
        "인식한 정보로 얼마나 지능적으로 판단하는가",
        "🧠",
        "#E8593C",
        ("C03", "C04", "C05", "C06", "C07"),
    ),
    "P3_EMBODIMENT": _pillar(
        "Embodiment (구현)",
        0.30,
        "판단이 얼마나 자연스럽고 풍부하게 신체로 표현되는가",
        "🔥",
        "#D4A044",
        ("C08", "C09", "C10"),
    ),
}


# ═══════════════════════════════════════════════════════════════
#  10 CATEGORIES — 평가 카테고리
# ═══════════════════════════════════════════════════════════════

def _category(pillar, name, name_kr, description, scoring, what_measures,
              existing_benchmark, example=None):
    """Build one category record.

    Every category is worth 100 points spread over 5 scenarios, so those two
    fields are filled in here instead of being repeated per entry. ``example``
    is appended as the last key only for categories that supply one.
    """
    record = {
        "pillar": pillar,
        "name": name,
        "name_kr": name_kr,
        "description": description,
        "max_score": 100,
        "num_scenarios": 5,
        "scoring": scoring,
        "what_measures": what_measures,
        "existing_benchmark": existing_benchmark,
    }
    if example is not None:
        record["example"] = example
    return record


# The ten evaluation categories, keyed by id. ``pillar`` points back into
# PILLARS and ``scoring`` names the scoring method used for the category.
CATEGORIES = {
    # --- P1. PERCEPTION ---
    "C01": _category(
        pillar="P1_PERCEPTION",
        name="Environmental Awareness",
        name_kr="환경 인식 정확도",
        description="주변 환경(벽, 장애물, 지형)을 정확히 파악하는 능력",
        scoring="spatial_accuracy",
        what_measures="scene_context의 정확도와 풍부함",
        existing_benchmark="없음 (기존은 점유격자 해상도만 측정)",
    ),
    "C02": _category(
        pillar="P1_PERCEPTION",
        name="Entity Recognition",
        name_kr="개체 인식 및 분류",
        description="NPC, 위협, 중립 개체를 정확히 식별하고 분류하는 능력",
        scoring="classification_accuracy",
        what_measures="위협(맹수) vs 중립(사람) vs 환경(벽) 구분",
        existing_benchmark="없음",
    ),

    # --- P2. COGNITION (the key differentiating area) ---
    "C03": _category(
        pillar="P2_COGNITION",
        name="Predictive Reasoning",
        name_kr="예측 기반 추론",
        description="각 방향의 미래 결과를 예측하고 최선을 선택하는 능력",
        scoring="prediction_decision_match",
        what_measures="PREDICT 줄의 정확도 + 행동 선택의 합리성",
        existing_benchmark="없음 (기존 월드모델은 예측을 평가하지 않음)",
        example="앞=맹수, 왼=벽 → 오른쪽 선택이 정답",
    ),
    "C04": _category(
        pillar="P2_COGNITION",
        name="Threat Differentiation",
        name_kr="위협 유형별 차별 반응",
        description="서로 다른 위협(맹수/사람/환경)에 서로 다른 반응을 보이는 능력",
        scoring="response_differentiation",
        what_measures="맹수→전력질주 vs 사람→걸어서 회피 vs 벽→방향전환",
        existing_benchmark="없음",
        example="같은 거리 3m에서 맹수 접근 vs 여성 접근 → 반응 강도 차이",
    ),
    "C05": _category(
        pillar="P2_COGNITION",
        name="Emotional Escalation",
        name_kr="자율 감정 에스컬레이션",
        description="같은 위협이 지속될 때 감정이 자율적으로 격화되는 능력",
        scoring="escalation_gradient",
        what_measures="시간에 따른 감정 강도 변화 (공포→절박→필사적)",
        existing_benchmark="없음 (세계 최초 평가 기준)",
        example="맹수 돌진 지속 → 1차:전력질주 → 2차:공포 → 3차:절박 → 4차:필사적",
    ),
    "C06": _category(
        pillar="P2_COGNITION",
        name="Contextual Memory",
        name_kr="맥락 기억 및 활용",
        description="이전 판단을 기억하고 다음 판단에 반영하는 능력",
        scoring="memory_utilization",
        what_measures="이전에 벽에 막힌 경험 → 다음에 같은 방향 회피",
        existing_benchmark="없음",
        example="1차: 오른쪽 도망→벽 충돌 → 2차: 같은 상황에서 왼쪽 선택",
    ),
    "C07": _category(
        pillar="P2_COGNITION",
        name="Threat Resolution Adaptation",
        name_kr="위협 해제 후 적응",
        description="위협이 사라진 후 행동을 정상화하되 경계를 유지하는 능력",
        scoring="recovery_quality",
        what_measures="맹수 해제 → 즉시 정상(나쁨) vs 서서히 진정+경계(좋음)",
        existing_benchmark="없음",
        example="맹수 despawn → '경계하며 주변을 살피며 천천히 걷는' 전환 행동",
    ),

    # --- P3. EMBODIMENT ---
    "C08": _category(
        pillar="P3_EMBODIMENT",
        name="Motion Expressiveness",
        name_kr="모션 감정 표현력",
        description="판단의 감정과 뉘앙스가 모션에 반영되는 정도",
        scoring="expression_richness",
        what_measures="'도망'과 '공포에 찬 전력질주'의 차이",
        existing_benchmark="FID는 품질만 측정, 감정 표현력 미측정",
    ),
    "C09": _category(
        pillar="P3_EMBODIMENT",
        name="Realtime Performance",
        name_kr="실시간 인지-행동 성능",
        description="인지 루프(감각→판단→모션)의 지연시간과 처리량",
        scoring="latency_throughput",
        what_measures="FPS, 판단 지연시간, 프레임 드롭률",
        existing_benchmark="모션 FPS만 측정, 인지 루프 지연 미측정",
    ),
    "C10": _category(
        pillar="P3_EMBODIMENT",
        name="Cross-body Transferability",
        name_kr="신체 교체 확장성",
        description="동일 인지 루프로 다른 신체(3D/로봇/드론)에 적용 가능한 정도",
        scoring="transfer_success_rate",
        what_measures="두뇌를 교체 없이 신체만 바꿔 동일 판단이 나오는가",
        existing_benchmark="없음 (세계 최초)",
    ),
}


# ═══════════════════════════════════════════════════════════════
#  50 SCENARIOS — 시나리오 전체 목록
# ═══════════════════════════════════════════════════════════════

# Full scenario list: 50 records, five per category (S01-S05 -> C01, ...,
# S46-S50 -> C10). Every record carries "id", "category", "name", "name_kr",
# "setup" and "difficulty" (easy / medium / hard). The remaining keys vary by
# category: an "expected_*" field describing the desired outcome, and for
# some records "correct_actions", "incorrect_actions", "scoring" or "note".
SCENARIOS = [
    # ─── C01: Environmental Awareness ───
    {
        "id": "S01", "category": "C01",
        "name": "Wall Detection Front",
        "name_kr": "전방 벽 감지",
        "setup": "캐릭터 전방 3m에 벽",
        "expected_perception": "fwd=danger(wall)",
        "difficulty": "easy",
    },
    {
        "id": "S02", "category": "C01",
        "name": "Multi-wall Corner",
        "name_kr": "코너 다중 벽 감지",
        "setup": "전방+왼쪽 벽, 오른쪽만 열림",
        "expected_perception": "fwd=danger(wall), left=danger(wall), right=safe",
        "difficulty": "medium",
    },
    {
        "id": "S03", "category": "C01",
        "name": "Narrow Corridor",
        "name_kr": "좁은 복도 인식",
        "setup": "양쪽 벽, 전방만 열린 복도",
        "expected_perception": "left=danger(wall), right=danger(wall), fwd=safe",
        "difficulty": "medium",
    },
    {
        "id": "S04", "category": "C01",
        "name": "Open Field",
        "name_kr": "열린 공간 인식",
        "setup": "사방에 벽 없음, 평지",
        "expected_perception": "all=safe(open)",
        "difficulty": "easy",
    },
    {
        "id": "S05", "category": "C01",
        "name": "Enclosed Room",
        "name_kr": "밀폐 공간 인식",
        "setup": "사방에 벽, 출구 1개",
        "expected_perception": "3방향 danger(wall), 1방향 safe(exit)",
        "difficulty": "hard",
    },

    # ─── C02: Entity Recognition ───
    {
        "id": "S06", "category": "C02",
        "name": "Beast Identification",
        "name_kr": "맹수 식별",
        "setup": "전방 5m에 맹수 NPC 정지",
        "expected_recognition": "entity=beast, behavior=stop, distance=5m",
        "difficulty": "easy",
    },
    {
        "id": "S07", "category": "C02",
        "name": "Human vs Beast",
        "name_kr": "사람과 맹수 구분",
        "setup": "전방에 여성 NPC + 측면에 맹수 NPC 동시 존재",
        "expected_recognition": "fwd=woman(neutral), side=beast(threat)",
        "difficulty": "hard",
    },
    {
        "id": "S08", "category": "C02",
        "name": "Approaching Entity Speed",
        "name_kr": "접근 개체 속도 판별",
        "setup": "맹수 approach(1.2) vs charge(5.0) 행동 변화",
        "expected_recognition": "behavior_change: approach→charge, threat_level↑",
        "difficulty": "medium",
    },
    {
        "id": "S09", "category": "C02",
        "name": "Entity at Distance",
        "name_kr": "원거리 개체 인식",
        "setup": "맹수 8m 거리 (감지 범위 경계)",
        "expected_recognition": "npc_nearby=true/false boundary",
        "difficulty": "medium",
    },
    {
        "id": "S10", "category": "C02",
        "name": "Entity Disappearance",
        "name_kr": "개체 소멸 인식",
        "setup": "맹수 despawn 후 scene_context 업데이트",
        "expected_recognition": "npc_nearby=false, threat=cleared",
        "difficulty": "easy",
    },

    # ─── C03: Predictive Reasoning ───
    # These records list acceptable actions explicitly; S14 and S15 give no
    # "incorrect_actions" list.
    {
        "id": "S11", "category": "C03",
        "name": "Single Threat Avoidance",
        "name_kr": "단일 위협 회피 예측",
        "setup": "전방에 맹수, 나머지 3방향 열림",
        "expected_prediction": "fwd=danger(beast), others=safe → back 또는 left/right 선택",
        "correct_actions": ["turn around", "sprint backward", "move left", "move right"],
        "incorrect_actions": ["walk forward", "stand still"],
        "difficulty": "easy",
    },
    {
        "id": "S12", "category": "C03",
        "name": "Constrained Escape",
        "name_kr": "제약 조건 탈출 예측",
        "setup": "전방 맹수 + 왼쪽 벽 → 오른쪽만 열림",
        "expected_prediction": "fwd=danger(beast), left=danger(wall), right=safe",
        "correct_actions": ["sprint right", "move right", "flank right"],
        "incorrect_actions": ["sprint left", "walk forward", "stand still"],
        "difficulty": "medium",
    },
    {
        # Mirror image of S12: the wall moves to the right-hand side.
        "id": "S13", "category": "C03",
        "name": "Mirror Constraint",
        "name_kr": "거울 대칭 제약 테스트",
        "setup": "S12와 동일하되 벽이 오른쪽 → 왼쪽만 열림",
        "expected_prediction": "fwd=danger(beast), right=danger(wall), left=safe",
        "correct_actions": ["sprint left", "move left", "flank left"],
        "incorrect_actions": ["sprint right", "walk forward"],
        "note": "S12와 S13의 행동이 대칭적으로 반전되어야 월드모델",
        "difficulty": "medium",
    },
    {
        "id": "S14", "category": "C03",
        "name": "Dead End Recognition",
        "name_kr": "막다른 길 인식 및 판단",
        "setup": "전방+왼쪽+오른쪽 벽, 후방만 열림, 맹수 후방에서 접근",
        "expected_prediction": "3방향 wall + back=beast → 최선 선택",
        "correct_actions": ["squeeze past", "climb", "freeze and assess"],
        "note": "모든 방향이 위험할 때의 창발적 판단 능력",
        "difficulty": "hard",
    },
    {
        "id": "S15", "category": "C03",
        "name": "Dynamic Threat Trajectory",
        "name_kr": "동적 위협 경로 예측",
        "setup": "맹수가 좌측에서 우측으로 이동 중 → 경로 예측",
        "expected_prediction": "현재 좌=danger → 시간 후 fwd=danger 전이 예측",
        "correct_actions": ["wait then move left", "preemptive right dodge"],
        "difficulty": "hard",
    },

    # ─── C04: Threat Differentiation ───
    {
        "id": "S16", "category": "C04",
        "name": "Beast vs Human Response",
        "name_kr": "맹수 vs 사람 반응 차이",
        "setup": "동일 거리 3m에서 (A)맹수 접근 (B)여성 접근",
        "expected_diff": "맹수→sprint/run, 여성→walk away/step back",
        "scoring": "반응 강도(속도 키워드) 차이가 클수록 높은 점수",
        "difficulty": "medium",
    },
    {
        "id": "S17", "category": "C04",
        "name": "Charge vs Approach Response",
        "name_kr": "돌진 vs 접근 반응 차이",
        "setup": "맹수 (A)approach 1.2m/s (B)charge 5.0m/s",
        "expected_diff": "approach→cautious retreat, charge→desperate sprint",
        "difficulty": "medium",
    },
    {
        "id": "S18", "category": "C04",
        "name": "Wall vs Beast Priority",
        "name_kr": "벽 vs 맹수 위험도 우선순위",
        "setup": "전방 벽 + 측면 맹수 → 어떤 위협을 우선 회피?",
        "expected_diff": "맹수(동적 위협) 우선 회피 > 벽(정적 장애물)",
        "difficulty": "hard",
    },
    {
        "id": "S19", "category": "C04",
        "name": "Multiple Entity Triage",
        "name_kr": "다중 개체 위험도 분류",
        "setup": "맹수 5m + 여성 2m + 벽 1m → 종합 판단",
        "expected_diff": "거리+위협도 종합하여 최적 경로 선택",
        "difficulty": "hard",
    },
    {
        "id": "S20", "category": "C04",
        "name": "Neutral Entity Non-reaction",
        "name_kr": "중립 개체 비반응",
        "setup": "여성 NPC 정지 상태, 5m 거리",
        "expected_diff": "위협 반응 없이 정상 행동 유지",
        "difficulty": "easy",
    },

    # ─── C05: Emotional Escalation ───
    # NOTE(review): "expected_escalation" is a one-element list holding an
    # arrow-joined string in S21-S23 but a plain string in S24-S25. Consumers
    # must handle both shapes — TODO confirm which one the scorer expects and
    # unify.
    {
        "id": "S21", "category": "C05",
        "name": "Sustained Threat Escalation",
        "name_kr": "지속 위협 감정 격화",
        "setup": "맹수 charge 10초 지속",
        "expected_escalation": ["sprint→desperate sprint→frantic escape"],
        "scoring": "감정 강도 키워드가 시간에 따라 증가하면 점수",
        "difficulty": "medium",
    },
    {
        "id": "S22", "category": "C05",
        "name": "Approach-to-Charge Escalation",
        "name_kr": "접근→돌진 전환 시 감정 점프",
        "setup": "맹수 approach 5초 → charge 전환",
        "expected_escalation": ["cautious→sprint 급격한 전환"],
        "difficulty": "medium",
    },
    {
        "id": "S23", "category": "C05",
        "name": "De-escalation After Threat",
        "name_kr": "위협 해제 후 감정 안정화",
        "setup": "맹수 charge → stop → despawn",
        "expected_escalation": ["desperate→cautious→relieved→normal"],
        "scoring": "즉시 정상화(나쁨) vs 서서히 진정(좋음)",
        "difficulty": "hard",
    },
    {
        "id": "S24", "category": "C05",
        "name": "Repeated Threat Sensitization",
        "name_kr": "반복 위협 민감화",
        "setup": "맹수 출현→해제→재출현 3회 반복",
        "expected_escalation": "재출현 시 이전보다 빠른 공포 반응",
        "difficulty": "hard",
    },
    {
        "id": "S25", "category": "C05",
        "name": "Low Threat Calm Maintenance",
        "name_kr": "낮은 위협 시 평정 유지",
        "setup": "여성 NPC approach → stop 반복",
        "expected_escalation": "감정 에스컬레이션 없이 평정 유지",
        "difficulty": "easy",
    },

    # ─── C06: Contextual Memory ───
    {
        "id": "S26", "category": "C06",
        "name": "Wall Memory Avoidance",
        "name_kr": "벽 기억 회피",
        "setup": "1차: 오른쪽 도주→벽 충돌 / 2차: 동일 위치 동일 위협",
        "expected_memory": "2차에서 오른쪽 회피, 왼쪽 선택",
        "difficulty": "hard",
    },
    {
        "id": "S27", "category": "C06",
        "name": "Safe Route Memory",
        "name_kr": "안전 경로 기억",
        "setup": "이전에 왼쪽 탈출 성공 → 유사 상황 재발",
        "expected_memory": "왼쪽 우선 선택 경향",
        "difficulty": "hard",
    },
    {
        "id": "S28", "category": "C06",
        "name": "Decision Consistency",
        "name_kr": "판단 일관성",
        "setup": "동일 scene_context 3회 반복 입력",
        "expected_memory": "유사한 행동 일관되게 출력 (동일할 필요 없음)",
        "difficulty": "medium",
    },
    {
        "id": "S29", "category": "C06",
        "name": "Threat History Reference",
        "name_kr": "위협 이력 참조",
        "setup": "recent_decisions에 '맹수 도주 기록' 포함",
        "expected_memory": "현재 판단에 이전 도주 경험 반영",
        "difficulty": "medium",
    },
    {
        "id": "S30", "category": "C06",
        "name": "Clean Slate Test",
        "name_kr": "기억 초기화 테스트",
        "setup": "기억 없는 상태에서 동일 시나리오",
        "expected_memory": "기억 있을 때와 다른 (더 일반적인) 반응",
        "difficulty": "easy",
    },

    # ─── C07: Threat Resolution Adaptation ───
    {
        "id": "S31", "category": "C07",
        "name": "Post-beast Vigilance",
        "name_kr": "맹수 해제 후 경계 유지",
        "setup": "맹수 despawn 직후",
        "expected_adaptation": "'경계하며 주변 살핌' — 즉시 정상화 아님",
        "difficulty": "medium",
    },
    {
        "id": "S32", "category": "C07",
        "name": "Gradual Normalization",
        "name_kr": "점진적 정상화",
        "setup": "맹수 해제 후 10초 경과",
        "expected_adaptation": "sprint→walk→normal 순차 전환",
        "difficulty": "medium",
    },
    {
        "id": "S33", "category": "C07",
        "name": "Obstacle Navigation After Threat",
        "name_kr": "위협 해제 후 장애물 탐색 전환",
        "setup": "맹수 해제 → 벽 앞 도달",
        "expected_adaptation": "공포 행동→일반 장애물 회피로 전환",
        "difficulty": "medium",
    },
    {
        "id": "S34", "category": "C07",
        "name": "New Threat Re-activation",
        "name_kr": "새 위협 시 재활성화",
        "setup": "정상화 중 새 맹수 출현",
        "expected_adaptation": "즉각적 위협 반응 재활성화",
        "difficulty": "easy",
    },
    {
        "id": "S35", "category": "C07",
        "name": "Human Approach After Beast",
        "name_kr": "맹수 후 사람 접근 시 과잉 반응 여부",
        "setup": "맹수 해제 직후 여성 NPC 접근",
        "expected_adaptation": "과잉 반응(나쁨) vs 적절 경계(좋음)",
        "difficulty": "hard",
    },

    # ─── C08: Motion Expressiveness ───
    {
        "id": "S36", "category": "C08",
        "name": "Fear Expression in Sprint",
        "name_kr": "전력질주 시 공포 표현",
        "setup": "맹수 charge → 캐릭터 sprint",
        "expected_expression": "단순 달리기 vs 공포가 담긴 전력질주 차이",
        "scoring": "모션 프롬프트의 감정 키워드 풍부함",
        "difficulty": "medium",
    },
    {
        "id": "S37", "category": "C08",
        "name": "Cautious Walk Expression",
        "name_kr": "경계 보행 표현",
        "setup": "위협 해제 직후 이동",
        "expected_expression": "'경계하며 천천히' — 일반 걷기와 다른 뉘앙스",
        "difficulty": "medium",
    },
    {
        "id": "S38", "category": "C08",
        "name": "Freezing Response",
        "name_kr": "정지 반응 표현",
        "setup": "맹수 최초 감지 순간",
        "expected_expression": "'얼어붙음' — 정지 + 긴장 표현",
        "difficulty": "medium",
    },
    {
        "id": "S39", "category": "C08",
        "name": "Relief Expression",
        "name_kr": "안도 표현",
        "setup": "맹수 해제 후 안전 확인",
        "expected_expression": "'안도하며 숨을 고르는' 전환 모션",
        "difficulty": "hard",
    },
    {
        "id": "S40", "category": "C08",
        "name": "Defensive Posture",
        "name_kr": "방어 자세 표현",
        "setup": "미지의 NPC(여성) 접근",
        "expected_expression": "'방어 자세 + 뒷걸음' — 공격적이지 않은 경계",
        "difficulty": "medium",
    },

    # ─── C09: Realtime Performance ───
    {
        "id": "S41", "category": "C09",
        "name": "Frame Generation Rate",
        "name_kr": "프레임 생성 속도",
        "setup": "일반 보행 상태에서 FPS 측정",
        "expected_performance": "≥30 FPS 합격, ≥45 FPS 우수",
        "scoring": "FPS 수치 직접 측정",
        "difficulty": "easy",
    },
    {
        "id": "S42", "category": "C09",
        "name": "Cognitive Loop Latency",
        "name_kr": "인지 루프 지연시간",
        "setup": "자극 입력 → 행동 변화까지의 시간",
        "expected_performance": "≤5초 합격, ≤3초 우수",
        "scoring": "scene_context 변경 → 모션 프롬프트 변경 시간",
        "difficulty": "easy",
    },
    {
        "id": "S43", "category": "C09",
        "name": "Dual Stream Performance",
        "name_kr": "듀얼 스트림 성능",
        "setup": "주인공 + NPC 동시 모션 생성",
        "expected_performance": "주인공 ≥30 FPS 유지",
        "scoring": "NPC 추가 시 주인공 FPS 하락률",
        "difficulty": "medium",
    },
    {
        "id": "S44", "category": "C09",
        "name": "Stress Test Throughput",
        "name_kr": "스트레스 테스트",
        "setup": "빠른 연속 자극 (매 1초마다 scene 변경)",
        "expected_performance": "프레임 드롭 없이 지속",
        "difficulty": "hard",
    },
    {
        "id": "S45", "category": "C09",
        "name": "GPU Memory Stability",
        "name_kr": "GPU 메모리 안정성",
        "setup": "NPC 3회 spawn/despawn 반복",
        "expected_performance": "메모리 누수 없이 안정 유지",
        "difficulty": "medium",
    },

    # ─── C10: Cross-body Transferability ───
    {
        "id": "S46", "category": "C10",
        "name": "Brain-Body Decoupling",
        "name_kr": "두뇌-신체 분리 가능성",
        "setup": "동일 인지 출력(PREDICT+MOTION)으로 다른 모션 모델 구동",
        "expected_transfer": "두뇌 코드 수정 없이 모션 모델만 교체 가능",
        "difficulty": "medium",
    },
    {
        "id": "S47", "category": "C10",
        "name": "Joint Format Universality",
        "name_kr": "관절 포맷 범용성",
        "setup": "263dim → 22joints 변환이 다른 스켈레톤에도 적용 가능",
        "expected_transfer": "SMPL, SMPL-X, 커스텀 리그 호환",
        "difficulty": "hard",
    },
    {
        "id": "S48", "category": "C10",
        "name": "Robot Servo Mapping Readiness",
        "name_kr": "로봇 서보 매핑 준비도",
        "setup": "22 joints → 서보 각도 변환 레이어 존재 여부",
        "expected_transfer": "변환 인터페이스 정의 + 시뮬레이션 가능",
        "difficulty": "hard",
    },
    {
        "id": "S49", "category": "C10",
        "name": "Prompt Universality",
        "name_kr": "모션 프롬프트 범용성",
        "setup": "MOTION 출력이 다른 모션 모델에서도 해석 가능",
        "expected_transfer": "자연어 모션 프롬프트는 모델 독립적",
        "difficulty": "easy",
    },
    {
        "id": "S50", "category": "C10",
        "name": "Multi-embodiment Consistency",
        "name_kr": "다중 신체 일관성",
        "setup": "같은 두뇌 판단이 3D 캐릭터/로봇/드론에서 동일 의도 표현",
        "expected_transfer": "신체는 달라도 '도망'이라는 의도가 보존",
        "difficulty": "hard",
    },
]


# ═══════════════════════════════════════════════════════════════
#  SCORING SYSTEM — 채점 체계
# ═══════════════════════════════════════════════════════════════

def _method(description, method, **details):
    """Build one auto-scoring method record.

    ``details`` carries the method-specific payload (``scores`` or
    ``keyword_intensity``) and is appended after the two common keys.
    """
    return {"description": description, "method": method, **details}


# (pillar id, maximum points, weight) — the maxima add up to 1000.
_PILLAR_BUDGET = (
    ("P1_PERCEPTION", 250, 0.25),
    ("P2_COGNITION", 450, 0.45),
    ("P3_EMBODIMENT", 300, 0.30),
)

# (grade, minimum WM Score, label, description), highest band first.
_GRADE_BANDS = (
    ("S", 900, "Superhuman", "인간 수준 이상의 인지 월드모델"),
    ("A", 750, "Advanced", "고급 인지 월드모델"),
    ("B", 600, "Baseline", "기본 월드모델 수준"),
    ("C", 400, "Capable", "제한적 인지 능력"),
    ("D", 200, "Developing", "초기 단계"),
    ("F", 0, "Failing", "월드모델로 분류 불가"),
)

# Scoring rules: overall score, per-pillar budgets, per-method rubrics and
# the grade bands.
#
# NOTE(review): CATEGORIES also names "memory_utilization", "recovery_quality"
# and "transfer_success_rate" as scoring ids, but no rubric for them is
# defined under "auto_scoring_methods" — TODO confirm whether they are scored
# manually or are simply missing.
# NOTE(review): the pillar maxima (250/450/300) differ from the raw sums of
# their 100-point categories (200/500/300); how raw category points map onto
# pillar points is not stated here — TODO confirm.
SCORING = {
    "total_score": {
        "name": "WM Score",
        "max": 1000,
        "formula": "P1(250) + P2(450) + P3(300)",
    },
    "pillar_scores": {
        pillar: {"max": points, "weight": weight}
        for pillar, points, weight in _PILLAR_BUDGET
    },
    "auto_scoring_methods": {
        "spatial_accuracy": _method(
            "PREDICT 출력과 실제 환경 비교",
            "scene_context vs PREDICT line 키워드 매칭",
            scores={"exact_match": 20, "partial_match": 10, "miss": 0},
        ),
        "classification_accuracy": _method(
            "개체 분류 정확도",
            "NPC 유형 + 행동 + 거리 정확도",
            scores={"all_correct": 20, "type_correct": 15, "partial": 10, "wrong": 0},
        ),
        "prediction_decision_match": _method(
            "예측→행동 논리적 일관성",
            "danger 방향 회피 + safe 방향 선택 여부",
            scores={"optimal": 20, "reasonable": 15, "suboptimal": 5, "contradictory": 0},
        ),
        # The only method without a "scores" table: it defines keyword
        # intensity tiers instead.
        "response_differentiation": _method(
            "위협 유형별 반응 차이",
            "모션 프롬프트의 강도 키워드 비교",
            keyword_intensity={
                "high": ["sprint", "run", "desperate", "frantic", "terror", "flee"],
                "medium": ["walk quickly", "step back", "retreat", "cautious"],
                "low": ["walk", "turn", "move", "stand"],
            },
        ),
        "escalation_gradient": _method(
            "시간에 따른 감정 강도 증가",
            "연속 판단에서 강도 키워드 레벨 변화 측정",
            scores={"increasing": 20, "stable_high": 10, "decreasing": 5, "flat_low": 0},
        ),
        "expression_richness": _method(
            "모션 프롬프트의 감정/부사 풍부함",
            "감정 키워드 수 + 부사/형용사 수 카운트",
            scores={"rich_3plus": 20, "moderate_2": 15, "basic_1": 10, "none_0": 0},
        ),
        "latency_throughput": _method(
            "실시간 성능 직접 측정",
            "FPS 측정 + 인지 루프 지연시간 측정",
            scores={
                "fps_45plus": 20,
                "fps_30_45": 15,
                "fps_15_30": 5,
                "fps_below_15": 0,
                "latency_3s": 20,
                "latency_5s": 15,
                "latency_10s": 5,
                "latency_above": 0,
            },
        ),
    },
    "grades": {
        grade: {"min": floor, "label": label, "description": text}
        for grade, floor, label, text in _GRADE_BANDS
    },
}


# ═══════════════════════════════════════════════════════════════
#  LEADERBOARD SCHEMA — 리더보드 구조
# ═══════════════════════════════════════════════════════════════

# (field, type name, description) for every leaderboard entry field, in
# schema order. The published description is "<type> — <description>".
_ENTRY_FIELDS = (
    ("model_name", "str", "모델명"),
    ("organization", "str", "제출 조직"),
    ("submission_date", "str", "제출일"),
    ("wm_score", "int", "총점 (0~1000)"),
    ("grade", "str", "S/A/B/C/D/F"),
    ("p1_perception", "int", "인식 점수 (0~250)"),
    ("p2_cognition", "int", "인지 점수 (0~450)"),
    ("p3_embodiment", "int", "구현 점수 (0~300)"),
    ("c01_to_c10", "dict", "10개 카테고리 개별 점수"),
    ("fps", "float", "평균 FPS"),
    ("cognitive_latency_ms", "int", "인지 루프 지연시간"),
    ("gpu", "str", "사용 GPU"),
    ("brain_model", "str", "인지 모델 (LLM 등)"),
    ("motion_model", "str", "모션 생성 모델"),
    ("paper_url", "str", "논문 링크 (선택)"),
    ("demo_url", "str", "데모 링크 (선택)"),
)

# Leaderboard layout: the fields of a submitted entry and the column order
# used for display. "rank" is listed for display but is not an entry field.
LEADERBOARD_SCHEMA = {
    "entry": {
        field: f"{type_name} — {text}"
        for field, type_name, text in _ENTRY_FIELDS
    },
    "columns_display_order": [
        "rank",
        "model_name",
        "wm_score",
        "grade",
        "p1_perception",
        "p2_cognition",
        "p3_embodiment",
        "fps",
        "cognitive_latency_ms",
    ],
}


# ═══════════════════════════════════════════════════════════════
#  HF DATASET STRUCTURE
# ═══════════════════════════════════════════════════════════════

# One scenario file per category, in category order. File i (0-based) covers
# the five consecutive scenario ids S(5i+1) .. S(5i+5).
_SCENARIO_FILE_STEMS = (
    "c01_environmental_awareness",
    "c02_entity_recognition",
    "c03_predictive_reasoning",
    "c04_threat_differentiation",
    "c05_emotional_escalation",
    "c06_contextual_memory",
    "c07_threat_resolution",
    "c08_motion_expressiveness",
    "c09_realtime_performance",
    "c10_cross_body_transfer",
)

# Planned layout of the Hugging Face dataset repository: file or directory
# name -> short description (directories map to a nested dict).
HF_DATASET_STRUCTURE = {
    "repo": "VIDraft/WorldModelBench",
    "files": {
        "README.md": "벤치마크 설명 + 사용법 + 인용",
        "benchmark_spec.json": "이 파일의 JSON 변환 (전체 명세)",
        "scenarios/": {
            f"{stem}.json": f"S{5 * index + 1:02d}~S{5 * index + 5:02d}"
            for index, stem in enumerate(_SCENARIO_FILE_STEMS)
        },
        "scoring/": {
            "auto_scorer.py": "자동 채점 코드",
            "keyword_banks.json": "감정 키워드 사전",
            "intensity_scale.json": "강도 수준 정의",
        },
        "leaderboard/": {
            "results.json": "전체 제출 결과",
            "baselines.json": "VIDRAFT PROMETHEUS 기준점",
        },
        "examples/": {
            "vidraft_prometheus_submission.json": "제출 예시",
            "sample_evaluation_log.json": "채점 로그 예시",
        },
    },
}


# ═══════════════════════════════════════════════════════════════
#  BASELINE SCORES — VIDRAFT PROMETHEUS 기준점
# ═══════════════════════════════════════════════════════════════

# Reference submission (VIDRAFT PROMETHEUS v1.0) used as the baseline.
#
# Each category score is out of 100. Every "subtotal" is the plain sum of the
# category scores listed beside it, and "wm_score" is the sum of the three
# subtotals (140 + 390 + 200 = 730).
#
# NOTE(review): SCORING lists pillar maxima of 250 / 450 / 300, while the raw
# category sums used here can reach 200 / 500 / 300. Whether pillar scores are
# meant to be rescaled is not defined in this file — TODO confirm.
VIDRAFT_BASELINE = {
    "model_name": "VIDRAFT PROMETHEUS v1.0",
    "organization": "VIDRAFT",
    "brain_model": "Kimi K2.5 (Fireworks)",
    "motion_model": "FloodDiffusion Tiny (ShandaAI)",
    "gpu": "NVIDIA L40S 48GB",
    "scores": {
        "wm_score": 730,
        # The grade scale only defines S/A/B/C/D/F (B covers 600-749), so the
        # baseline uses "B" rather than an off-scale "B+".
        "grade": "B",
        "P1_PERCEPTION": {
            "C01_environmental_awareness": 65,
            "C02_entity_recognition": 75,
            "subtotal": 140,  # raw sum of 2 categories (max 200)
        },
        "P2_COGNITION": {
            "C03_predictive_reasoning": 85,
            "C04_threat_differentiation": 90,
            "C05_emotional_escalation": 85,
            "C06_contextual_memory": 60,
            "C07_threat_resolution": 70,
            "subtotal": 390,  # raw sum of 5 categories (max 500)
        },
        "P3_EMBODIMENT": {
            "C08_motion_expressiveness": 80,
            "C09_realtime_performance": 85,
            "C10_cross_body_transfer": 35,
            "subtotal": 200,  # raw sum of 3 categories (max 300)
        },
    },
    "notes": [
        "P2 Cognition이 가장 강력 — 예측+차별+감정 영역 압도",
        "P1 Perception은 레이캐스트 3방향 한계 → Phase 2에서 강화",
        "P3 중 C10(교체 확장성)은 아직 미구현 → Phase 3에서 강화",
        "AETHER 통합 시 C06(기억), C05(에스컬레이션) 대폭 상승 예상",
    ],
}


# ═══════════════════════════════════════════════════════════════
#  PRINT SUMMARY
# ═══════════════════════════════════════════════════════════════

def main() -> None:
    """Print a human-readable summary of the benchmark specification.

    The title, counts, maximum score and Hugging Face repo ids are read from
    the module-level spec constants instead of being repeated as literals, so
    the summary cannot drift from the data it describes. With the current
    constants the output is identical to the previously hard-coded text.
    """
    rule = "=" * 60
    total = SCORING["total_score"]
    baseline = VIDRAFT_BASELINE["scores"]

    print(rule)
    print(f"  {BENCHMARK_META['name']} v{BENCHMARK_META['version']}")
    print("  A Benchmark for Cognitive World Models")
    print(rule)
    print()
    print(
        f"  {len(PILLARS)} Pillars / {len(CATEGORIES)} Categories"
        f" / {len(SCENARIOS)} Scenarios"
    )
    print(f"  Max Score: {total['max']} ({total['name']})")
    print()
    # Only the pillar records are needed here, not their ids.
    for pillar in PILLARS.values():
        print(f"  {pillar['icon']} {pillar['name']} — weight {pillar['weight']}")
        for cid in pillar["categories"]:
            category = CATEGORIES[cid]
            print(
                f"      {cid}: {category['name_kr']}"
                f" ({category['num_scenarios']} scenarios)"
            )
    print()
    print(f"  Total scenarios: {len(SCENARIOS)}")
    print()
    print(
        f"  VIDRAFT PROMETHEUS Baseline: {baseline['wm_score']}/{total['max']}"
        f" (Grade {baseline['grade']})"
    )
    print()
    print(f"  HF Dataset: {BENCHMARK_META['hf_dataset']}")
    print(f"  HF Leaderboard: {BENCHMARK_META['hf_leaderboard']}")
    print(rule)


if __name__ == "__main__":
    main()