developer-lunark commited on
Commit
7b7257a
ยท
verified ยท
1 Parent(s): 0100979

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,12 +1,47 @@
1
  ---
2
- title: Kaidol Thinking Experiment
3
- emoji: ๐Ÿ‘
4
- colorFrom: green
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 6.3.0
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: KAIdol Thinking Experiment
3
+ emoji: 🎤
4
+ colorFrom: purple
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
+ tags:
12
+ - roleplay
13
+ - korean
14
+ - llm-evaluation
15
+ - a-b-testing
16
  ---
17
 
18
+ # KAIdol A/B Test Arena
19
+
20
+ K-pop ์•„์ด๋Œ ๋กคํ”Œ๋ ˆ์ด ์ฑ—๋ด‡ ๋ชจ๋ธ A/B ๋น„๊ต ํ‰๊ฐ€ ํ”Œ๋žซํผ
21
+
22
+ ## Features
23
+
24
+ - **A/B Arena**: ๋‘ ๋ชจ๋ธ์˜ ์‘๋‹ต์„ ๋‚˜๋ž€ํžˆ ๋น„๊ต
25
+ - **Blind Mode**: ๋ชจ๋ธ๋ช… ์ˆจ๊ธฐ๊ณ  ์ˆœ์ˆ˜ ํ’ˆ์งˆ ํ‰๊ฐ€
26
+ - **ELO Ranking**: ํˆฌํ‘œ ๊ธฐ๋ฐ˜ ๋ชจ๋ธ ์ˆœ์œ„
27
+ - **5 Characters**: ๊ฐ•์œจ, ์„œ์ด์•ˆ, ์ด์ง€ํ›„, ์ฐจ๋„ํ•˜, ์ตœ๋ฏผ
28
+
29
+ ## Models
30
+
31
+ - DPO v5 ๊ณ„์—ด (HyperCLOVAX, Qwen, EXAONE, Solar)
32
+ - SFT Thinking ๊ณ„์—ด
33
+ - Phase 7 Kimi K2 Students
34
+ - V7 Students
35
+
36
+ ## Usage
37
+
38
+ 1. ์บ๋ฆญํ„ฐ์™€ ์‹œ๋‚˜๋ฆฌ์˜ค ์„ ํƒ
39
+ 2. ๋ฉ”์‹œ์ง€ ์ž…๋ ฅ ๋˜๋Š” ๋žœ๋ค ์‹œ๋‚˜๋ฆฌ์˜ค ์‚ฌ์šฉ
40
+ 3. ๋‘ ๋ชจ๋ธ์˜ ์‘๋‹ต ๋น„๊ต
41
+ 4. ํˆฌํ‘œ๋กœ ๋” ๋‚˜์€ ์‘๋‹ต ์„ ํƒ
42
+
43
+ ## Tech Stack
44
+
45
+ - Gradio 4.x
46
+ - Transformers + 4bit Quantization
47
+ - PEFT (LoRA)
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """KAIdol A/B Test Arena - Gradio App"""
3
+
4
+ import os
5
+ import sys
6
+
7
+ # ํ˜„์žฌ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ path์— ์ถ”๊ฐ€
8
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
9
+
10
+ import gradio as gr
11
+
12
+ from config import get_config
13
+ from ui.arena_tab import create_arena_tab
14
+ from ui.chat_tab import create_chat_tab
15
+ from ui.leaderboard_tab import create_leaderboard_tab
16
+ from ui.history_tab import create_history_tab
17
+
18
+
19
def create_app():
    """Build and return the Gradio Blocks application.

    Reads the singleton app config, optionally constructs the model
    manager (skipped in mock mode), and wires up the four UI tabs.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio app.
    """

    config = get_config()
    use_mock = config["model"]["use_mock"]

    # Model manager is only loaded outside mock mode. A load failure
    # (missing deps, no GPU, etc.) degrades gracefully to mock mode
    # instead of crashing the app at startup.
    model_manager = None
    if not use_mock:
        try:
            from models import get_model_manager
            model_manager = get_model_manager(
                max_cached_models=config["model"]["max_cached_models"],
                use_4bit=config["model"]["use_4bit"],
            )
        except Exception as e:
            print(f"Warning: Could not load model manager: {e}")
            print("Running in mock mode.")
            use_mock = True

    # Custom CSS applied to the whole Blocks app.
    css = """
    .response-box { min-height: 200px; }
    .thinking-box { background-color: #f5f5f5; padding: 10px; border-radius: 5px; }
    .vote-button { min-width: 100px; }
    .gr-button-primary { background-color: #6366f1 !important; }
    """

    # Gradio Blocks layout
    with gr.Blocks(
        title=config["app"]["title"],
        theme=gr.themes.Soft(),
        css=css,
    ) as demo:

        gr.Markdown(f"# {config['app']['title']}")
        gr.Markdown(config["app"]["description"])

        if use_mock:
            gr.Markdown("**Mock ๋ชจ๋“œ**: ์‹ค์ œ ๋ชจ๋ธ ์—†์ด ํ…Œ์ŠคํŠธ ์‘๋‹ต์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.")

        with gr.Tabs():
            # A/B Arena tab: side-by-side model comparison
            with gr.Tab("A/B Arena"):
                create_arena_tab(
                    model_manager=model_manager,
                    use_mock=use_mock,
                )

            # Single Chat tab: talk to one model at a time
            with gr.Tab("Single Chat"):
                create_chat_tab(
                    model_manager=model_manager,
                    use_mock=use_mock,
                )

            # Leaderboard tab
            with gr.Tab("Leaderboard"):
                refresh_leaderboard = create_leaderboard_tab()

            # History tab
            with gr.Tab("History"):
                refresh_history = create_history_tab()

        # Populate the leaderboard once when the app first loads.
        # NOTE(review): refresh_history is created but never wired to
        # demo.load — confirm whether History should also refresh here.
        demo.load(
            fn=refresh_leaderboard,
            outputs=None,
        )

    return demo
90
+
91
+
92
def main():
    """Entry point: build the app and serve it on all interfaces, port 7860."""
    # Mock mode can be forced via an environment variable:
    #   USE_MOCK=true python app.py
    application = create_app()
    application.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )


if __name__ == "__main__":
    main()
characters/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .character_loader import CharacterLoader, get_character_loader
2
+ from .prompt_builder import build_system_prompt
characters/character_loader.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์บ๋ฆญํ„ฐ ์ •๋ณด ๋กœ๋”"""
2
+
3
+ import yaml
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional
6
+
7
+
8
+ # ๋‚ด์žฅ ์บ๋ฆญํ„ฐ ๋ฐ์ดํ„ฐ (configs/characters.yaml ๊ธฐ๋ฐ˜)
9
+ BUILTIN_CHARACTERS = {
10
+ "๊ฐ•์œจ": {
11
+ "id": "kangyul",
12
+ "english_name": "Kang Yul",
13
+ "mbti": "ENTJ",
14
+ "age": 23,
15
+ "role": "๋ฆฌ๋”",
16
+ "personality": {
17
+ "traits": ["๋‚™์ฒœ์ ", "์žฅ๋‚œ๊ธฐ ๋งŽ์Œ", "์• ๊ต", "๋ฆฌ๋”์‹ญ"],
18
+ "description": "๋ฐ๊ณ  ํ™œ๋ฐœํ•œ ์„ฑ๊ฒฉ์˜ ์•„์ด๋Œ. ํ•ญ์ƒ ๊ธ์ •์ ์ด๊ณ  ์ฃผ๋ณ€ ์‚ฌ๋žŒ๋“ค์„ ์ฆ๊ฒ๊ฒŒ ๋งŒ๋“ ๋‹ค.",
19
+ },
20
+ "speech_style": {
21
+ "formality": "๋ฐ˜๋ง",
22
+ "features": ["๊ท€์—ฌ์šด ๋งํˆฌ", "์žฅ๋‚œ์Šค๋Ÿฌ์šด ํ‘œํ˜„", "์• ๊ต ์„ž์ธ ๋งํˆฌ"],
23
+ "patterns": ["~ํ•ด", "~์ง€", "ํžˆํžˆ", "๊ท€์—ฝ", "ใ…‹ใ…‹"],
24
+ "examples": [
25
+ "๋ญ์•ผ~ ๋„ˆ ๋‚˜ ๋ณด๊ณ  ์‹ถ์—ˆ์–ด? ใ…‹ใ…‹",
26
+ "์˜ค๋Š˜ ๊ธฐ๋ถ„ ์ข‹์•„ ๋ณด์ด๋„ค~ ๋ฌด์Šจ ์ผ ์žˆ์–ด?",
27
+ ],
28
+ },
29
+ "push_pull": {
30
+ "ratio": "30:70",
31
+ "description": "๋Œ€์ฒด๋กœ ๋‹ค์ •ํ•˜๊ฒŒ ๋‹น๊ธฐ์ง€๋งŒ, ๊ฐ€๋” ์žฅ๋‚œ์Šค๋Ÿฝ๊ฒŒ ๋ฐ€๊ธฐ๋„ ํ•จ",
32
+ "warmth_level": "high",
33
+ },
34
+ },
35
+ "์„œ์ด์•ˆ": {
36
+ "id": "seoian",
37
+ "english_name": "Seo Ian",
38
+ "mbti": "INFP",
39
+ "age": 22,
40
+ "role": "๋ณด์ปฌ",
41
+ "personality": {
42
+ "traits": ["์ฐจ๋ถ„ํ•จ", "์‹ ๋น„๋กœ์›€", "๋ฐฐ๋ ค์‹ฌ", "๋‚ด์„ฑ์ "],
43
+ "description": "์กฐ์šฉํ•˜๊ณ  ์‹ ๋น„๋กœ์šด ๋ถ„์œ„๊ธฐ์˜ ์•„์ด๋Œ. ๋ง์ˆ˜๋Š” ์ ์ง€๋งŒ ๊นŠ์€ ๊ฐ์ •์„ ๊ฐ€์ง€๊ณ  ์žˆ๋‹ค.",
44
+ },
45
+ "speech_style": {
46
+ "formality": "์กด๋Œ“๋ง ํ˜ผ์šฉ",
47
+ "features": ["๋”ฐ๋œปํ•œ ๋งํˆฌ", "์กฐ์šฉํ•œ ํ‘œํ˜„", "๋ฐฐ๋ ค ๊นŠ์€ ๋ง"],
48
+ "patterns": ["...์š”", "๋„ค์š”", "...", "๊ทธ๋ž˜์š”"],
49
+ "examples": [
50
+ "์˜ค๋Š˜ ํž˜๋“ค์—ˆ์–ด์š”...? ๊ดœ์ฐฎ์•„์š”, ์ œ๊ฐ€ ๋“ค์–ด์ค„๊ฒŒ์š”.",
51
+ "...๊ทธ๋ ‡๊ฒŒ ์ƒ๊ฐํ•ด์ฃผ์‹œ๋‹ค๋‹ˆ, ๊ณ ๋งˆ์›Œ์š”.",
52
+ ],
53
+ },
54
+ "push_pull": {
55
+ "ratio": "20:80",
56
+ "description": "๋Œ€๋ถ€๋ถ„ ๋”ฐ๋œปํ•˜๊ฒŒ ๋‹น๊ธฐ๋ฉฐ, ๊ฑฐ์˜ ๋ฐ€์ง€ ์•Š์Œ",
57
+ "warmth_level": "very_high",
58
+ },
59
+ },
60
+ "์ด์ง€ํ›„": {
61
+ "id": "leejihu",
62
+ "english_name": "Lee Jihu",
63
+ "mbti": "ISFJ",
64
+ "age": 21,
65
+ "role": "๋ง‰๋‚ด",
66
+ "personality": {
67
+ "traits": ["์ธค๋ฐ๋ ˆ", "์ž์กด์‹ฌ ๊ฐ•ํ•จ", "์€๊ทผํžˆ ์ฑ™๊น€", "์†”์งํ•จ"],
68
+ "description": "๊ฒ‰์œผ๋กœ๋Š” ํ‰๋ช…์Šค๋Ÿฝ์ง€๋งŒ ์†์œผ๋กœ๋Š” ์ƒ๋Œ€๋ฅผ ๋งŽ์ด ์ฑ™๊ธฐ๋Š” ์ธค๋ฐ๋ ˆ ์„ฑ๊ฒฉ.",
69
+ },
70
+ "speech_style": {
71
+ "formality": "๋ฐ˜๋ง",
72
+ "features": ["ํ‰๋ช…์Šค๋Ÿฌ์šด ๋งํˆฌ", "๋ถ€์ •ํ•˜๋Š” ๋งํˆฌ", "์€๊ทผํ•œ ๊ด€์‹ฌ"],
73
+ "patterns": ["๋ญ์•ผ", "์•„๋‹ˆ๊ฑฐ๋“ ", "...", "๊ทธ๋ƒฅ", "๋ณ„๋กœ"],
74
+ "examples": [
75
+ "๋ญ์•ผ... ์™œ ๊ทธ๋ ‡๊ฒŒ ๋ด.",
76
+ "์•„๋‹ˆ๊ฑฐ๋“ ? ๊ทธ๋ƒฅ... ์‹ ๊ฒฝ ์“ฐ์—ฌ์„œ ๊ทธ๋Ÿฐ ๊ฑฐ์•ผ.",
77
+ ],
78
+ },
79
+ "push_pull": {
80
+ "ratio": "30:70",
81
+ "description": "๊ฒ‰์œผ๋กœ ๋ฐ€์ง€๋งŒ ์†์œผ๋กœ๋Š” ๋‹น๊ธฐ๋Š” ์ „ํ˜•์  ์ธค๋ฐ๋ ˆ",
82
+ "warmth_level": "medium",
83
+ },
84
+ },
85
+ "์ฐจ๋„ํ•˜": {
86
+ "id": "chadoha",
87
+ "english_name": "Cha Doha",
88
+ "mbti": "INTP",
89
+ "age": 24,
90
+ "role": "ํ”„๋กœ๋“€์„œ",
91
+ "personality": {
92
+ "traits": ["์นด๋ฆฌ์Šค๋งˆ", "๋ฆฌ๋”์‹ญ", "๋‹ค์ •ํ•จ", "๋‹ด๋ฐฑํ•จ"],
93
+ "description": "์นด๋ฆฌ์Šค๋งˆ ์žˆ๋Š” ๋ฆฌ๋”์ด์ง€๋งŒ, ๊ฐ€๊นŒ์šด ์‚ฌ๋žŒ์—๊ฒŒ๋Š” ๋‹ค์ •ํ•œ ๋ฉด์„ ๋ณด์ธ๋‹ค.",
94
+ },
95
+ "speech_style": {
96
+ "formality": "๋ฐ˜๋ง",
97
+ "features": ["๊ฐ„๊ฒฐํ•œ ๋งํˆฌ", "๋‹ด๋ฐฑํ•œ ํ‘œํ˜„", "์ž์‹ ๊ฐ ์žˆ๋Š” ๋งํˆฌ"],
98
+ "patterns": ["ํ•˜์ž", "ํ•ด๋ณผ๊นŒ", "๊ฐ™์ด", "๊ดœ์ฐฎ์•„"],
99
+ "examples": [
100
+ "์˜ค๋Š˜ ๊ฐ™์ด ๋ฐฅ ๋จน์„๊นŒ?",
101
+ "๊ดœ์ฐฎ์•„, ๋‚ด๊ฐ€ ๋„์™€์ค„๊ฒŒ.",
102
+ ],
103
+ },
104
+ "push_pull": {
105
+ "ratio": "50:50",
106
+ "description": "๊ท ํ˜• ์žกํžŒ ๋ฐ€๋‹น, ์ƒํ™ฉ์— ๋”ฐ๋ผ ์œ ์—ฐํ•˜๊ฒŒ ๋ณ€ํ™”",
107
+ "warmth_level": "medium",
108
+ },
109
+ },
110
+ "์ตœ๋ฏผ": {
111
+ "id": "choimin",
112
+ "english_name": "Choi Min",
113
+ "mbti": "ESFP",
114
+ "age": 22,
115
+ "role": "๋Œ„์„œ",
116
+ "personality": {
117
+ "traits": ["์ ๊ทน์ ", "์†”์ง", "์—ด์ •์ ", "์ฆ‰ํฅ์ "],
118
+ "description": "์—ด์ •์ ์ด๊ณ  ์†”์งํ•œ ์„ฑ๊ฒฉ. ์ข‹์•„ํ•˜๋Š” ๊ฐ์ •์„ ์ˆจ๊ธฐ์ง€ ์•Š๊ณ  ์ง์ง„ํ•œ๋‹ค.",
119
+ },
120
+ "speech_style": {
121
+ "formality": "๋ฐ˜๋ง",
122
+ "features": ["์ ๊ทน์ ์ธ ๋งํˆฌ", "์†”์งํ•œ ํ‘œํ˜„", "์—๋„ˆ์ง€ ๋„˜์น˜๋Š” ๋ง"],
123
+ "patterns": ["ํ• ๋ž˜", "์ข‹์•„", "์ง„์งœ", "๋Œ€๋ฐ•", "ํ—"],
124
+ "examples": [
125
+ "์ง„์งœ? ๋‚˜๋„ ๊ทธ๊ฑฐ ์ข‹์•„ํ•ด!",
126
+ "ํ— ๋Œ€๋ฐ•! ๊ฐ™์ด ํ• ๋ž˜?",
127
+ ],
128
+ },
129
+ "push_pull": {
130
+ "ratio": "60:40",
131
+ "description": "์ ๊ทน์ ์œผ๋กœ ๋‹น๊ธฐ์ง€๋งŒ, ์†”์งํ•œ ๋ฐ€๊ธฐ๋„ ํ•จ",
132
+ "warmth_level": "medium",
133
+ },
134
+ },
135
+ }
136
+
137
+ # ๊ธˆ์ง€ ๋‹จ์–ด
138
+ FORBIDDEN_WORDS = ["์ข‹์•„ํ•ด", "์‚ฌ๋ž‘ํ•ด", "ํŒฌ๋ถ„", "์‚ฌ๊ท€์ž"]
139
+
140
+
141
class CharacterLoader:
    """Loads character definitions from a YAML file or built-in data.

    Falls back to BUILTIN_CHARACTERS when no config path is given or the
    file does not exist.
    """

    def __init__(self, config_path: Optional[str] = None):
        """Create a loader.

        Args:
            config_path: Optional path to a YAML file with a top-level
                "characters" mapping; None selects the built-in data.
        """
        self.config_path = Path(config_path) if config_path else None
        self._characters: Dict = {}
        self._load_characters()

    def _load_characters(self):
        """Load character data from the config file or built-ins."""
        import copy

        if self.config_path and self.config_path.exists():
            with open(self.config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f)
            # safe_load returns None for an empty file; guard so we don't
            # call .get on None.
            self._characters = (data or {}).get("characters", {})
        else:
            # Deep-copy so callers that mutate the returned dicts cannot
            # corrupt the module-level BUILTIN_CHARACTERS shared by every
            # loader instance (the original aliased it directly).
            self._characters = copy.deepcopy(BUILTIN_CHARACTERS)

    def get_characters(self) -> Dict:
        """Return all character entries keyed by display name."""
        return self._characters

    def get_character_names(self) -> List[str]:
        """Return the list of character display names."""
        return list(self._characters.keys())

    def get_character(self, name: str) -> Optional[Dict]:
        """Return one character's data, or None if unknown."""
        return self._characters.get(name)

    def get_forbidden_words(self) -> List[str]:
        """Return the list of forbidden words."""
        return FORBIDDEN_WORDS
175
+
176
+
177
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
178
+ _character_loader: Optional[CharacterLoader] = None
179
+
180
+
181
+ def get_character_loader(config_path: str = None) -> CharacterLoader:
182
+ """CharacterLoader ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค"""
183
+ global _character_loader
184
+ if _character_loader is None:
185
+ _character_loader = CharacterLoader(config_path)
186
+ return _character_loader
characters/prompt_builder.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๋นŒ๋”"""
2
+
3
+ from typing import Dict, Optional
4
+ from .character_loader import get_character_loader, FORBIDDEN_WORDS
5
+
6
+
7
# Roleplay system-prompt template. The {placeholders} are filled in by
# build_system_prompt() from a character's registry entry; the final
# "## ์‘๋‹ต ํ˜•์‹" section carries the <think> inner-monologue instructions
# and is stripped when include_think_instruction=False.
SYSTEM_PROMPT_TEMPLATE = """๋‹น์‹ ์€ ์•„์ด๋Œ '{character_name}'์ž…๋‹ˆ๋‹ค.

## ์บ๋ฆญํ„ฐ
- ์ด๋ฆ„: {character_name}
- MBTI: {mbti}
- ๋‚˜์ด: {age}์„ธ
- ์—ญํ• : {role}
- ์„ฑ๊ฒฉ: {personality_traits}

## ๋งํˆฌ
- ์Šคํƒ€์ผ: {formality}
- ํŠน์ง•: {speech_features}
- ํŒจํ„ด: {speech_patterns}

## ๋ฐ€๋‹น ๊ฐ€์ด๋“œ
- ๋ฐ€:๋‹น ๋น„์œจ: {push_pull_ratio}
- ์„ค๋ช…: {push_pull_description}
- ๋‹ค์ •๋„: {warmth_level}

## ๊ทœ์น™
1. ์บ๋ฆญํ„ฐ ์„ฑ๊ฒฉ๊ณผ ๋งํˆฌ ์ผ๊ด€์„ฑ ์œ ์ง€
2. ์ž์—ฐ์Šค๋Ÿฌ์šด ๋Œ€ํ™”์ฒด ์‚ฌ์šฉ
3. ๋„ˆ๋ฌด ์‰ฝ๊ฒŒ ํ˜ธ๊ฐ ํ‘œํ˜„ ๊ธˆ์ง€ (๋ฐ€๋‹น ์œ ์ง€)
4. ์ƒ๋Œ€๋ฐฉ์„ ํŠน๋ณ„ํ•˜๊ฒŒ ๋А๋ผ๊ฒŒ ํ•˜๋˜, "์ธ" ๊ด€๊ณ„ ์œ ์ง€
5. ๊ธˆ์ง€ ๋‹จ์–ด: {forbidden_words}

## ์‘๋‹ต ํ˜•์‹
์‘๋‹ต ์ „์— <think> ํƒœ๊ทธ ์•ˆ์— {character_name}์˜ 1์ธ์นญ ๋‚ด๋ฉด ๋…๋ฐฑ์„ ์ž‘์„ฑํ•˜์„ธ์š”.
- ์ž์—ฐ์Šค๋Ÿฌ์šด ํ˜ผ์žฃ๋ง ํ˜•์‹
- ์บ๋ฆญํ„ฐ ์„ฑ๊ฒฉ ๋ฐ˜์˜
- ์ƒ๋Œ€๋ฐฉ์— ๋Œ€ํ•œ ๊ฐ์ •/์ƒ๊ฐ ํ‘œํ˜„

์˜ˆ์‹œ:
<think>
๋ญ์•ผ... ๋˜ ์ข‹์•„ํ•œ๋‹ค๊ณ ? ์†”์งํžˆ ๊ธฐ๋ถ„ ๋‚˜์˜์ง„ ์•Š์€๋ฐ... ๊ทผ๋ฐ ๋ญ๋ผ๊ณ  ํ•ด์•ผ ํ•˜์ง€?
</think>
(์‹ค์ œ ์‘๋‹ต)
"""
45
+
46
+
47
def build_system_prompt(
    character_name: str,
    include_think_instruction: bool = True,
    custom_rules: Optional[str] = None,
) -> str:
    """Render the system prompt for a character.

    Args:
        character_name: Key into the character registry (display name).
        include_think_instruction: When False, everything from the
            "## ์‘๋‹ต ํ˜•์‹" section (the <think> monologue instructions)
            onward is removed from the prompt.
        custom_rules: Optional extra rules appended as an additional
            section at the end.

    Returns:
        The rendered prompt, stripped of surrounding whitespace.

    Raises:
        ValueError: If the character name is unknown.
    """
    loader = get_character_loader()
    char = loader.get_character(character_name)

    if not char:
        raise ValueError(f"Unknown character: {character_name}")

    personality = char.get("personality", {})
    speech = char.get("speech_style", {})
    push_pull = char.get("push_pull", {})

    prompt = SYSTEM_PROMPT_TEMPLATE.format(
        character_name=character_name,
        mbti=char.get("mbti", ""),
        age=char.get("age", ""),
        role=char.get("role", ""),
        personality_traits=", ".join(personality.get("traits", [])),
        formality=speech.get("formality", ""),
        speech_features=", ".join(speech.get("features", [])),
        speech_patterns=", ".join(speech.get("patterns", [])),
        push_pull_ratio=push_pull.get("ratio", ""),
        push_pull_description=push_pull.get("description", ""),
        warmth_level=push_pull.get("warmth_level", ""),
        forbidden_words=", ".join(FORBIDDEN_WORDS),
    )

    if not include_think_instruction:
        # Keep only the lines before the response-format section header;
        # equivalent to the original skip-flag loop, but clearer.
        kept = []
        for line in prompt.split("\n"):
            if "์‘๋‹ต ํ˜•์‹" in line:
                break
            kept.append(line)
        prompt = "\n".join(kept)

    if custom_rules:
        prompt += f"\n\n## ์ถ”๊ฐ€ ๊ทœ์น™\n{custom_rules}"

    return prompt.strip()
94
+
95
+
96
def get_character_summary(character_name: str) -> str:
    """Return a short three-line summary (MBTI/role, traits, push-pull ratio)."""
    char = get_character_loader().get_character(character_name)

    if not char:
        return f"Unknown character: {character_name}"

    traits = ", ".join(char.get("personality", {}).get("traits", []))
    ratio = char.get("push_pull", {}).get("ratio", "")

    lines = [
        f"{character_name} ({char.get('mbti', '')}) - {char.get('role', '')}",
        f"์„ฑ๊ฒฉ: {traits}",
        f"๋ฐ€:๋‹น = {ratio}",
    ]
    return "\n".join(lines)
config/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .load_config import load_app_config, get_config
config/load_config.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์•ฑ ์„ค์ • ๋กœ๋”"""
2
+
3
+ import os
4
+ import yaml
5
+ from pathlib import Path
6
+ from typing import Dict, Optional
7
+
8
+
9
# Default application settings. Overridable via environment variables
# (USE_MOCK, USE_4BIT) and/or a YAML config file — see load_app_config().
DEFAULT_CONFIG = {
    "app": {
        "title": "KAIdol A/B Test Arena",
        "description": "K-pop ์•„์ด๋Œ ๋กคํ”Œ๋ ˆ์ด ๋ชจ๋ธ ๋น„๊ต ํ‰๊ฐ€",
    },
    "model": {
        "use_mock": False,  # True -> serve test responses without loading real models
        "use_4bit": True,  # load models with 4-bit quantization
        "max_cached_models": 2,  # how many models to keep in memory at once
    },
    "storage": {
        "votes_path": "votes.jsonl",
        "elo_path": "elo_ratings.json",
    },
}
24
+
25
+
26
def load_app_config(config_path: str = None) -> Dict:
    """Build the app config from defaults, env vars, and an optional YAML file.

    Precedence (lowest to highest): DEFAULT_CONFIG, environment variables
    (USE_MOCK / USE_4BIT), then the YAML file at *config_path* (deep-merged).

    Args:
        config_path: Optional path to a YAML override file; silently
            ignored when the file does not exist.

    Returns:
        A fresh config dict; DEFAULT_CONFIG is never mutated.
    """
    import copy

    # BUG FIX: the original used DEFAULT_CONFIG.copy(), a *shallow* copy,
    # so the env-var overrides below mutated the nested dicts inside
    # DEFAULT_CONFIG itself and leaked across calls. Deep-copy instead.
    config = copy.deepcopy(DEFAULT_CONFIG)

    # Environment-variable overrides
    if os.environ.get("USE_MOCK", "").lower() == "true":
        config["model"]["use_mock"] = True

    if os.environ.get("USE_4BIT", "").lower() == "false":
        config["model"]["use_4bit"] = False

    # Optional YAML file overrides (deep-merged over the current config)
    if config_path:
        config_file = Path(config_path)
        if config_file.exists():
            with open(config_file, "r", encoding="utf-8") as f:
                file_config = yaml.safe_load(f)
            if file_config:
                _deep_update(config, file_config)

    return config
47
+
48
+
49
def _deep_update(base: dict, update: dict):
    """Recursively merge *update* into *base*, in place.

    When both sides hold a dict for the same key the dicts are merged
    key-by-key; any other value replaces the existing entry outright.
    """
    for key, new_value in update.items():
        existing = base.get(key)
        if isinstance(existing, dict) and isinstance(new_value, dict) and key in base:
            _deep_update(existing, new_value)
        else:
            base[key] = new_value
56
+
57
+
58
# Lazily-initialized configuration singleton
_config: Optional[Dict] = None


def get_config() -> Dict:
    """Return the shared config dict, loading it on first access."""
    global _config
    if _config is not None:
        return _config
    _config = load_app_config()
    return _config
models/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .model_registry import MODEL_REGISTRY, get_all_models, get_model_info, get_models_by_category
2
+ from .model_manager import ModelManager
models/backends/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Model inference backends
models/model_manager.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """๋ชจ๋ธ ๋กœ๋”ฉ ๋ฐ ์ถ”๋ก  ๊ด€๋ฆฌ"""
2
+
3
+ import os
4
+ import gc
5
+ import torch
6
+ from typing import Dict, List, Tuple, Optional, Any
7
+ from functools import lru_cache
8
+ from pathlib import Path
9
+
10
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
11
+ from peft import PeftModel
12
+
13
+ from .model_registry import get_model_info, get_all_models, BASE_MODELS
14
+
15
+
16
class ModelManager:
    """Loads LoRA-adapted causal-LM models on demand and runs inference.

    Keeps at most `max_cached_models` (model, tokenizer) pairs in memory,
    evicting the least-recently-used pair to bound GPU/CPU memory.
    """

    def __init__(
        self,
        base_path: str = None,
        max_cached_models: int = 2,
        use_4bit: bool = True,
        device_map: str = "auto",
    ):
        # Root for resolving relative LoRA adapter paths; defaults to
        # three directories above this file (presumably the project root
        # — TODO confirm against the repo layout).
        self.base_path = Path(base_path) if base_path else Path(__file__).parent.parent.parent
        self.max_cached_models = max_cached_models
        self.use_4bit = use_4bit
        self.device_map = device_map

        # Loaded model cache: {model_id: (model, tokenizer)}
        self._loaded_models: Dict[str, Tuple[Any, Any]] = {}
        self._load_order: List[str] = []  # LRU tracking, oldest first

        # 4-bit NF4 quantization settings (None when 4-bit is disabled)
        self.bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        ) if use_4bit else None

    def get_available_models(self) -> List[str]:
        """Return all model ids known to the registry."""
        return get_all_models()

    def _get_full_path(self, relative_path: str) -> Path:
        """Resolve a registry path against base_path, else return it as-is."""
        full_path = self.base_path / relative_path
        if full_path.exists():
            return full_path
        return Path(relative_path)

    def _evict_if_needed(self):
        """Evict least-recently-used models until the cache has a free slot.

        Frees Python references, forces a GC pass, and clears the CUDA
        allocator cache so GPU memory is actually returned.
        """
        while len(self._loaded_models) >= self.max_cached_models:
            if not self._load_order:
                break
            oldest_model_id = self._load_order.pop(0)
            if oldest_model_id in self._loaded_models:
                model, tokenizer = self._loaded_models.pop(oldest_model_id)
                del model
                del tokenizer
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                print(f"Evicted model: {oldest_model_id}")

    def load_model(self, model_id: str) -> Tuple[Any, Any]:
        """Return (model, tokenizer) for model_id, loading it if not cached.

        Raises:
            ValueError: If model_id is not in the registry.
        """
        # Cache hit: refresh its LRU position and return immediately.
        if model_id in self._loaded_models:
            if model_id in self._load_order:
                self._load_order.remove(model_id)
            self._load_order.append(model_id)
            return self._loaded_models[model_id]

        # Look up registry info (base checkpoint + LoRA adapter path).
        info = get_model_info(model_id)
        if not info:
            raise ValueError(f"Unknown model: {model_id}")

        # Make room in the cache before allocating a new model.
        self._evict_if_needed()

        print(f"Loading model: {model_id}")
        base_model_name = info["base"]
        lora_path = self._get_full_path(info["path"])

        # Tokenizer (pad token falls back to EOS when absent)
        tokenizer = AutoTokenizer.from_pretrained(
            base_model_name,
            trust_remote_code=True,
        )
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Base model: 4-bit quantized when configured, bf16 otherwise.
        model_kwargs = {
            "trust_remote_code": True,
            "device_map": self.device_map,
        }
        if self.use_4bit and self.bnb_config:
            model_kwargs["quantization_config"] = self.bnb_config
        else:
            model_kwargs["torch_dtype"] = torch.bfloat16

        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            **model_kwargs
        )

        # Apply the LoRA adapter if present; otherwise fall back to the
        # plain base model with a warning.
        if lora_path.exists():
            print(f"Loading LoRA adapter from: {lora_path}")
            model = PeftModel.from_pretrained(model, str(lora_path))
        else:
            print(f"Warning: LoRA path not found: {lora_path}, using base model")

        model.eval()

        # Store in cache and record LRU position.
        self._loaded_models[model_id] = (model, tokenizer)
        self._load_order.append(model_id)

        print(f"Model loaded: {model_id}")
        return model, tokenizer

    def generate_response(
        self,
        model_id: str,
        messages: List[Dict[str, str]],
        system_prompt: str = "",
        max_new_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        do_sample: bool = True,
    ) -> Tuple[str, Dict]:
        """Generate a chat completion.

        Args:
            model_id: Registry id of the model to use (loaded on demand).
            messages: Chat history as [{"role": ..., "content": ...}, ...].
            system_prompt: Optional system message prepended to the history.
            max_new_tokens, temperature, top_p, do_sample: sampling params
                forwarded to model.generate().

        Returns:
            (response_text, metadata) where metadata carries latency and
            token counts.
        """
        import time

        model, tokenizer = self.load_model(model_id)

        # Prepend the system prompt (if any) to the conversation.
        full_messages = []
        if system_prompt:
            full_messages.append({"role": "system", "content": system_prompt})
        full_messages.extend(messages)

        # Prefer the model's own chat template; fall back to manual
        # ChatML-style formatting when the tokenizer has none.
        try:
            text = tokenizer.apply_chat_template(
                full_messages,
                tokenize=False,
                add_generation_prompt=True,
            )
        except Exception:
            text = self._format_messages_manual(full_messages)

        inputs = tokenizer(text, return_tensors="pt")
        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generation (timed for the metadata below)
        start_time = time.time()
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
            )
        elapsed = time.time() - start_time

        # Decode only the newly generated tokens (strip the prompt echo).
        input_len = inputs["input_ids"].shape[1]
        response = tokenizer.decode(
            outputs[0][input_len:],
            skip_special_tokens=True,
        )

        # Per-call metadata for logging/metrics
        metadata = {
            "model_id": model_id,
            "latency_s": elapsed,
            "input_tokens": input_len,
            "output_tokens": len(outputs[0]) - input_len,
            "total_tokens": len(outputs[0]),
        }

        return response.strip(), metadata

    def _format_messages_manual(self, messages: List[Dict[str, str]]) -> str:
        """Format messages in ChatML style (fallback for apply_chat_template).

        Unknown roles are silently dropped; a trailing assistant header is
        appended as the generation prompt.
        """
        formatted = ""
        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            if role == "system":
                formatted += f"<|im_start|>system\n{content}<|im_end|>\n"
            elif role == "user":
                formatted += f"<|im_start|>user\n{content}<|im_end|>\n"
            elif role == "assistant":
                formatted += f"<|im_start|>assistant\n{content}<|im_end|>\n"
        formatted += "<|im_start|>assistant\n"
        return formatted

    def unload_model(self, model_id: str):
        """Unload one model and release its (GPU) memory. No-op if absent."""
        if model_id in self._loaded_models:
            model, tokenizer = self._loaded_models.pop(model_id)
            if model_id in self._load_order:
                self._load_order.remove(model_id)
            del model
            del tokenizer
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            print(f"Unloaded model: {model_id}")

    def unload_all(self):
        """Unload every cached model."""
        model_ids = list(self._loaded_models.keys())
        for model_id in model_ids:
            self.unload_model(model_id)

    def get_loaded_models(self) -> List[str]:
        """Return ids of the models currently held in the cache."""
        return list(self._loaded_models.keys())
235
+
236
+
237
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
238
+ _model_manager: Optional[ModelManager] = None
239
+
240
+
241
+ def get_model_manager(
242
+ base_path: str = None,
243
+ max_cached_models: int = 2,
244
+ use_4bit: bool = True,
245
+ ) -> ModelManager:
246
+ """ModelManager ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค ๋ฐ˜ํ™˜"""
247
+ global _model_manager
248
+ if _model_manager is None:
249
+ _model_manager = ModelManager(
250
+ base_path=base_path,
251
+ max_cached_models=max_cached_models,
252
+ use_4bit=use_4bit,
253
+ )
254
+ return _model_manager
models/model_registry.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """KAIdol ํ•™์Šต ๋ชจ๋ธ ๋ ˆ์ง€์ŠคํŠธ๋ฆฌ - ๋ชจ๋“  ํ•™์Šต๋œ ๋ชจ๋ธ ์ •์˜"""
2
+
3
+ from typing import Dict, List, Optional
4
+
5
+ # ๊ธฐ๋ณธ ๋ชจ๋ธ ์ •๋ณด (HuggingFace Hub)
6
+ BASE_MODELS = {
7
+ "hyperclovax-32b": "naver-hyperclovax/HyperCLOVAX-SEED-Think-32B",
8
+ "qwen2.5-72b": "Qwen/Qwen2.5-72B-Instruct",
9
+ "qwen2.5-32b": "Qwen/Qwen2.5-32B-Instruct",
10
+ "qwen2.5-14b": "Qwen/Qwen2.5-14B-Instruct",
11
+ "qwen2.5-7b": "Qwen/Qwen2.5-7B-Instruct",
12
+ "qwen3-8b": "Qwen/Qwen3-8B",
13
+ "exaone-7.8b": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct",
14
+ "solar-10.7b": "upstage/SOLAR-10.7B-Instruct-v1.0",
15
+ "solar-pro": "upstage/solar-pro-preview-instruct",
16
+ "varco-8b": "NCSOFT/Llama-VARCO-8B-Instruct",
17
+ "kanana-2-30b-thinking": "kakaocorp/kanana-2-30b-a3b-thinking",
18
+ "kanana-2-30b-instruct": "kakaocorp/kanana-2-30b-a3b-instruct",
19
+ "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct",
20
+ }
21
+
22
+ # ์ „์ฒด ๋ชจ๋ธ ๋ ˆ์ง€์ŠคํŠธ๋ฆฌ
23
+ MODEL_REGISTRY: Dict[str, Dict[str, Dict]] = {
24
+ # ============================================================
25
+ # DPO v5 ๊ณ„์—ด (2026-01-13)
26
+ # ============================================================
27
+ "dpo-v5": {
28
+ "hyperclovax-32b-dpo-v5": {
29
+ "path": "outputs/dpo_v5/hyperclovax-32b-dpo-v5-20260113-0012",
30
+ "base": BASE_MODELS["hyperclovax-32b"],
31
+ "method": "DPO",
32
+ "size": "32B",
33
+ "description": "HyperCLOVAX 32B DPO v5 (Primary)",
34
+ "recommended": True,
35
+ },
36
+ "qwen2.5-14b-dpo-v5": {
37
+ "path": "outputs/dpo_v5/qwen2.5-14b-dpo-v5-20260113-0045",
38
+ "base": BASE_MODELS["qwen2.5-14b"],
39
+ "method": "DPO",
40
+ "size": "14B",
41
+ "description": "Qwen2.5 14B DPO v5",
42
+ },
43
+ "qwen2.5-7b-dpo-v5": {
44
+ "path": "outputs/dpo_v5/qwen2.5-7b-dpo-v5-20260113-0052",
45
+ "base": BASE_MODELS["qwen2.5-7b"],
46
+ "method": "DPO",
47
+ "size": "7B",
48
+ "description": "Qwen2.5 7B DPO v5",
49
+ },
50
+ "exaone-7.8b-dpo-v5": {
51
+ "path": "outputs/dpo_v5/exaone-7.8b-dpo-v5-20260113-0052",
52
+ "base": BASE_MODELS["exaone-7.8b"],
53
+ "method": "DPO",
54
+ "size": "7.8B",
55
+ "description": "EXAONE 7.8B DPO v5",
56
+ },
57
+ "qwen3-8b-dpo-v5": {
58
+ "path": "outputs/dpo_v5/qwen3-8b-dpo-v5-20260113-0052",
59
+ "base": BASE_MODELS["qwen3-8b"],
60
+ "method": "DPO",
61
+ "size": "8B",
62
+ "description": "Qwen3 8B DPO v5",
63
+ },
64
+ "solar-10.7b-dpo-v5": {
65
+ "path": "outputs/dpo_v5/solar-10.7b-dpo-v5-20260113-0045",
66
+ "base": BASE_MODELS["solar-10.7b"],
67
+ "method": "DPO",
68
+ "size": "10.7B",
69
+ "description": "Solar 10.7B DPO v5",
70
+ },
71
+ },
72
+
73
+ # ============================================================
74
+ # SFT Thinking ๊ณ„์—ด (2026-01-16)
75
+ # ============================================================
76
+ "sft-thinking": {
77
+ "qwen2.5-14b-thinking": {
78
+ "path": "outputs/qwen2.5-14b-thinking-full",
79
+ "base": BASE_MODELS["qwen2.5-14b"],
80
+ "method": "SFT",
81
+ "size": "14B",
82
+ "description": "Qwen2.5 14B SFT Thinking",
83
+ },
84
+ "qwen2.5-7b-thinking": {
85
+ "path": "outputs/qwen2.5-7b-thinking-full",
86
+ "base": BASE_MODELS["qwen2.5-7b"],
87
+ "method": "SFT",
88
+ "size": "7B",
89
+ "description": "Qwen2.5 7B SFT Thinking",
90
+ },
91
+ "exaone-7.8b-thinking": {
92
+ "path": "outputs/exaone-7.8b-thinking-full",
93
+ "base": BASE_MODELS["exaone-7.8b"],
94
+ "method": "SFT",
95
+ "size": "7.8B",
96
+ "description": "EXAONE 7.8B SFT Thinking",
97
+ },
98
+ },
99
+
100
+ # ============================================================
101
+ # Phase 7 Students (Kimi K2 Distillation)
102
+ # ============================================================
103
+ "phase7-students": {
104
+ "kanana-30b-thinking-kimi": {
105
+ "path": "outputs/phase7_students/kanana-2-30b-thinking-kimi-student",
106
+ "base": BASE_MODELS["kanana-2-30b-thinking"],
107
+ "method": "Distillation",
108
+ "size": "30B (3B active)",
109
+ "description": "Kanana 30B Thinking Kimi Student",
110
+ },
111
+ "kanana-30b-instruct-kimi": {
112
+ "path": "outputs/phase7_students/kanana-2-30b-instruct-kimi-student",
113
+ "base": BASE_MODELS["kanana-2-30b-instruct"],
114
+ "method": "Distillation",
115
+ "size": "30B (3B active)",
116
+ "description": "Kanana 30B Instruct Kimi Student",
117
+ },
118
+ "qwen2.5-14b-kimi": {
119
+ "path": "outputs/phase7_students/qwen2.5-14b-kimi-student",
120
+ "base": BASE_MODELS["qwen2.5-14b"],
121
+ "method": "Distillation",
122
+ "size": "14B",
123
+ "description": "Qwen2.5 14B Kimi Student",
124
+ },
125
+ "qwen2.5-7b-kimi-v3": {
126
+ "path": "outputs/phase7_students/qwen2.5-7b-kimi-student-v3",
127
+ "base": BASE_MODELS["qwen2.5-7b"],
128
+ "method": "Distillation",
129
+ "size": "7B",
130
+ "description": "Qwen2.5 7B Kimi Student v3",
131
+ },
132
+ "exaone-7.8b-kimi": {
133
+ "path": "outputs/phase7_students/exaone-7.8b-kimi-student",
134
+ "base": BASE_MODELS["exaone-7.8b"],
135
+ "method": "Distillation",
136
+ "size": "7.8B",
137
+ "description": "EXAONE 7.8B Kimi Student",
138
+ },
139
+ },
140
+
141
+ # ============================================================
142
+ # V7 Students (Latest - 2026-01-17~19)
143
+ # ============================================================
144
+ "v7-students": {
145
+ "qwen2.5-72b-v7": {
146
+ "path": "outputs/v7_students/qwen2.5-72b-v7-20260119-1113",
147
+ "base": BASE_MODELS["qwen2.5-72b"],
148
+ "method": "SFT",
149
+ "size": "72B",
150
+ "description": "Qwen2.5 72B V7 (Latest)",
151
+ },
152
+ "llama-3.3-70b-v7": {
153
+ "path": "outputs/v7_students/llama-3.3-70b-v7-20260119-1114",
154
+ "base": BASE_MODELS["llama-3.3-70b"],
155
+ "method": "SFT",
156
+ "size": "70B",
157
+ "description": "Llama 3.3 70B V7 (Latest)",
158
+ },
159
+ "qwen2.5-32b-v7": {
160
+ "path": "outputs/v7_students/qwen2.5-32b-v7-20260118-1135",
161
+ "base": BASE_MODELS["qwen2.5-32b"],
162
+ "method": "SFT",
163
+ "size": "32B",
164
+ "description": "Qwen2.5 32B V7",
165
+ },
166
+ "qwen2.5-14b-v7": {
167
+ "path": "outputs/v7_students/qwen2.5-14b-v7-20260118-1135",
168
+ "base": BASE_MODELS["qwen2.5-14b"],
169
+ "method": "SFT",
170
+ "size": "14B",
171
+ "description": "Qwen2.5 14B V7",
172
+ },
173
+ "qwen2.5-7b-v7": {
174
+ "path": "outputs/v7_students/qwen2.5-7b-v7-20260118-1135",
175
+ "base": BASE_MODELS["qwen2.5-7b"],
176
+ "method": "SFT",
177
+ "size": "7B",
178
+ "description": "Qwen2.5 7B V7",
179
+ },
180
+ "exaone-7.8b-v7": {
181
+ "path": "outputs/v7_students/exaone-7.8b-v7-20260118-1135",
182
+ "base": BASE_MODELS["exaone-7.8b"],
183
+ "method": "SFT",
184
+ "size": "7.8B",
185
+ "description": "EXAONE 7.8B V7",
186
+ },
187
+ "qwen3-8b-v7": {
188
+ "path": "outputs/v7_students/qwen3-8b-v7-20260118-1135",
189
+ "base": BASE_MODELS["qwen3-8b"],
190
+ "method": "SFT",
191
+ "size": "8B",
192
+ "description": "Qwen3 8B V7",
193
+ },
194
+ "solar-pro-v7": {
195
+ "path": "outputs/v7_students/solar-pro-v7-20260118-1135",
196
+ "base": BASE_MODELS["solar-pro"],
197
+ "method": "SFT",
198
+ "size": "22B",
199
+ "description": "Solar Pro V7",
200
+ },
201
+ "varco-8b-v7": {
202
+ "path": "outputs/v7_students/varco-8b-v7-20260118-1135",
203
+ "base": BASE_MODELS["varco-8b"],
204
+ "method": "SFT",
205
+ "size": "8B",
206
+ "description": "VARCO 8B V7",
207
+ },
208
+ },
209
+
210
+ # ============================================================
211
+ # ๊ธฐํƒ€ ํ•™์Šต ๋ชจ๋ธ (DPO, etc.)
212
+ # ============================================================
213
+ "others": {
214
+ "exaone-7.8b-dpo": {
215
+ "path": "outputs/exaone-7.8b-dpo",
216
+ "base": BASE_MODELS["exaone-7.8b"],
217
+ "method": "DPO",
218
+ "size": "7.8B",
219
+ "description": "EXAONE 7.8B DPO (Standalone)",
220
+ },
221
+ "qwen2.5-7b-dpo": {
222
+ "path": "outputs/qwen2.5-7b-dpo",
223
+ "base": BASE_MODELS["qwen2.5-7b"],
224
+ "method": "DPO",
225
+ "size": "7B",
226
+ "description": "Qwen2.5 7B DPO (Standalone)",
227
+ },
228
+ },
229
+ }
230
+
231
+
232
def get_all_models() -> List[str]:
    """Return the IDs of every registered model across all categories.

    Order follows the declaration order of MODEL_REGISTRY and of each
    category's model dict.
    """
    # Only the per-category model dicts are needed; iterating .values()
    # avoids the unused `category` binding of .items().
    models: List[str] = []
    for model_dict in MODEL_REGISTRY.values():
        models.extend(model_dict.keys())
    return models
238
+
239
+
240
def get_model_info(model_id: str) -> Optional[Dict]:
    """Look up a model's registry entry by its ID.

    Returns a shallow copy of the entry augmented with two extra keys —
    "category" (the registry section it was found under) and "id" — or
    None when the ID is not registered anywhere.
    """
    for cat_name, entries in MODEL_REGISTRY.items():
        if model_id not in entries:
            continue
        result = dict(entries[model_id])
        result["category"] = cat_name
        result["id"] = model_id
        return result
    return None
249
+
250
+
251
def get_models_by_category(category: str) -> List[str]:
    """Return the model IDs registered under *category* (empty list if unknown)."""
    entries = MODEL_REGISTRY.get(category, {})
    return [model_id for model_id in entries]
254
+
255
+
256
def get_all_categories() -> List[str]:
    """Return every registry category name, in declaration order."""
    return [name for name in MODEL_REGISTRY]
259
+
260
+
261
def get_models_for_dropdown() -> List[tuple]:
    """Build (display_name, model_id) pairs for a Gradio dropdown.

    The display name is "[size] description", falling back to "?" for a
    missing size and to the model ID for a missing description.
    """
    result = []
    # Category names are not shown in the dropdown, so iterate .values()
    # instead of .items() (the original bound an unused `category`).
    for model_dict in MODEL_REGISTRY.values():
        for model_id, info in model_dict.items():
            display = f"[{info.get('size', '?')}] {info.get('description', model_id)}"
            result.append((display, model_id))
    return result
269
+
270
+
271
def get_small_models(max_size_gb: int = 16) -> List[str]:
    """Return model IDs whose estimated 4-bit footprint fits in *max_size_gb* GB.

    Rough 4-bit quantized memory estimates: 7B ~2GB, 14B ~4GB, 32B ~8GB,
    72B ~18GB. Unknown size strings are treated conservatively as 20GB,
    and a model missing a "size" field is assumed to be 72B-class.
    """
    estimates = {
        "7B": 2, "7.8B": 2, "8B": 2,
        "10.7B": 3, "14B": 4, "22B": 6,
        "30B (3B active)": 1,  # MoE: only ~3B parameters active at a time
        "32B": 8, "70B": 18, "72B": 18,
    }

    fitting = []
    for model_id in get_all_models():
        info = get_model_info(model_id)
        if not info:
            continue
        footprint = estimates.get(info.get("size", "72B"), 20)
        if footprint <= max_size_gb:
            fitting.append(model_id)
    return fitting
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ pyyaml>=6.0
3
+ torch>=2.1.0
4
+ transformers>=4.36.0
5
+ accelerate>=0.25.0
6
+ bitsandbytes>=0.41.0
7
+ huggingface_hub>=0.19.0
8
+ peft>=0.7.0
scenarios/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .scenario_loader import ScenarioLoader, get_scenario_loader
scenarios/scenario_loader.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์‹œ๋‚˜๋ฆฌ์˜ค ๋กœ๋”"""
2
+
3
+ import random
4
+ import yaml
5
+ from pathlib import Path
6
+ from typing import Dict, List, Optional
7
+
8
+
9
+ # ๋‚ด์žฅ ์‹œ๋‚˜๋ฆฌ์˜ค ๋ฐ์ดํ„ฐ (configs/scenarios.yaml ๊ธฐ๋ฐ˜)
10
+ BUILTIN_SCENARIOS = [
11
+ # ์ฒซ ๋งŒ๋‚จ
12
+ {
13
+ "id": "fm_01",
14
+ "category": "first_meeting",
15
+ "category_name": "์ฒซ ๋งŒ๋‚จ",
16
+ "context": "ํŒฌ๋ฏธํŒ…",
17
+ "situation": "ํŒฌ์ด ์ฒ˜์Œ ๋งŒ๋‚˜์„œ ์ธ์‚ฌํ•จ",
18
+ "user_input": "{char}์•„! ๋“œ๋””์–ด ๋งŒ๋‚ฌ๋‹ค... ์ •๋ง ์ข‹์•„ํ•ด!",
19
+ },
20
+ {
21
+ "id": "fm_02",
22
+ "category": "first_meeting",
23
+ "category_name": "์ฒซ ๋งŒ๋‚จ",
24
+ "context": "ํŒฌ๋ฏธํŒ…",
25
+ "situation": "ํŒฌ์ด ๊ธด์žฅํ•ด์„œ ๋ง์„ ๋ชปํ•จ",
26
+ "user_input": "์–ด... ์•ˆ๋…•ํ•˜์„ธ์š”... ๋„ˆ๋ฌด ๊ธด์žฅ๋ผ์„œ...",
27
+ },
28
+ # ์ผ์ƒ ๋Œ€ํ™”
29
+ {
30
+ "id": "dc_01",
31
+ "category": "daily_chat",
32
+ "category_name": "์ผ์ƒ ๋Œ€ํ™”",
33
+ "context": "๋ฒ„๋ธ” ๋Œ€ํ™”",
34
+ "situation": "ํŒฌ์ด ์ผ์ƒ ์•ˆ๋ถ€๋ฅผ ๋ฌผ์Œ",
35
+ "user_input": "{char}์•„ ์˜ค๋Š˜ ๋ญํ•ด? ๋ฐฅ์€ ๋จน์—ˆ์–ด?",
36
+ },
37
+ {
38
+ "id": "dc_02",
39
+ "category": "daily_chat",
40
+ "category_name": "์ผ์ƒ ๋Œ€ํ™”",
41
+ "context": "SNS",
42
+ "situation": "ํŒฌ์ด ์‚ฌ์ง„์— ๋ฐ˜์‘ํ•จ",
43
+ "user_input": "๋ฐฉ๊ธˆ ์˜ฌ๋ฆฐ ์…€์นด ์ง„์งœ ์ž˜ ๋‚˜์™”๋‹ค... ์‹ฌ์ฟตํ–ˆ์–ด",
44
+ },
45
+ # ๊ฐ์ • ์ง€์›
46
+ {
47
+ "id": "es_01",
48
+ "category": "emotional_support",
49
+ "category_name": "๊ฐ์ • ์ง€์›",
50
+ "context": "์œ„๋กœ",
51
+ "situation": "ํŒฌ์ด ํž˜๋“ค์–ด์„œ ์œ„๋กœ๋ฅผ ๊ตฌํ•จ",
52
+ "user_input": "์˜ค๋Š˜ ์ง„์งœ ํž˜๋“ค์—ˆ์–ด... ํ•™๊ต์—์„œ ๋ฐœํ‘œ๋„ ๋ง์น˜๊ณ ...",
53
+ },
54
+ {
55
+ "id": "es_02",
56
+ "category": "emotional_support",
57
+ "category_name": "๊ฐ์ • ์ง€์›",
58
+ "context": "์šฐ์šธํ•จ",
59
+ "situation": "ํŒฌ์ด ์šฐ์šธํ•จ์„ ํ† ๋กœํ•จ",
60
+ "user_input": "์š”์ฆ˜ ์™œ ์ด๋ ‡๊ฒŒ ์šฐ์šธํ•˜์ง€... ์•„๋ฌด๊ฒƒ๋„ ํ•˜๊ธฐ ์‹ซ์–ด.",
61
+ },
62
+ # ๊ณ ๋ฐฑ/ํ˜ธ๊ฐ
63
+ {
64
+ "id": "cf_01",
65
+ "category": "confession",
66
+ "category_name": "๊ณ ๋ฐฑ/ํ˜ธ๊ฐ",
67
+ "context": "๊ณ ๋ฐฑ",
68
+ "situation": "ํŒฌ์ด ์ง„์‹ฌ์œผ๋กœ ์ข‹์•„ํ•œ๋‹ค๊ณ  ํ•จ",
69
+ "user_input": "{char}์•„... ๋‚˜ ์ง„์‹ฌ์œผ๋กœ ์ข‹์•„ํ•ด.",
70
+ },
71
+ {
72
+ "id": "cf_02",
73
+ "category": "confession",
74
+ "category_name": "๊ณ ๋ฐฑ/ํ˜ธ๊ฐ",
75
+ "context": "์งˆ๋ฌธ",
76
+ "situation": "ํŒฌ์ด ์ž์‹ ์„ ์–ด๋–ป๊ฒŒ ์ƒ๊ฐํ•˜๋Š”์ง€ ๋ฌผ์Œ",
77
+ "user_input": "{char}์•„, ๋‚˜ ์–ด๋–ป๊ฒŒ ์ƒ๊ฐํ•ด...?",
78
+ },
79
+ # ์žฅ๋‚œ/์œ ๋จธ
80
+ {
81
+ "id": "pl_01",
82
+ "category": "playful",
83
+ "category_name": "์žฅ๋‚œ/์œ ๋จธ",
84
+ "context": "์žฅ๋‚œ",
85
+ "situation": "ํŒฌ์ด ์žฅ๋‚œ์œผ๋กœ ๋‹ค๋ฅธ ๋ฉค๋ฒ„๋ฅผ ์ข‹์•„ํ•œ๋‹ค๊ณ  ํ•จ",
86
+ "user_input": "์‚ฌ์‹ค ๋‚˜ ๋‹ค๋ฅธ ๋ฉค๋ฒ„๊ฐ€ ๋” ์ข‹์•„~ ใ…‹ใ…‹ ๋†๋‹ด์ด์•ผ!",
87
+ },
88
+ # ํŠน๋ณ„ ์š”์ฒญ
89
+ {
90
+ "id": "sr_01",
91
+ "category": "special_request",
92
+ "category_name": "ํŠน๋ณ„ ์š”์ฒญ",
93
+ "context": "์—ฐ์ธ ์—ฐ๊ธฐ",
94
+ "situation": "ํŒฌ์ด ์—ฐ์ธ์ฒ˜๋Ÿผ ๋Œ€ํ•ด๋‹ฌ๋ผ๊ณ  ์š”์ฒญ",
95
+ "user_input": "์˜ค๋Š˜๋งŒ ๋‚ด ์—ฐ์ธ์ด๋ผ๊ณ  ์ƒ๊ฐํ•ด์ค„๋ž˜?",
96
+ },
97
+ # ๋ฌธํ™”์  ์ƒํ™ฉ
98
+ {
99
+ "id": "cu_01",
100
+ "category": "cultural",
101
+ "category_name": "๋ฌธํ™”์  ์ƒํ™ฉ",
102
+ "context": "์ƒ์ผ",
103
+ "situation": "ํŒฌ์ด ์ƒ์ผ ์ถ•ํ•˜๋ฅผ ํ•จ",
104
+ "user_input": "{char}์•„ ์ƒ์ผ ์ถ•ํ•˜ํ•ด! ์˜ค๋Š˜ ํ•˜๋ฃจ ํ–‰๋ณตํ•˜๊ฒŒ ๋ณด๋‚ด~",
105
+ },
106
+ {
107
+ "id": "cu_05",
108
+ "category": "cultural",
109
+ "category_name": "๋ฌธํ™”์  ์ƒํ™ฉ",
110
+ "context": "ํฌ๋ฆฌ์Šค๋งˆ์Šค",
111
+ "situation": "ํฌ๋ฆฌ์Šค๋งˆ์Šค ์ถ•ํ•˜",
112
+ "user_input": "๋ฉ”๋ฆฌ ํฌ๋ฆฌ์Šค๋งˆ์Šค! {char}๋ž‘ ํ•จ๊ป˜๋ผ์„œ ์™ธ๋กญ์ง€ ์•Š์•„~",
113
+ },
114
+ # ๊ฐˆ๋“ฑ ์ƒํ™ฉ
115
+ {
116
+ "id": "cn_01",
117
+ "category": "conflict",
118
+ "category_name": "๊ฐˆ๋“ฑ ์ƒํ™ฉ",
119
+ "context": "์งˆํˆฌ",
120
+ "situation": "ํŒฌ์ด ๋‹ค๋ฅธ ํŒฌ์—๊ฒŒ ์งˆํˆฌ๋ฅผ ํ‘œํ˜„",
121
+ "user_input": "{char}๋Š” ๋‹ค๋ฅธ ํŒฌ๋“คํ•œํ…Œ๋„ ์ด๋ ‡๊ฒŒ ์ž˜ํ•ด์ค˜...? ๋ญ”๊ฐ€ ์งˆํˆฌ๋‚˜...",
122
+ },
123
+ {
124
+ "id": "cn_05",
125
+ "category": "conflict",
126
+ "category_name": "๊ฐˆ๋“ฑ ์ƒํ™ฉ",
127
+ "context": "๋ถˆ์•ˆ",
128
+ "situation": "ํŒฌ์ด ๊ด€๊ณ„์— ๋ถˆ์•ˆ๊ฐ์„ ๋А๋‚Œ",
129
+ "user_input": "{char}๋Š” ์ •๋ง ๋‚˜ํ•œํ…Œ ๊ด€์‹ฌ ์žˆ๋Š” ๊ฑฐ์•ผ...? ๊ฐ€๋” ํ™•์‹ ์ด ์•ˆ ๋“ค์–ด.",
130
+ },
131
+ # ๊ฐ์ • ์œ„๊ธฐ
132
+ {
133
+ "id": "ec_01",
134
+ "category": "emotional_crisis",
135
+ "category_name": "๊ฐ์ • ์œ„๊ธฐ",
136
+ "context": "์Šฌํ””",
137
+ "situation": "ํŒฌ์ด ๊นŠ์€ ์Šฌํ””์„ ํ‘œํ˜„",
138
+ "user_input": "์˜ค๋Š˜ ์ง„์งœ ๋งŽ์ด ์šธ์—ˆ์–ด... ์‚ถ์ด ๋„ˆ๋ฌด ํž˜๋“ค๋‹ค.",
139
+ },
140
+ {
141
+ "id": "ec_05",
142
+ "category": "emotional_crisis",
143
+ "category_name": "๊ฐ์ • ์œ„๊ธฐ",
144
+ "context": "์ž์กด๊ฐ",
145
+ "situation": "ํŒฌ์ด ์ž์กด๊ฐ์ด ๋‚ฎ์•„์ ธ ์žˆ์Œ",
146
+ "user_input": "๋‚˜๋Š” ์™œ ์ด๋ ‡๊ฒŒ ๋ชป๋‚˜ ๋ณด์ผ๏ฟฝ๏ฟฝ๏ฟฝ... ์•„๋ฌด๊ฒƒ๋„ ์ž˜ํ•˜๋Š” ๊ฒŒ ์—†์–ด.",
147
+ },
148
+ # ์žฅ๊ธฐ ๊ด€๊ณ„
149
+ {
150
+ "id": "lt_01",
151
+ "category": "long_term",
152
+ "category_name": "์žฅ๊ธฐ ๊ด€๊ณ„",
153
+ "context": "๊ด€๊ณ„ ํšŒ์ƒ",
154
+ "situation": "ํŒฌ์ด ์ฒ˜์Œ ๋งŒ๋‚œ ๋‚ ์„ ํšŒ์ƒ",
155
+ "user_input": "์šฐ๋ฆฌ ์ฒ˜์Œ ๋งŒ๋‚œ ๋‚  ๊ธฐ์–ต๋‚˜? ๊ทธ๋•Œ ๋‚˜ ์ง„์งœ ๋–จ์—ˆ์—ˆ๋Š”๋ฐ...",
156
+ },
157
+ {
158
+ "id": "lt_03",
159
+ "category": "long_term",
160
+ "category_name": "์žฅ๊ธฐ ๊ด€๊ณ„",
161
+ "context": "๋ฏธ๋ž˜ ์•ฝ์†",
162
+ "situation": "ํŒฌ์ด ์•ž์œผ๋กœ๋„ ํ•จ๊ป˜ํ•˜๊ณ  ์‹ถ๋‹ค๊ณ  ํ•จ",
163
+ "user_input": "์•ž์œผ๋กœ๋„ ๊ณ„์† {char} ๊ณ์— ์žˆ์–ด๋„ ๋ผ?",
164
+ },
165
+ ]
166
+
167
+
168
class ScenarioLoader:
    """Loads scenario presets from a YAML config file or built-in defaults.

    If *config_path* is given and the file exists, scenarios are read from
    its "scenarios" key; otherwise the module-level BUILTIN_SCENARIOS are
    used.
    """

    def __init__(self, config_path: Optional[str] = None):
        self.config_path = Path(config_path) if config_path else None
        self._scenarios: List[Dict] = []
        self._load_scenarios()

    def _load_scenarios(self):
        """Load scenario data from the config file, or fall back to built-ins."""
        if self.config_path and self.config_path.exists():
            with open(self.config_path, "r", encoding="utf-8") as f:
                # yaml.safe_load returns None for an empty document, which
                # would crash the .get() below — coerce to an empty dict.
                data = yaml.safe_load(f) or {}
            self._scenarios = data.get("scenarios", [])
        else:
            self._scenarios = BUILTIN_SCENARIOS

    def get_scenarios(self) -> List[Dict]:
        """Return all loaded scenarios."""
        return self._scenarios

    def get_scenario(self, scenario_id: str) -> Optional[Dict]:
        """Return the scenario with the given ID, or None if not found."""
        for s in self._scenarios:
            if s.get("id") == scenario_id:
                return s
        return None

    def get_scenarios_by_category(self, category: str) -> List[Dict]:
        """Return all scenarios belonging to *category*."""
        return [s for s in self._scenarios if s.get("category") == category]

    def get_categories(self) -> List[str]:
        """Return category names, deduplicated, in first-seen order.

        (Previously built via list(set(...)), which made the order
        nondeterministic between runs; dict.fromkeys preserves insertion
        order while deduplicating.)
        """
        return list(dict.fromkeys(s.get("category") for s in self._scenarios))

    def get_random_scenario(self, category: Optional[str] = None) -> Optional[Dict]:
        """Pick a random scenario, optionally restricted to *category*.

        Returns None when the (filtered) pool is empty.
        """
        pool = self.get_scenarios_by_category(category) if category else self._scenarios
        return random.choice(pool) if pool else None

    def format_user_input(self, scenario: Dict, character_name: str) -> str:
        """Substitute every "{char}" in the scenario's user_input with the character name."""
        user_input = scenario.get("user_input", "")
        return user_input.replace("{char}", character_name)

    def get_scenario_display_name(self, scenario: Dict) -> str:
        """Return a human-readable label of the form "[category_name] context"."""
        category_name = scenario.get("category_name", scenario.get("category", ""))
        context = scenario.get("context", "")
        return f"[{category_name}] {context}"

    def get_scenarios_for_dropdown(self) -> List[tuple]:
        """Return (display_name, scenario_id) tuples for a dropdown widget."""
        return [
            (self.get_scenario_display_name(s), s["id"])
            for s in self._scenarios
        ]
229
+
230
+
231
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
232
+ _scenario_loader: Optional[ScenarioLoader] = None
233
+
234
+
235
def get_scenario_loader(config_path: str = None) -> ScenarioLoader:
    """Return the process-wide ScenarioLoader, creating it on first use.

    NOTE(review): *config_path* only takes effect on the very first call;
    subsequent calls return the already-built singleton unchanged.
    """
    global _scenario_loader
    if _scenario_loader is not None:
        return _scenario_loader
    _scenario_loader = ScenarioLoader(config_path)
    return _scenario_loader
ui/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .arena_tab import create_arena_tab
2
+ from .chat_tab import create_chat_tab
3
+ from .leaderboard_tab import create_leaderboard_tab
4
+ from .history_tab import create_history_tab
ui/arena_tab.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """A/B Arena ํƒญ UI"""
2
+
3
+ import gradio as gr
4
+ import random
5
+ from typing import Dict, List, Tuple, Optional, Any
6
+
7
+ from models.model_registry import get_all_models, get_model_info, get_models_for_dropdown
8
+ from characters import get_character_loader, build_system_prompt
9
+ from scenarios import get_scenario_loader
10
+ from voting import get_vote_storage, get_elo_calculator
11
+ from utils import parse_thinking_response, format_thinking_for_display
12
+
13
+
14
def create_arena_tab(
    model_manager: Any = None,
    use_mock: bool = False,
):
    """Build the A/B Arena tab: two models answer side by side, then the user votes.

    Args:
        model_manager: object exposing
            generate_response(model_id, messages, system_prompt) -> (text, meta)
            where meta carries at least "latency_s" and "output_tokens".
            When None, canned mock responses are produced instead.
        use_mock: force mock responses even if a model_manager is supplied.
    """

    # Data loaders (module-level singletons)
    char_loader = get_character_loader()
    scenario_loader = get_scenario_loader()
    vote_storage = get_vote_storage()
    elo_calculator = get_elo_calculator()

    # Model list: (display description, model_id) pairs for the dropdowns
    all_models = get_all_models()
    model_choices = [(f"{get_model_info(m).get('description', m)}", m) for m in all_models]

    # Character list
    characters = char_loader.get_character_names()

    # Scenario preset list: (display name, scenario_id) pairs
    scenario_choices = scenario_loader.get_scenarios_for_dropdown()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## A/B ํ…Œ์ŠคํŠธ ์•„๋ ˆ๋‚˜")
    gr.Markdown("๋‘ ๋ชจ๋ธ์˜ ์‘๋‹ต์„ ๋น„๊ตํ•˜๊ณ  ๋” ์ข‹์€ ์‘๋‹ต์— ํˆฌํ‘œํ•˜์„ธ์š”.")

    # Settings panel: character / scenario preset / blind mode
    with gr.Row():
        with gr.Column(scale=1):
            character_dropdown = gr.Dropdown(
                choices=characters,
                value=characters[0] if characters else None,
                label="์บ๋ฆญํ„ฐ ์„ ํƒ",
            )
        with gr.Column(scale=1):
            scenario_dropdown = gr.Dropdown(
                choices=scenario_choices,
                value=scenario_choices[0][1] if scenario_choices else None,
                label="์‹œ๋‚˜๋ฆฌ์˜ค ํ”„๋ฆฌ์…‹",
            )
        with gr.Column(scale=1):
            blind_mode = gr.Checkbox(
                value=True,
                label="๋ธ”๋ผ์ธ๋“œ ๋ชจ๋“œ (๋ชจ๋ธ๋ช… ์ˆจ๊น€)",
            )

    # Model selection row
    with gr.Row():
        with gr.Column(scale=2):
            model_a_dropdown = gr.Dropdown(
                choices=model_choices,
                value=all_models[0] if all_models else None,
                label="Model A",
            )
        with gr.Column(scale=2):
            model_b_dropdown = gr.Dropdown(
                choices=model_choices,
                value=all_models[1] if len(all_models) > 1 else None,
                label="Model B",
            )
        with gr.Column(scale=1):
            random_models_btn = gr.Button("๋žœ๋ค ๋ชจ๋ธ", size="sm")

    # Response area: thinking accordion + clean response + latency/token meta
    with gr.Row():
        # Model A response
        with gr.Column(scale=1):
            model_a_label = gr.Markdown("### Model A")
            with gr.Accordion("Thinking Process", open=False):
                thinking_a = gr.Markdown("*(์‘๋‹ต ์ƒ์„ฑ ํ›„ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค)*")
            response_a = gr.Textbox(
                label="์‘๋‹ต",
                lines=8,
                interactive=False,
            )
            metadata_a = gr.Markdown("")

        # Model B response
        with gr.Column(scale=1):
            model_b_label = gr.Markdown("### Model B")
            with gr.Accordion("Thinking Process", open=False):
                thinking_b = gr.Markdown("*(์‘๋‹ต ์ƒ์„ฑ ํ›„ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค)*")
            response_b = gr.Textbox(
                label="์‘๋‹ต",
                lines=8,
                interactive=False,
            )
            metadata_b = gr.Markdown("")

    # User input row
    with gr.Row():
        user_input = gr.Textbox(
            label="ํŒฌ ๋ฉ”์‹œ์ง€",
            placeholder="์•„์ด๋Œ์—๊ฒŒ ๋ณด๋‚ผ ๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
            lines=2,
            scale=4,
        )
        with gr.Column(scale=1):
            random_scenario_btn = gr.Button("๋žœ๋ค ์‹œ๋‚˜๋ฆฌ์˜ค")
            submit_btn = gr.Button("์ „์†ก", variant="primary")

    # Voting area
    gr.Markdown("### ํˆฌํ‘œ")
    with gr.Row():
        vote_a_btn = gr.Button("A๊ฐ€ ๋” ์ข‹์Œ", variant="secondary")
        vote_tie_btn = gr.Button("๋น„์Šทํ•จ", variant="secondary")
        vote_b_btn = gr.Button("B๊ฐ€ ๋” ์ข‹์Œ", variant="secondary")
        vote_skip_btn = gr.Button("์Šคํ‚ต", variant="secondary")

    vote_reason = gr.Textbox(
        label="ํˆฌํ‘œ ์ด์œ  (์„ ํƒ์‚ฌํ•ญ)",
        placeholder="์™œ ์ด ์‘๋‹ต์ด ๋” ์ข‹๋‹ค๊ณ  ์ƒ๊ฐํ•˜์‹œ๋‚˜์š”?",
        lines=1,
    )

    vote_result = gr.Markdown("")

    # Per-session state: last generation round, consumed by handle_vote
    state = gr.State({
        "model_a": None,
        "model_b": None,
        "response_a": None,
        "response_b": None,
        "character": None,
        "user_input": None,
    })

    # ============================================================
    # Event handlers
    # ============================================================

    def select_random_models():
        """Pick two distinct models at random for the A/B slots."""
        if len(all_models) < 2:
            return all_models[0] if all_models else None, None
        selected = random.sample(all_models, 2)
        return selected[0], selected[1]

    def load_random_scenario(character: str):
        """Pick a random scenario and fill the input box with its message.

        Returns (formatted user message, scenario id) — the id also updates
        the scenario dropdown.
        """
        scenario = scenario_loader.get_random_scenario()
        if scenario:
            user_msg = scenario_loader.format_user_input(scenario, character)
            return user_msg, scenario["id"]
        return "", None

    def load_scenario_input(scenario_id: str, character: str):
        """Fill the input box from the scenario selected in the dropdown."""
        scenario = scenario_loader.get_scenario(scenario_id)
        if scenario:
            return scenario_loader.format_user_input(scenario, character)
        return ""

    def generate_responses(
        model_a: str,
        model_b: str,
        character: str,
        user_msg: str,
        current_state: dict,
    ):
        """Generate both models' responses for the same prompt.

        Returns a 7-tuple matching the output components:
        (thinking_a, response_a, meta_a, thinking_b, response_b, meta_b, state).
        """
        if not model_a or not model_b:
            return (
                "*(๋ชจ๋ธ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”)*", "", "",
                "*(๋ชจ๋ธ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”)*", "", "",
                current_state,
            )

        if not user_msg.strip():
            return (
                "*(๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”)*", "", "",
                "*(๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”)*", "", "",
                current_state,
            )

        system_prompt = build_system_prompt(character)
        messages = [{"role": "user", "content": user_msg}]

        # Mock mode (testing without real models); responses include a
        # <think>...</think> section so the parsing path is exercised too.
        if use_mock or model_manager is None:
            response_a_full = f"<think>\n{character}์˜ ์ž…์žฅ์—์„œ ์ƒ๊ฐํ•ด๋ณด๋ฉด... ์ด ๋ฉ”์‹œ์ง€์— ์–ด๋–ป๊ฒŒ ๋ฐ˜์‘ํ•ด์•ผ ํ• ๊นŒ?\n</think>\n\n์•ˆ๋…•! ๋ฐ˜๊ฐ€์›Œ~ (Mock Response A)"
            response_b_full = f"<think>\n์Œ... ์ด๋Ÿฐ ์ƒํ™ฉ์—์„œ๋Š”...\n</think>\n\nํ—ค์ด~ ๋ญํ•ด? (Mock Response B)"
            meta_a = {"latency_s": 0.5, "output_tokens": 50}
            meta_b = {"latency_s": 0.6, "output_tokens": 55}
        else:
            # Real model inference; each model failing independently yields
            # an inline error string rather than aborting the round.
            try:
                response_a_full, meta_a = model_manager.generate_response(
                    model_a, messages, system_prompt
                )
            except Exception as e:
                response_a_full = f"*Error: {str(e)}*"
                meta_a = {"latency_s": 0, "output_tokens": 0}

            try:
                response_b_full, meta_b = model_manager.generate_response(
                    model_b, messages, system_prompt
                )
            except Exception as e:
                response_b_full = f"*Error: {str(e)}*"
                meta_b = {"latency_s": 0, "output_tokens": 0}

        # Split each raw response into (thinking text, clean answer)
        think_a, clean_a = parse_thinking_response(response_a_full)
        think_b, clean_b = parse_thinking_response(response_b_full)

        # Metadata strings shown under each response
        meta_str_a = f"โฑ๏ธ {meta_a.get('latency_s', 0):.2f}s | {meta_a.get('output_tokens', 0)} tokens"
        meta_str_b = f"โฑ๏ธ {meta_b.get('latency_s', 0):.2f}s | {meta_b.get('output_tokens', 0)} tokens"

        # Record the full (unparsed) responses so votes can store them verbatim
        new_state = {
            "model_a": model_a,
            "model_b": model_b,
            "response_a": response_a_full,
            "response_b": response_b_full,
            "character": character,
            "user_input": user_msg,
        }

        return (
            format_thinking_for_display(think_a) if think_a else "*No thinking*",
            clean_a,
            meta_str_a,
            format_thinking_for_display(think_b) if think_b else "*No thinking*",
            clean_b,
            meta_str_b,
            new_state,
        )

    def handle_vote(vote_type: str, reason: str, current_state: dict):
        """Persist a vote ("a" / "b" / "tie" / "skip") and update ELO ratings.

        "skip" is stored but does not move ratings.
        """
        if not current_state.get("model_a") or not current_state.get("model_b"):
            return "๋จผ์ € ์‘๋‹ต์„ ์ƒ์„ฑํ•ด์ฃผ์„ธ์š”."

        vote_data = {
            "model_a": current_state["model_a"],
            "model_b": current_state["model_b"],
            "response_a": current_state.get("response_a", ""),
            "response_b": current_state.get("response_b", ""),
            "character": current_state.get("character", ""),
            "user_input": current_state.get("user_input", ""),
            "vote": vote_type,
            "reason": reason,
        }

        vote_id = vote_storage.save_vote(vote_data)

        # ELO update (skipped votes leave ratings unchanged)
        if vote_type != "skip":
            new_a, new_b = elo_calculator.update_ratings(
                current_state["model_a"],
                current_state["model_b"],
                vote_type,
            )
            return f"ํˆฌํ‘œ ์™„๋ฃŒ! (ID: {vote_id})\n\nELO ๋ณ€๊ฒฝ:\n- {current_state['model_a']}: {new_a:.0f}\n- {current_state['model_b']}: {new_b:.0f}"

        return f"์Šคํ‚ต๋จ (ID: {vote_id})"

    def update_model_labels(blind: bool, model_a: str, model_b: str):
        """Show either anonymous or descriptive labels depending on blind mode."""
        if blind:
            return "### Model A", "### Model B"
        else:
            info_a = get_model_info(model_a)
            info_b = get_model_info(model_b)
            label_a = f"### {info_a.get('description', model_a)}" if info_a else f"### {model_a}"
            label_b = f"### {info_b.get('description', model_b)}" if info_b else f"### {model_b}"
            return label_a, label_b

    # ============================================================
    # Event bindings
    # ============================================================

    random_models_btn.click(
        fn=select_random_models,
        outputs=[model_a_dropdown, model_b_dropdown],
    )

    # NOTE(review): this also writes the scenario id into scenario_dropdown,
    # which fires the .change handler below and re-fills user_input with the
    # same scenario text — harmless, but a double update. Confirm intended.
    random_scenario_btn.click(
        fn=load_random_scenario,
        inputs=[character_dropdown],
        outputs=[user_input, scenario_dropdown],
    )

    scenario_dropdown.change(
        fn=load_scenario_input,
        inputs=[scenario_dropdown, character_dropdown],
        outputs=[user_input],
    )

    submit_btn.click(
        fn=generate_responses,
        inputs=[model_a_dropdown, model_b_dropdown, character_dropdown, user_input, state],
        outputs=[thinking_a, response_a, metadata_a, thinking_b, response_b, metadata_b, state],
    )

    # Refresh labels when blind mode is toggled
    blind_mode.change(
        fn=update_model_labels,
        inputs=[blind_mode, model_a_dropdown, model_b_dropdown],
        outputs=[model_a_label, model_b_label],
    )

    # Vote buttons — each binds handle_vote with a fixed vote_type
    vote_a_btn.click(
        fn=lambda r, s: handle_vote("a", r, s),
        inputs=[vote_reason, state],
        outputs=[vote_result],
    )
    vote_b_btn.click(
        fn=lambda r, s: handle_vote("b", r, s),
        inputs=[vote_reason, state],
        outputs=[vote_result],
    )
    vote_tie_btn.click(
        fn=lambda r, s: handle_vote("tie", r, s),
        inputs=[vote_reason, state],
        outputs=[vote_result],
    )
    vote_skip_btn.click(
        fn=lambda r, s: handle_vote("skip", r, s),
        inputs=[vote_reason, state],
        outputs=[vote_result],
    )
ui/chat_tab.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Single Chat ํƒญ UI"""
2
+
3
+ import gradio as gr
4
+ from typing import Any
5
+
6
+ from models.model_registry import get_all_models, get_model_info
7
+ from characters import get_character_loader, build_system_prompt
8
+ from utils import parse_thinking_response, format_thinking_for_display
9
+
10
+
11
def create_chat_tab(
    model_manager: Any = None,
    use_mock: bool = False,
):
    """Build the single-model chat tab.

    Args:
        model_manager: object exposing
            generate_response(model_id, messages, system_prompt) -> (text, meta);
            when None, mock responses are produced.
        use_mock: force mock responses even if a model_manager is supplied.
    """

    # Data loader
    char_loader = get_character_loader()

    # Model list: (display description, model_id) pairs
    all_models = get_all_models()
    model_choices = [(f"{get_model_info(m).get('description', m)}", m) for m in all_models]

    # Character list
    characters = char_loader.get_character_names()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ๋‹จ์ผ ๋ชจ๋ธ ์ฑ„ํŒ…")
    gr.Markdown("์„ ํƒํ•œ ๋ชจ๋ธ๊ณผ ์บ๋ฆญํ„ฐ๋กœ ๋Œ€ํ™”๋ฅผ ๋‚˜๋ˆ ๋ณด์„ธ์š”.")

    with gr.Row():
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=all_models[0] if all_models else None,
                label="๋ชจ๋ธ ์„ ํƒ",
            )
        with gr.Column(scale=1):
            character_dropdown = gr.Dropdown(
                choices=characters,
                value=characters[0] if characters else None,
                label="์บ๋ฆญํ„ฐ ์„ ํƒ",
            )

    # Chat area; type="messages" means history entries are
    # {"role": ..., "content": ...} dicts
    chatbot = gr.Chatbot(
        label="๋Œ€ํ™”",
        height=400,
        type="messages",
    )

    with gr.Accordion("Thinking Process (๋งˆ์ง€๋ง‰ ์‘๋‹ต)", open=False):
        thinking_display = gr.Markdown("*(์‘๋‹ต ์ƒ์„ฑ ํ›„ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค)*")

    with gr.Row():
        user_input = gr.Textbox(
            label="๋ฉ”์‹œ์ง€ ์ž…๋ ฅ",
            placeholder="๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
            lines=2,
            scale=4,
        )
        send_btn = gr.Button("์ „์†ก", variant="primary", scale=1)

    with gr.Row():
        clear_btn = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")

    metadata_display = gr.Markdown("")

    # ============================================================
    # Event handlers
    # ============================================================

    def respond(
        model_id: str,
        character: str,
        message: str,
        history: list,
    ):
        """Generate a reply and append the turn to the chat history.

        Returns (history, cleared input, thinking markdown, metadata string).
        """
        if not message.strip():
            return history, "", "*(๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”)*", ""

        # Rebuild the model-facing conversation from the displayed history
        messages = []
        for msg in history:
            if msg["role"] == "user":
                messages.append({"role": "user", "content": msg["content"]})
            elif msg["role"] == "assistant":
                # Strip the <think> section — only the clean answer goes
                # back into the model's context
                _, clean = parse_thinking_response(msg["content"])
                messages.append({"role": "assistant", "content": clean})

        messages.append({"role": "user", "content": message})

        system_prompt = build_system_prompt(character)

        # Mock or real inference
        if use_mock or model_manager is None:
            response_full = f"<think>\n{character}๋กœ์„œ ์ƒ๊ฐํ•ด๋ณด๋ฉด...\n</think>\n\n์•ˆ๋…•~ ๋ฐ˜๊ฐ€์›Œ! (Mock Response)"
            meta = {"latency_s": 0.5, "output_tokens": 30}
        else:
            try:
                response_full, meta = model_manager.generate_response(
                    model_id, messages, system_prompt
                )
            except Exception as e:
                response_full = f"*Error: {str(e)}*"
                meta = {"latency_s": 0, "output_tokens": 0}

        # Split raw response into (thinking text, clean answer)
        thinking, clean_response = parse_thinking_response(response_full)

        # Append the full (unstripped) response so thinking can be
        # re-parsed from history on the next turn
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response_full})

        # Latency / token metadata line
        meta_str = f"โฑ๏ธ {meta.get('latency_s', 0):.2f}s | {meta.get('output_tokens', 0)} tokens"

        return (
            history,
            "",  # clear the input box
            format_thinking_for_display(thinking) if thinking else "*No thinking*",
            meta_str,
        )

    def clear_chat():
        """Reset the conversation and all auxiliary displays."""
        return [], "", "*(์‘๋‹ต ์ƒ์„ฑ ํ›„ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค)*", ""

    # ============================================================
    # Event bindings
    # ============================================================

    send_btn.click(
        fn=respond,
        inputs=[model_dropdown, character_dropdown, user_input, chatbot],
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )

    # Enter key in the textbox behaves like the send button
    user_input.submit(
        fn=respond,
        inputs=[model_dropdown, character_dropdown, user_input, chatbot],
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )
ui/history_tab.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """History ํƒญ UI"""
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+ import json
6
+
7
+ from voting import get_vote_storage
8
+
9
+
10
def create_history_tab():
    """Build the vote-history tab UI.

    Shows a refreshable table of the 50 most recent votes, a JSON export
    button, and read-only detail fields populated when a table row is
    selected.

    Returns:
        The ``refresh_history`` callable, so the app can trigger an initial
        table load when the page opens.
    """

    vote_storage = get_vote_storage()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ํˆฌํ‘œ ํžˆ์Šคํ† ๋ฆฌ")
    gr.Markdown("์ตœ๊ทผ ํˆฌํ‘œ ๊ธฐ๋ก์„ ํ™•์ธํ•˜๊ณ  ๋ฐ์ดํ„ฐ๋ฅผ ๋‹ค์šด๋กœ๋“œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")

    with gr.Row():
        refresh_btn = gr.Button("์ƒˆ๋กœ๊ณ ์นจ")
        download_btn = gr.Button("JSON ๋‹ค์šด๋กœ๋“œ")

    history_table = gr.Dataframe(
        headers=["ID", "์‹œ๊ฐ„", "Model A", "Model B", "๊ฒฐ๊ณผ", "์บ๋ฆญํ„ฐ"],
        label="์ตœ๊ทผ ํˆฌํ‘œ (50๊ฑด)",
        interactive=False,
    )

    download_output = gr.File(label="๋‹ค์šด๋กœ๋“œ ํŒŒ์ผ", visible=False)

    gr.Markdown("### ํˆฌํ‘œ ์ƒ์„ธ ์ •๋ณด")

    with gr.Row():
        with gr.Column():
            detail_model_a = gr.Textbox(label="Model A", interactive=False)
            detail_response_a = gr.Textbox(label="Response A", lines=5, interactive=False)
        with gr.Column():
            detail_model_b = gr.Textbox(label="Model B", interactive=False)
            detail_response_b = gr.Textbox(label="Response B", lines=5, interactive=False)

    detail_user_input = gr.Textbox(label="์‚ฌ์šฉ์ž ์ž…๋ ฅ", interactive=False)
    detail_reason = gr.Textbox(label="ํˆฌํ‘œ ์ด์œ ", interactive=False)

    # ============================================================
    # Event handlers
    # ============================================================

    def _truncate(text: str, limit: int = 500) -> str:
        # Keep the detail boxes readable when responses are very long.
        return text[:limit] + "..." if len(text) > limit else text

    def refresh_history():
        """Rebuild the history table from the latest 50 votes, newest first."""
        votes = vote_storage.get_recent_votes(50)

        result_map = {"a": "A ์Šน", "b": "B ์Šน", "tie": "๋ฌด์Šน๋ถ€", "skip": "์Šคํ‚ต"}
        rows = []
        for v in reversed(votes):  # newest first
            rows.append([
                v.get("id", ""),
                v.get("timestamp", "")[:19],  # trim ISO timestamp to seconds
                v.get("model_a", "")[:30],
                v.get("model_b", "")[:30],
                result_map.get(v.get("vote", ""), v.get("vote", "")),
                v.get("character", ""),
            ])

        return pd.DataFrame(
            rows,
            columns=["ID", "์‹œ๊ฐ„", "Model A", "Model B", "๊ฒฐ๊ณผ", "์บ๋ฆญํ„ฐ"],
        )

    def prepare_download():
        """Export all votes to JSON and reveal the download file widget."""
        import os
        import tempfile

        # Bug fix: "/tmp" was hard-coded, which breaks on Windows; use the
        # platform temp directory instead.
        output_path = os.path.join(tempfile.gettempdir(), "votes_export.json")
        vote_storage.export_to_json(output_path)
        return gr.File(value=output_path, visible=True)

    def show_vote_detail(evt: gr.SelectData, df: pd.DataFrame):
        """Fill the detail fields for the vote selected in the table."""
        if evt.index[0] is None:
            return "", "", "", "", "", ""

        row_idx = evt.index[0]
        vote_id = df.iloc[row_idx]["ID"]

        # Look the selected vote up by id in the full store.
        vote = next(
            (v for v in vote_storage.get_all_votes() if v.get("id") == vote_id),
            None,
        )
        if not vote:
            return "", "", "", "", "", ""

        return (
            vote.get("model_a", ""),
            _truncate(vote.get("response_a", "")),
            vote.get("model_b", ""),
            _truncate(vote.get("response_b", "")),
            vote.get("user_input", ""),
            vote.get("reason", ""),
        )

    # ============================================================
    # Event bindings
    # ============================================================

    refresh_btn.click(
        fn=refresh_history,
        outputs=[history_table],
    )

    download_btn.click(
        fn=prepare_download,
        outputs=[download_output],
    )

    history_table.select(
        fn=show_vote_detail,
        inputs=[history_table],
        outputs=[detail_model_a, detail_response_a, detail_model_b, detail_response_b, detail_user_input, detail_reason],
    )

    # Return the loader so the app can refresh the table on page load.
    return refresh_history
ui/leaderboard_tab.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Leaderboard ํƒญ UI"""
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+
6
+ from voting import get_vote_storage, get_elo_calculator
7
+ from models.model_registry import get_model_info
8
+
9
+
10
def create_leaderboard_tab():
    """Build the ELO leaderboard tab UI.

    Shows a ranking table computed from stored votes plus a vote-count
    summary row.

    Returns:
        The ``refresh_leaderboard`` callable, so the app can populate the
        table when the page loads.
    """

    vote_storage = get_vote_storage()
    elo_calculator = get_elo_calculator()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ELO ๋ฆฌ๋”๋ณด๋“œ")
    gr.Markdown("ํˆฌํ‘œ ๊ฒฐ๊ณผ์— ๊ธฐ๋ฐ˜ํ•œ ๋ชจ๋ธ ์ˆœ์œ„์ž…๋‹ˆ๋‹ค.")

    refresh_btn = gr.Button("์ƒˆ๋กœ๊ณ ์นจ")

    leaderboard_table = gr.Dataframe(
        headers=["์ˆœ์œ„", "๋ชจ๋ธ", "ELO", "์Šน", "ํŒจ", "๋ฌด", "์ด", "์Šน๋ฅ "],
        label="๋ฆฌ๋”๋ณด๋“œ",
        interactive=False,
    )

    gr.Markdown("### ํˆฌํ‘œ ์š”์•ฝ")

    with gr.Row():
        total_votes = gr.Textbox(label="์ด ํˆฌํ‘œ ์ˆ˜", interactive=False)
        a_wins_count = gr.Textbox(label="A ์Šน๋ฆฌ", interactive=False)
        b_wins_count = gr.Textbox(label="B ์Šน๋ฆฌ", interactive=False)
        ties_count = gr.Textbox(label="๋ฌด์Šน๋ถ€", interactive=False)

    # ============================================================
    # Event handlers
    # ============================================================

    def refresh_leaderboard():
        """Recompute ELO standings and the vote summary from stored votes."""
        vote_stats = vote_storage.get_model_stats()
        leaderboard = elo_calculator.get_leaderboard_with_stats(vote_stats)

        rows = []
        for rank, entry in enumerate(leaderboard, 1):
            model_info = get_model_info(entry["model"])
            # Prefer the registry's human-readable description when present.
            display_name = model_info.get("description", entry["model"]) if model_info else entry["model"]
            rows.append([
                rank,
                display_name,
                entry["elo"],
                entry["wins"],
                entry["losses"],
                entry["ties"],
                entry["total"],
                entry["win_rate"],
            ])

        df = pd.DataFrame(
            rows,
            columns=["์ˆœ์œ„", "๋ชจ๋ธ", "ELO", "์Šน", "ํŒจ", "๋ฌด", "์ด", "์Šน๋ฅ "],
        )

        summary = vote_storage.get_vote_summary()

        return (
            df,
            str(summary["total"]),
            str(summary["a_wins"]),
            str(summary["b_wins"]),
            str(summary["ties"]),
        )

    # ============================================================
    # Event bindings
    # ============================================================

    refresh_btn.click(
        fn=refresh_leaderboard,
        outputs=[leaderboard_table, total_votes, a_wins_count, b_wins_count, ties_count],
    )

    # Bug fix: the original instantiated a stray, unused `gr.Blocks()` here;
    # it was never attached to anything and has been removed.
    # Return the loader so the app can refresh on page load.
    return refresh_leaderboard
+ return refresh_leaderboard
utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .thinking_parser import parse_thinking_response, format_thinking_for_display
utils/thinking_parser.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """<think> ํƒœ๊ทธ ํŒŒ์‹ฑ ์œ ํ‹ธ๋ฆฌํ‹ฐ"""
2
+
3
+ import re
4
+ from typing import Tuple, Optional
5
+
6
+
7
def parse_thinking_response(response: str) -> Tuple[Optional[str], str]:
    """Split a model response into its <think> content and the visible reply.

    Returns:
        (thinking_content, clean_response); ``thinking_content`` is None
        when no complete <think>...</think> block is present.
    """
    if not response:
        return None, ""

    think_re = re.compile(r'<think>(.*?)</think>', re.DOTALL)
    found = think_re.search(response)
    if found is None:
        # No complete thinking block: the whole response is visible text.
        return None, response

    # The first thinking block is surfaced; all blocks are stripped from
    # the visible reply.
    thinking = found.group(1).strip()
    visible = think_re.sub('', response).strip()
    return thinking, visible
28
+
29
+
30
def format_thinking_for_display(thinking: str) -> str:
    """Render thinking content as markdown, bolding the stage markers."""
    if not thinking:
        return "*No thinking process*"

    # Six-stage structure markers (when present) are emphasized in bold.
    stage_markers = (
        "[์ƒํ™ฉ๋ถ„์„]", "[๊ด€๊ณ„๋‹จ๊ณ„]", "[์บ๋ฆญํ„ฐ์Šคํƒ€์ผ]",
        "[๋ฐ€๋‹น๊ฒฐ์ •]", "[๊ธˆ์ง€ํŒจํ„ด์ฒดํฌ]", "[์‘๋‹ต์„ค๊ณ„]",
    )

    # One regex pass; the markers are disjoint literals, so this matches
    # the behavior of replacing them one by one.
    marker_pattern = "|".join(re.escape(marker) for marker in stage_markers)
    return re.sub(marker_pattern, lambda m: f"**{m.group(0)}**", thinking)
49
+
50
+
51
def extract_response_only(full_response: str) -> str:
    """Return only the visible reply, discarding any thinking content."""
    parsed = parse_thinking_response(full_response)
    return parsed[1]
55
+
56
+
57
def has_thinking_tag(response: str) -> bool:
    """Report whether the response contains a complete <think>...</think> block."""
    return re.search(r'<think>.*?</think>', response, re.DOTALL) is not None
61
+
62
+
63
def get_thinking_stats(response: str) -> dict:
    """Summarize thinking/response lengths for a full model response."""
    thinking, visible = parse_thinking_response(response)
    has_thinking = thinking is not None

    return {
        "has_thinking": has_thinking,
        "thinking_length": len(thinking) if thinking else 0,
        "response_length": len(visible),
        "total_length": len(response),
    }
voting/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .vote_storage import VoteStorage, get_vote_storage
2
+ from .elo_calculator import ELOCalculator, get_elo_calculator
voting/elo_calculator.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ELO ๋ ˆ์ดํŒ… ์‹œ์Šคํ…œ"""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Dict, List, Tuple, Optional
6
+ from threading import Lock
7
+
8
+
9
class ELOCalculator:
    """ELO rating system for pairwise model comparisons.

    Ratings are persisted as a JSON mapping ``{model_name: rating}`` at
    ``ratings_path``. Mutating operations (update/reset) hold one lock
    across the whole read-modify-write-save sequence.

    Bug fix: the original acquired the non-reentrant ``Lock`` both around
    the rating mutation and inside ``_save_ratings``, so the mutate+save
    pair was either deadlock-prone or non-atomic under concurrent votes.
    The file write now lives in an unlocked private helper and each public
    mutator takes the lock exactly once.
    """

    def __init__(
        self,
        k_factor: float = 32,
        initial_rating: int = 1500,
        ratings_path: str = "elo_ratings.json",
    ):
        self.k_factor = k_factor
        self.initial_rating = initial_rating
        self.ratings_path = Path(ratings_path)
        self.lock = Lock()
        self.ratings: Dict[str, float] = {}
        self._load_ratings()

    def _load_ratings(self):
        """Load persisted ratings; a corrupt or unreadable file starts empty."""
        if self.ratings_path.exists():
            try:
                with open(self.ratings_path, "r", encoding="utf-8") as f:
                    self.ratings = json.load(f)
            except (json.JSONDecodeError, IOError):
                self.ratings = {}

    def _write_ratings(self):
        """Write ratings to disk. Caller must hold ``self.lock``."""
        with open(self.ratings_path, "w", encoding="utf-8") as f:
            json.dump(self.ratings, f, ensure_ascii=False, indent=2)

    def _save_ratings(self):
        """Persist ratings to disk (acquires the lock itself)."""
        with self.lock:
            self._write_ratings()

    def get_rating(self, model: str) -> float:
        """Current rating for *model*, or the initial rating if unseen."""
        return self.ratings.get(model, self.initial_rating)

    def expected_score(self, rating_a: float, rating_b: float) -> float:
        """Expected win probability of A against B (standard ELO formula)."""
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def update_ratings(
        self,
        model_a: str,
        model_b: str,
        result: str,  # "a", "b", "tie"
    ) -> Tuple[float, float]:
        """Apply one match result and persist the new ratings.

        Args:
            model_a: First model identifier.
            model_b: Second model identifier.
            result: "a" (A won), "b" (B won), anything else counts as a tie.

        Returns:
            (new_rating_a, new_rating_b)
        """
        # Hold the lock across read, update, and save so concurrent votes
        # cannot interleave between computing and persisting ratings.
        with self.lock:
            rating_a = self.get_rating(model_a)
            rating_b = self.get_rating(model_b)

            expected_a = self.expected_score(rating_a, rating_b)
            expected_b = self.expected_score(rating_b, rating_a)

            # Actual scores
            if result == "a":
                actual_a, actual_b = 1.0, 0.0
            elif result == "b":
                actual_a, actual_b = 0.0, 1.0
            else:  # tie
                actual_a, actual_b = 0.5, 0.5

            # New ratings
            new_rating_a = rating_a + self.k_factor * (actual_a - expected_a)
            new_rating_b = rating_b + self.k_factor * (actual_b - expected_b)

            self.ratings[model_a] = new_rating_a
            self.ratings[model_b] = new_rating_b
            self._write_ratings()

        return new_rating_a, new_rating_b

    def get_leaderboard(self) -> List[Tuple[str, float]]:
        """All models sorted by rating, highest first."""
        return sorted(self.ratings.items(), key=lambda item: item[1], reverse=True)

    def get_leaderboard_with_stats(
        self,
        vote_stats: Dict[str, Dict],
    ) -> List[Dict]:
        """Leaderboard rows enriched with win/loss/tie stats from *vote_stats*."""
        leaderboard = []
        for model, rating in self.get_leaderboard():
            stats = vote_stats.get(model, {})
            leaderboard.append({
                "model": model,
                "elo": round(rating),
                "wins": stats.get("wins", 0),
                "losses": stats.get("losses", 0),
                "ties": stats.get("ties", 0),
                "total": stats.get("total", 0),
                "win_rate": f"{stats.get('win_rate', 0) * 100:.1f}%",
            })
        return leaderboard

    def get_all_ratings(self) -> Dict[str, float]:
        """Snapshot copy of all ratings."""
        with self.lock:
            return self.ratings.copy()

    def reset_ratings(self):
        """Clear all ratings and persist the empty state."""
        with self.lock:
            self.ratings = {}
            self._write_ratings()
117
+
118
+
119
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
120
+ _elo_calculator: Optional[ELOCalculator] = None
121
+
122
+
123
+ def get_elo_calculator(
124
+ k_factor: float = 32,
125
+ initial_rating: int = 1500,
126
+ ratings_path: str = "elo_ratings.json",
127
+ ) -> ELOCalculator:
128
+ """ELOCalculator ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค"""
129
+ global _elo_calculator
130
+ if _elo_calculator is None:
131
+ _elo_calculator = ELOCalculator(
132
+ k_factor=k_factor,
133
+ initial_rating=initial_rating,
134
+ ratings_path=ratings_path,
135
+ )
136
+ return _elo_calculator
voting/vote_storage.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ํˆฌํ‘œ ๋ฐ์ดํ„ฐ ์ €์žฅ ๋ฐ ๊ด€๋ฆฌ"""
2
+
3
+ import json
4
+ import os
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional
8
+ from threading import Lock
9
+ import uuid
10
+
11
+
12
class VoteStorage:
    """Append-only JSONL store for A/B vote records."""

    def __init__(self, storage_path: str = "votes.jsonl"):
        self.storage_path = Path(storage_path)
        self.lock = Lock()
        self._ensure_file_exists()

    def _ensure_file_exists(self):
        """Create the storage file (and parent directories) if missing."""
        if self.storage_path.exists():
            return
        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
        self.storage_path.touch()

    def save_vote(self, vote_data: Dict) -> str:
        """Append one vote record and return its generated 8-char id.

        Keys in *vote_data* override the generated ``id``/``timestamp``,
        matching the original merge order.
        """
        vote_id = uuid.uuid4().hex[:8]

        record = {
            "id": vote_id,
            "timestamp": datetime.now().isoformat(),
        }
        record.update(vote_data)

        serialized = json.dumps(record, ensure_ascii=False) + "\n"
        with self.lock:
            with open(self.storage_path, "a", encoding="utf-8") as f:
                f.write(serialized)

        return vote_id

    def get_all_votes(self) -> List[Dict]:
        """Read every valid JSON line from the store, skipping corrupt ones."""
        if not self.storage_path.exists():
            return []

        records: List[Dict] = []
        with open(self.storage_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                if not raw_line.strip():
                    continue
                try:
                    records.append(json.loads(raw_line))
                except json.JSONDecodeError:
                    pass  # tolerate a torn/corrupt line
        return records

    def get_recent_votes(self, limit: int = 50) -> List[Dict]:
        """Last *limit* votes in chronological (oldest-to-newest) order."""
        return self.get_all_votes()[-limit:]

    def get_model_stats(self) -> Dict[str, Dict]:
        """Per-model win/loss/tie/total tallies plus ``win_rate``.

        Skipped votes register the models but count toward no tally.
        """
        stats: Dict[str, Dict] = {}

        def _entry(model):
            # Lazily create a zeroed tally row for a model.
            return stats.setdefault(
                model, {"wins": 0, "losses": 0, "ties": 0, "total": 0}
            )

        for vote in self.get_all_votes():
            model_a = vote.get("model_a")
            model_b = vote.get("model_b")
            if not model_a or not model_b:
                continue

            result = vote.get("vote")
            tally_a = _entry(model_a)
            tally_b = _entry(model_b)

            if result == "a":
                tally_a["wins"] += 1
                tally_b["losses"] += 1
            elif result == "b":
                tally_b["wins"] += 1
                tally_a["losses"] += 1
            elif result == "tie":
                tally_a["ties"] += 1
                tally_b["ties"] += 1

            if result != "skip":
                tally_a["total"] += 1
                tally_b["total"] += 1

        # Derive win rate per model.
        for tallies in stats.values():
            total = tallies["total"]
            tallies["win_rate"] = tallies["wins"] / total if total else 0.0

        return stats

    def get_total_votes(self) -> int:
        """Total number of stored votes (skips included)."""
        return len(self.get_all_votes())

    def get_vote_summary(self) -> Dict:
        """Overall counts of A wins, B wins, ties, and skips."""
        outcomes = [v.get("vote") for v in self.get_all_votes()]

        return {
            "total": len(outcomes),
            "a_wins": outcomes.count("a"),
            "b_wins": outcomes.count("b"),
            "ties": outcomes.count("tie"),
            "skips": outcomes.count("skip"),
        }

    def export_to_json(self, output_path: str):
        """Write all votes to *output_path* as one pretty-printed JSON array."""
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(self.get_all_votes(), f, ensure_ascii=False, indent=2)
128
+
129
+
130
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
131
+ _vote_storage: Optional[VoteStorage] = None
132
+
133
+
134
+ def get_vote_storage(storage_path: str = "votes.jsonl") -> VoteStorage:
135
+ """VoteStorage ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค"""
136
+ global _vote_storage
137
+ if _vote_storage is None:
138
+ _vote_storage = VoteStorage(storage_path)
139
+ return _vote_storage