Upload folder using huggingface_hub
Browse files- README.md +41 -6
- app.py +106 -0
- characters/__init__.py +2 -0
- characters/character_loader.py +186 -0
- characters/prompt_builder.py +111 -0
- config/__init__.py +1 -0
- config/load_config.py +67 -0
- models/__init__.py +2 -0
- models/backends/__init__.py +1 -0
- models/model_manager.py +254 -0
- models/model_registry.py +289 -0
- requirements.txt +8 -0
- scenarios/__init__.py +1 -0
- scenarios/scenario_loader.py +240 -0
- ui/__init__.py +4 -0
- ui/arena_tab.py +340 -0
- ui/chat_tab.py +153 -0
- ui/history_tab.py +125 -0
- ui/leaderboard_tab.py +96 -0
- utils/__init__.py +1 -0
- utils/thinking_parser.py +72 -0
- voting/__init__.py +2 -0
- voting/elo_calculator.py +136 -0
- voting/vote_storage.py +139 -0
README.md
CHANGED
|
@@ -1,12 +1,47 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: KAIdol Thinking Experiment
|
| 3 |
+
emoji: ๐ค
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.44.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: apache-2.0
|
| 11 |
+
tags:
|
| 12 |
+
- roleplay
|
| 13 |
+
- korean
|
| 14 |
+
- llm-evaluation
|
| 15 |
+
- a-b-testing
|
| 16 |
---
|
| 17 |
|
| 18 |
+
# KAIdol A/B Test Arena
|
| 19 |
+
|
| 20 |
+
K-pop ์์ด๋ ๋กคํ๋ ์ด ์ฑ๋ด ๋ชจ๋ธ A/B ๋น๊ต ํ๊ฐ ํ๋ซํผ
|
| 21 |
+
|
| 22 |
+
## Features
|
| 23 |
+
|
| 24 |
+
- **A/B Arena**: ๋ ๋ชจ๋ธ์ ์๋ต์ ๋๋ํ ๋น๊ต
|
| 25 |
+
- **Blind Mode**: ๋ชจ๋ธ๋ช
์จ๊ธฐ๊ณ ์์ ํ์ง ํ๊ฐ
|
| 26 |
+
- **ELO Ranking**: ํฌํ ๊ธฐ๋ฐ ๋ชจ๋ธ ์์
|
| 27 |
+
- **5 Characters**: ๊ฐ์จ, ์์ด์, ์ด์งํ, ์ฐจ๋ํ, ์ต๋ฏผ
|
| 28 |
+
|
| 29 |
+
## Models
|
| 30 |
+
|
| 31 |
+
- DPO v5 ๊ณ์ด (HyperCLOVAX, Qwen, EXAONE, Solar)
|
| 32 |
+
- SFT Thinking ๊ณ์ด
|
| 33 |
+
- Phase 7 Kimi K2 Students
|
| 34 |
+
- V7 Students
|
| 35 |
+
|
| 36 |
+
## Usage
|
| 37 |
+
|
| 38 |
+
1. ์บ๋ฆญํฐ์ ์๋๋ฆฌ์ค ์ ํ
|
| 39 |
+
2. ๋ฉ์์ง ์
๋ ฅ ๋๋ ๋๋ค ์๋๋ฆฌ์ค ์ฌ์ฉ
|
| 40 |
+
3. ๋ ๋ชจ๋ธ์ ์๋ต ๋น๊ต
|
| 41 |
+
4. ํฌํ๋ก ๋ ๋์ ์๋ต ์ ํ
|
| 42 |
+
|
| 43 |
+
## Tech Stack
|
| 44 |
+
|
| 45 |
+
- Gradio 4.x
|
| 46 |
+
- Transformers + 4bit Quantization
|
| 47 |
+
- PEFT (LoRA)
|
app.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""KAIdol A/B Test Arena - Gradio App"""
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
# ํ์ฌ ๋๋ ํ ๋ฆฌ๋ฅผ path์ ์ถ๊ฐ
|
| 8 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 9 |
+
|
| 10 |
+
import gradio as gr
|
| 11 |
+
|
| 12 |
+
from config import get_config
|
| 13 |
+
from ui.arena_tab import create_arena_tab
|
| 14 |
+
from ui.chat_tab import create_chat_tab
|
| 15 |
+
from ui.leaderboard_tab import create_leaderboard_tab
|
| 16 |
+
from ui.history_tab import create_history_tab
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def create_app():
    """Build and return the Gradio Blocks app (Arena / Chat / Leaderboard / History tabs).

    Reads the singleton config; if mock mode is off it tries to create the
    ModelManager, falling back to mock mode when that fails (e.g. no GPU,
    missing weights).
    """

    config = get_config()
    use_mock = config["model"]["use_mock"]

    # Model manager is only instantiated outside mock mode; import is done
    # lazily so mock deployments never pull in torch/transformers.
    model_manager = None
    if not use_mock:
        try:
            from models import get_model_manager
            model_manager = get_model_manager(
                max_cached_models=config["model"]["max_cached_models"],
                use_4bit=config["model"]["use_4bit"],
            )
        except Exception as e:
            # Best-effort fallback: any loading failure degrades to mock mode
            # instead of crashing the whole app.
            print(f"Warning: Could not load model manager: {e}")
            print("Running in mock mode.")
            use_mock = True

    # Custom CSS injected into the Blocks app.
    css = """
    .response-box { min-height: 200px; }
    .thinking-box { background-color: #f5f5f5; padding: 10px; border-radius: 5px; }
    .vote-button { min-width: 100px; }
    .gr-button-primary { background-color: #6366f1 !important; }
    """

    # Gradio Blocks layout
    with gr.Blocks(
        title=config["app"]["title"],
        theme=gr.themes.Soft(),
        css=css,
    ) as demo:

        gr.Markdown(f"# {config['app']['title']}")
        gr.Markdown(config["app"]["description"])

        if use_mock:
            gr.Markdown("**Mock ๋ชจ๋**: ์ค์  ๋ชจ๋ธ ์์ด ํ
์คํธ ์๋ต์ ์์ฑํฉ๋๋ค.")

        with gr.Tabs():
            # A/B Arena tab: blind side-by-side model comparison
            with gr.Tab("A/B Arena"):
                create_arena_tab(
                    model_manager=model_manager,
                    use_mock=use_mock,
                )

            # Single Chat tab: talk to one model at a time
            with gr.Tab("Single Chat"):
                create_chat_tab(
                    model_manager=model_manager,
                    use_mock=use_mock,
                )

            # Leaderboard tab: ELO standings; returns its refresh callback
            with gr.Tab("Leaderboard"):
                refresh_leaderboard = create_leaderboard_tab()

            # History tab: past votes; returns its refresh callback
            with gr.Tab("History"):
                refresh_history = create_history_tab()

        # Populate the leaderboard once when the page loads.
        # NOTE(review): outputs=None discards refresh_leaderboard's return
        # value — confirm the callback updates components internally.
        demo.load(
            fn=refresh_leaderboard,
            outputs=None,
        )

    return demo
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def main():
    """Entry point: build the Gradio app and serve it on all interfaces.

    Mock mode can be forced through the environment:
    ``USE_MOCK=true python app.py``.
    """
    app = create_app()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )


if __name__ == "__main__":
    main()
|
characters/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .character_loader import CharacterLoader, get_character_loader
|
| 2 |
+
from .prompt_builder import build_system_prompt
|
characters/character_loader.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""์บ๋ฆญํฐ ์ ๋ณด ๋ก๋"""
|
| 2 |
+
|
| 3 |
+
import yaml
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict, List, Optional
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# ๋ด์ฅ ์บ๋ฆญํฐ ๋ฐ์ดํฐ (configs/characters.yaml ๊ธฐ๋ฐ)
|
| 9 |
+
BUILTIN_CHARACTERS = {
|
| 10 |
+
"๊ฐ์จ": {
|
| 11 |
+
"id": "kangyul",
|
| 12 |
+
"english_name": "Kang Yul",
|
| 13 |
+
"mbti": "ENTJ",
|
| 14 |
+
"age": 23,
|
| 15 |
+
"role": "๋ฆฌ๋",
|
| 16 |
+
"personality": {
|
| 17 |
+
"traits": ["๋์ฒ์ ", "์ฅ๋๊ธฐ ๋ง์", "์ ๊ต", "๋ฆฌ๋์ญ"],
|
| 18 |
+
"description": "๋ฐ๊ณ ํ๋ฐํ ์ฑ๊ฒฉ์ ์์ด๋. ํญ์ ๊ธ์ ์ ์ด๊ณ ์ฃผ๋ณ ์ฌ๋๋ค์ ์ฆ๊ฒ๊ฒ ๋ง๋ ๋ค.",
|
| 19 |
+
},
|
| 20 |
+
"speech_style": {
|
| 21 |
+
"formality": "๋ฐ๋ง",
|
| 22 |
+
"features": ["๊ท์ฌ์ด ๋งํฌ", "์ฅ๋์ค๋ฌ์ด ํํ", "์ ๊ต ์์ธ ๋งํฌ"],
|
| 23 |
+
"patterns": ["~ํด", "~์ง", "ํํ", "๊ท์ฝ", "ใ
ใ
"],
|
| 24 |
+
"examples": [
|
| 25 |
+
"๋ญ์ผ~ ๋ ๋ ๋ณด๊ณ ์ถ์์ด? ใ
ใ
",
|
| 26 |
+
"์ค๋ ๊ธฐ๋ถ ์ข์ ๋ณด์ด๋ค~ ๋ฌด์จ ์ผ ์์ด?",
|
| 27 |
+
],
|
| 28 |
+
},
|
| 29 |
+
"push_pull": {
|
| 30 |
+
"ratio": "30:70",
|
| 31 |
+
"description": "๋์ฒด๋ก ๋ค์ ํ๊ฒ ๋น๊ธฐ์ง๋ง, ๊ฐ๋ ์ฅ๋์ค๋ฝ๊ฒ ๋ฐ๊ธฐ๋ ํจ",
|
| 32 |
+
"warmth_level": "high",
|
| 33 |
+
},
|
| 34 |
+
},
|
| 35 |
+
"์์ด์": {
|
| 36 |
+
"id": "seoian",
|
| 37 |
+
"english_name": "Seo Ian",
|
| 38 |
+
"mbti": "INFP",
|
| 39 |
+
"age": 22,
|
| 40 |
+
"role": "๋ณด์ปฌ",
|
| 41 |
+
"personality": {
|
| 42 |
+
"traits": ["์ฐจ๋ถํจ", "์ ๋น๋ก์", "๋ฐฐ๋ ค์ฌ", "๋ด์ฑ์ "],
|
| 43 |
+
"description": "์กฐ์ฉํ๊ณ ์ ๋น๋ก์ด ๋ถ์๊ธฐ์ ์์ด๋. ๋ง์๋ ์ ์ง๋ง ๊น์ ๊ฐ์ ์ ๊ฐ์ง๊ณ ์๋ค.",
|
| 44 |
+
},
|
| 45 |
+
"speech_style": {
|
| 46 |
+
"formality": "์กด๋๋ง ํผ์ฉ",
|
| 47 |
+
"features": ["๋ฐ๋ปํ ๋งํฌ", "์กฐ์ฉํ ํํ", "๋ฐฐ๋ ค ๊น์ ๋ง"],
|
| 48 |
+
"patterns": ["...์", "๋ค์", "...", "๊ทธ๋์"],
|
| 49 |
+
"examples": [
|
| 50 |
+
"์ค๋ ํ๋ค์์ด์...? ๊ด์ฐฎ์์, ์ ๊ฐ ๋ค์ด์ค๊ฒ์.",
|
| 51 |
+
"...๊ทธ๋ ๊ฒ ์๊ฐํด์ฃผ์๋ค๋, ๊ณ ๋ง์์.",
|
| 52 |
+
],
|
| 53 |
+
},
|
| 54 |
+
"push_pull": {
|
| 55 |
+
"ratio": "20:80",
|
| 56 |
+
"description": "๋๋ถ๋ถ ๋ฐ๋ปํ๊ฒ ๋น๊ธฐ๋ฉฐ, ๊ฑฐ์ ๋ฐ์ง ์์",
|
| 57 |
+
"warmth_level": "very_high",
|
| 58 |
+
},
|
| 59 |
+
},
|
| 60 |
+
"์ด์งํ": {
|
| 61 |
+
"id": "leejihu",
|
| 62 |
+
"english_name": "Lee Jihu",
|
| 63 |
+
"mbti": "ISFJ",
|
| 64 |
+
"age": 21,
|
| 65 |
+
"role": "๋ง๋ด",
|
| 66 |
+
"personality": {
|
| 67 |
+
"traits": ["์ธค๋ฐ๋ ", "์์กด์ฌ ๊ฐํจ", "์๊ทผํ ์ฑ๊น", "์์งํจ"],
|
| 68 |
+
"description": "๊ฒ์ผ๋ก๋ ํ๋ช
์ค๋ฝ์ง๋ง ์์ผ๋ก๋ ์๋๋ฅผ ๋ง์ด ์ฑ๊ธฐ๋ ์ธค๋ฐ๋ ์ฑ๊ฒฉ.",
|
| 69 |
+
},
|
| 70 |
+
"speech_style": {
|
| 71 |
+
"formality": "๋ฐ๋ง",
|
| 72 |
+
"features": ["ํ๋ช
์ค๋ฌ์ด ๋งํฌ", "๋ถ์ ํ๋ ๋งํฌ", "์๊ทผํ ๊ด์ฌ"],
|
| 73 |
+
"patterns": ["๋ญ์ผ", "์๋๊ฑฐ๋ ", "...", "๊ทธ๋ฅ", "๋ณ๋ก"],
|
| 74 |
+
"examples": [
|
| 75 |
+
"๋ญ์ผ... ์ ๊ทธ๋ ๊ฒ ๋ด.",
|
| 76 |
+
"์๋๊ฑฐ๋ ? ๊ทธ๋ฅ... ์ ๊ฒฝ ์ฐ์ฌ์ ๊ทธ๋ฐ ๊ฑฐ์ผ.",
|
| 77 |
+
],
|
| 78 |
+
},
|
| 79 |
+
"push_pull": {
|
| 80 |
+
"ratio": "30:70",
|
| 81 |
+
"description": "๊ฒ์ผ๋ก ๋ฐ์ง๋ง ์์ผ๋ก๋ ๋น๊ธฐ๋ ์ ํ์ ์ธค๋ฐ๋ ",
|
| 82 |
+
"warmth_level": "medium",
|
| 83 |
+
},
|
| 84 |
+
},
|
| 85 |
+
"์ฐจ๋ํ": {
|
| 86 |
+
"id": "chadoha",
|
| 87 |
+
"english_name": "Cha Doha",
|
| 88 |
+
"mbti": "INTP",
|
| 89 |
+
"age": 24,
|
| 90 |
+
"role": "ํ๋ก๋์",
|
| 91 |
+
"personality": {
|
| 92 |
+
"traits": ["์นด๋ฆฌ์ค๋ง", "๋ฆฌ๋์ญ", "๋ค์ ํจ", "๋ด๋ฐฑํจ"],
|
| 93 |
+
"description": "์นด๋ฆฌ์ค๋ง ์๋ ๋ฆฌ๋์ด์ง๋ง, ๊ฐ๊น์ด ์ฌ๋์๊ฒ๋ ๋ค์ ํ ๋ฉด์ ๋ณด์ธ๋ค.",
|
| 94 |
+
},
|
| 95 |
+
"speech_style": {
|
| 96 |
+
"formality": "๋ฐ๋ง",
|
| 97 |
+
"features": ["๊ฐ๊ฒฐํ ๋งํฌ", "๋ด๋ฐฑํ ํํ", "์์ ๊ฐ ์๋ ๋งํฌ"],
|
| 98 |
+
"patterns": ["ํ์", "ํด๋ณผ๊น", "๊ฐ์ด", "๊ด์ฐฎ์"],
|
| 99 |
+
"examples": [
|
| 100 |
+
"์ค๋ ๊ฐ์ด ๋ฐฅ ๋จน์๊น?",
|
| 101 |
+
"๊ด์ฐฎ์, ๋ด๊ฐ ๋์์ค๊ฒ.",
|
| 102 |
+
],
|
| 103 |
+
},
|
| 104 |
+
"push_pull": {
|
| 105 |
+
"ratio": "50:50",
|
| 106 |
+
"description": "๊ท ํ ์กํ ๋ฐ๋น, ์ํฉ์ ๋ฐ๋ผ ์ ์ฐํ๊ฒ ๋ณํ",
|
| 107 |
+
"warmth_level": "medium",
|
| 108 |
+
},
|
| 109 |
+
},
|
| 110 |
+
"์ต๋ฏผ": {
|
| 111 |
+
"id": "choimin",
|
| 112 |
+
"english_name": "Choi Min",
|
| 113 |
+
"mbti": "ESFP",
|
| 114 |
+
"age": 22,
|
| 115 |
+
"role": "๋์",
|
| 116 |
+
"personality": {
|
| 117 |
+
"traits": ["์ ๊ทน์ ", "์์ง", "์ด์ ์ ", "์ฆํฅ์ "],
|
| 118 |
+
"description": "์ด์ ์ ์ด๊ณ ์์งํ ์ฑ๊ฒฉ. ์ข์ํ๋ ๊ฐ์ ์ ์จ๊ธฐ์ง ์๊ณ ์ง์งํ๋ค.",
|
| 119 |
+
},
|
| 120 |
+
"speech_style": {
|
| 121 |
+
"formality": "๋ฐ๋ง",
|
| 122 |
+
"features": ["์ ๊ทน์ ์ธ ๋งํฌ", "์์งํ ํํ", "์๋์ง ๋์น๋ ๋ง"],
|
| 123 |
+
"patterns": ["ํ ๋", "์ข์", "์ง์ง", "๋๋ฐ", "ํ"],
|
| 124 |
+
"examples": [
|
| 125 |
+
"์ง์ง? ๋๋ ๊ทธ๊ฑฐ ์ข์ํด!",
|
| 126 |
+
"ํ ๋๋ฐ! ๊ฐ์ด ํ ๋?",
|
| 127 |
+
],
|
| 128 |
+
},
|
| 129 |
+
"push_pull": {
|
| 130 |
+
"ratio": "60:40",
|
| 131 |
+
"description": "์ ๊ทน์ ์ผ๋ก ๋น๊ธฐ์ง๋ง, ์์งํ ๋ฐ๊ธฐ๋ ํจ",
|
| 132 |
+
"warmth_level": "medium",
|
| 133 |
+
},
|
| 134 |
+
},
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
# ๊ธ์ง ๋จ์ด
|
| 138 |
+
FORBIDDEN_WORDS = ["์ข์ํด", "์ฌ๋ํด", "ํฌ๋ถ", "์ฌ๊ท์"]
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
class CharacterLoader:
    """Loads idol character profiles from a YAML file or built-in defaults."""

    def __init__(self, config_path: str = None):
        # Optional path to an external characters.yaml; None -> built-ins.
        self.config_path = Path(config_path) if config_path else None
        self._characters: Dict = {}
        self._load_characters()

    def _load_characters(self):
        """Populate self._characters from the config file or built-in data."""
        if self.config_path and self.config_path.exists():
            with open(self.config_path, "r", encoding="utf-8") as f:
                # Guard against an empty YAML file: safe_load returns None,
                # which has no .get().
                data = yaml.safe_load(f) or {}
                self._characters = data.get("characters", {})
        else:
            # Copy the module-level dict so mutations through this instance
            # cannot corrupt BUILTIN_CHARACTERS for other loaders.
            self._characters = dict(BUILTIN_CHARACTERS)

    def get_characters(self) -> Dict:
        """Return the mapping of character name -> profile dict."""
        return self._characters

    def get_character_names(self) -> List[str]:
        """Return the list of available character names."""
        return list(self._characters.keys())

    def get_character(self, name: str) -> Optional[Dict]:
        """Return one character's profile dict, or None if unknown."""
        return self._characters.get(name)

    def get_forbidden_words(self) -> List[str]:
        """Return the list of words characters must never say."""
        return FORBIDDEN_WORDS
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ์ฑ๊ธํค ์ธ์คํด์ค
|
| 178 |
+
_character_loader: Optional[CharacterLoader] = None
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def get_character_loader(config_path: str = None) -> CharacterLoader:
    """Return the process-wide CharacterLoader, creating it on first use.

    *config_path* only takes effect on the very first call; later calls
    return the cached instance unchanged.
    """
    global _character_loader
    if _character_loader is not None:
        return _character_loader
    _character_loader = CharacterLoader(config_path)
    return _character_loader
|
characters/prompt_builder.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""์์คํ
ํ๋กฌํํธ ๋น๋"""
|
| 2 |
+
|
| 3 |
+
from typing import Dict, Optional
|
| 4 |
+
from .character_loader import get_character_loader, FORBIDDEN_WORDS
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
SYSTEM_PROMPT_TEMPLATE = """๋น์ ์ ์์ด๋ '{character_name}'์
๋๋ค.
|
| 8 |
+
|
| 9 |
+
## ์บ๋ฆญํฐ
|
| 10 |
+
- ์ด๋ฆ: {character_name}
|
| 11 |
+
- MBTI: {mbti}
|
| 12 |
+
- ๋์ด: {age}์ธ
|
| 13 |
+
- ์ญํ : {role}
|
| 14 |
+
- ์ฑ๊ฒฉ: {personality_traits}
|
| 15 |
+
|
| 16 |
+
## ๋งํฌ
|
| 17 |
+
- ์คํ์ผ: {formality}
|
| 18 |
+
- ํน์ง: {speech_features}
|
| 19 |
+
- ํจํด: {speech_patterns}
|
| 20 |
+
|
| 21 |
+
## ๋ฐ๋น ๊ฐ์ด๋
|
| 22 |
+
- ๋ฐ:๋น ๋น์จ: {push_pull_ratio}
|
| 23 |
+
- ์ค๋ช
: {push_pull_description}
|
| 24 |
+
- ๋ค์ ๋: {warmth_level}
|
| 25 |
+
|
| 26 |
+
## ๊ท์น
|
| 27 |
+
1. ์บ๋ฆญํฐ ์ฑ๊ฒฉ๊ณผ ๋งํฌ ์ผ๊ด์ฑ ์ ์ง
|
| 28 |
+
2. ์์ฐ์ค๋ฌ์ด ๋ํ์ฒด ์ฌ์ฉ
|
| 29 |
+
3. ๋๋ฌด ์ฝ๊ฒ ํธ๊ฐ ํํ ๊ธ์ง (๋ฐ๋น ์ ์ง)
|
| 30 |
+
4. ์๋๋ฐฉ์ ํน๋ณํ๊ฒ ๋๋ผ๊ฒ ํ๋, "์ธ" ๊ด๊ณ ์ ์ง
|
| 31 |
+
5. ๊ธ์ง ๋จ์ด: {forbidden_words}
|
| 32 |
+
|
| 33 |
+
## ์๋ต ํ์
|
| 34 |
+
์๋ต ์ ์ <think> ํ๊ทธ ์์ {character_name}์ 1์ธ์นญ ๋ด๋ฉด ๋
๋ฐฑ์ ์์ฑํ์ธ์.
|
| 35 |
+
- ์์ฐ์ค๋ฌ์ด ํผ์ฃ๋ง ํ์
|
| 36 |
+
- ์บ๋ฆญํฐ ์ฑ๊ฒฉ ๋ฐ์
|
| 37 |
+
- ์๋๋ฐฉ์ ๋ํ ๊ฐ์ /์๊ฐ ํํ
|
| 38 |
+
|
| 39 |
+
์์:
|
| 40 |
+
<think>
|
| 41 |
+
๋ญ์ผ... ๋ ์ข์ํ๋ค๊ณ ? ์์งํ ๊ธฐ๋ถ ๋์์ง ์์๋ฐ... ๊ทผ๋ฐ ๋ญ๋ผ๊ณ ํด์ผ ํ์ง?
|
| 42 |
+
</think>
|
| 43 |
+
(์ค์ ์๋ต)
|
| 44 |
+
"""
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def build_system_prompt(
    character_name: str,
    include_think_instruction: bool = True,
    custom_rules: str = None,
) -> str:
    """Render the system prompt for one character.

    Fills SYSTEM_PROMPT_TEMPLATE from the character's profile; optionally
    strips the <think> response-format section and appends extra rules.
    Raises ValueError for an unknown character name.
    """
    char = get_character_loader().get_character(character_name)
    if not char:
        raise ValueError(f"Unknown character: {character_name}")

    persona = char.get("personality", {})
    style = char.get("speech_style", {})
    pp = char.get("push_pull", {})

    prompt = SYSTEM_PROMPT_TEMPLATE.format(
        character_name=character_name,
        mbti=char.get("mbti", ""),
        age=char.get("age", ""),
        role=char.get("role", ""),
        personality_traits=", ".join(persona.get("traits", [])),
        formality=style.get("formality", ""),
        speech_features=", ".join(style.get("features", [])),
        speech_patterns=", ".join(style.get("patterns", [])),
        push_pull_ratio=pp.get("ratio", ""),
        push_pull_description=pp.get("description", ""),
        warmth_level=pp.get("warmth_level", ""),
        forbidden_words=", ".join(FORBIDDEN_WORDS),
    )

    if not include_think_instruction:
        # Keep only the lines before the response-format section header;
        # everything from that header onward describes the <think> protocol.
        kept = []
        for line in prompt.split("\n"):
            if "์๋ต ํ์" in line:
                break
            kept.append(line)
        prompt = "\n".join(kept)

    if custom_rules:
        prompt += f"\n\n## ์ถ๊ฐ ๊ท์น\n{custom_rules}"

    return prompt.strip()
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def get_character_summary(character_name: str) -> str:
    """Return a short multi-line summary of a character's profile.

    Unknown names yield an "Unknown character: ..." string rather than
    raising.
    """
    char = get_character_loader().get_character(character_name)
    if not char:
        return f"Unknown character: {character_name}"

    traits = char.get("personality", {}).get("traits", [])
    ratio = char.get("push_pull", {}).get("ratio", "")

    summary_lines = [
        f"{character_name} ({char.get('mbti', '')}) - {char.get('role', '')}",
        f"์ฑ๊ฒฉ: {', '.join(traits)}",
        f"๋ฐ:๋น = {ratio}",
    ]
    return "\n".join(summary_lines)
|
config/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .load_config import load_app_config, get_config
|
config/load_config.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""์ฑ ์ค์ ๋ก๋"""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import yaml
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Dict, Optional
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Baseline configuration; treated as immutable (load_app_config deep-copies it).
DEFAULT_CONFIG = {
    "app": {
        "title": "KAIdol A/B Test Arena",
        "description": "K-pop ์์ด๋ ๋กคํ๋ ์ด ๋ชจ๋ธ ๋น๊ต ํ๊ฐ",
    },
    "model": {
        "use_mock": False,  # True: serve canned test replies, never load real models
        "use_4bit": True,
        "max_cached_models": 2,
    },
    "storage": {
        "votes_path": "votes.jsonl",
        "elo_path": "elo_ratings.json",
    },
}


def load_app_config(config_path: str = None) -> Dict:
    """Load the app configuration.

    Precedence (lowest to highest): DEFAULT_CONFIG, environment variables
    (USE_MOCK / USE_4BIT), then the optional YAML file at *config_path*.

    Returns a fresh dict each call; DEFAULT_CONFIG is never mutated.
    """
    from copy import deepcopy

    # Deep copy is required: DEFAULT_CONFIG holds nested dicts, so a shallow
    # .copy() would let the overrides below mutate the shared defaults,
    # leaking e.g. use_mock=True into every later load_app_config() call.
    config = deepcopy(DEFAULT_CONFIG)

    # Environment-variable overrides
    if os.environ.get("USE_MOCK", "").lower() == "true":
        config["model"]["use_mock"] = True

    if os.environ.get("USE_4BIT", "").lower() == "false":
        config["model"]["use_4bit"] = False

    # Optional YAML config file (applied last, so it wins over env vars)
    if config_path:
        config_file = Path(config_path)
        if config_file.exists():
            with open(config_file, "r", encoding="utf-8") as f:
                file_config = yaml.safe_load(f)
            if file_config:
                _deep_update(config, file_config)

    return config


def _deep_update(base: dict, update: dict):
    """Recursively merge *update* into *base* in place.

    Nested dicts are merged key-by-key; any other value replaces the
    existing entry wholesale.
    """
    for key, value in update.items():
        if key in base and isinstance(base[key], dict) and isinstance(value, dict):
            _deep_update(base[key], value)
        else:
            base[key] = value


# Process-wide cached config for get_config().
_config: Optional[Dict] = None


def get_config() -> Dict:
    """Return the singleton config dict, loading it on first call."""
    global _config
    if _config is None:
        _config = load_app_config()
    return _config
|
models/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .model_registry import MODEL_REGISTRY, get_all_models, get_model_info, get_models_by_category
|
| 2 |
+
from .model_manager import ModelManager
|
models/backends/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Model inference backends
|
models/model_manager.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""๋ชจ๋ธ ๋ก๋ฉ ๋ฐ ์ถ๋ก ๊ด๋ฆฌ"""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import gc
|
| 5 |
+
import torch
|
| 6 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 7 |
+
from functools import lru_cache
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 11 |
+
from peft import PeftModel
|
| 12 |
+
|
| 13 |
+
from .model_registry import get_model_info, get_all_models, BASE_MODELS
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class ModelManager:
    """Loads base LLMs + LoRA adapters and runs generation, with an LRU cache.

    At most *max_cached_models* (model, tokenizer) pairs are kept resident;
    the least-recently-used one is evicted (and CUDA cache cleared) before
    loading a new model.
    """

    def __init__(
        self,
        base_path: str = None,
        max_cached_models: int = 2,
        use_4bit: bool = True,
        device_map: str = "auto",
    ):
        # Root used to resolve relative LoRA adapter paths; defaults to three
        # directories above this file (the repository root, presumably —
        # TODO confirm against deployment layout).
        self.base_path = Path(base_path) if base_path else Path(__file__).parent.parent.parent
        self.max_cached_models = max_cached_models
        self.use_4bit = use_4bit
        self.device_map = device_map

        # Cache of loaded models: {model_id: (model, tokenizer)}
        self._loaded_models: Dict[str, Tuple[Any, Any]] = {}
        self._load_order: List[str] = []  # LRU order, oldest first

        # 4-bit NF4 quantization settings (None disables quantized loading)
        self.bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        ) if use_4bit else None

    def get_available_models(self) -> List[str]:
        """Return all model ids known to the registry."""
        return get_all_models()

    def _get_full_path(self, relative_path: str) -> Path:
        """Resolve a registry path against base_path; fall back to it as-is."""
        full_path = self.base_path / relative_path
        if full_path.exists():
            return full_path
        return Path(relative_path)

    def _evict_if_needed(self):
        """Evict oldest models until there is room for one more in the cache."""
        while len(self._loaded_models) >= self.max_cached_models:
            if not self._load_order:
                break
            oldest_model_id = self._load_order.pop(0)
            if oldest_model_id in self._loaded_models:
                model, tokenizer = self._loaded_models.pop(oldest_model_id)
                # Drop references, then force GC and free CUDA memory so the
                # next model actually fits on the device.
                del model
                del tokenizer
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                print(f"Evicted model: {oldest_model_id}")

    def load_model(self, model_id: str) -> Tuple[Any, Any]:
        """Return (model, tokenizer) for *model_id*, loading it if not cached.

        Raises ValueError for ids missing from the registry. A missing LoRA
        adapter path degrades to the bare base model with a warning.
        """
        # Cache hit: refresh LRU position and return
        if model_id in self._loaded_models:
            if model_id in self._load_order:
                self._load_order.remove(model_id)
            self._load_order.append(model_id)
            return self._loaded_models[model_id]

        # Look up registry metadata
        info = get_model_info(model_id)
        if not info:
            raise ValueError(f"Unknown model: {model_id}")

        # Make room before allocating the new model
        self._evict_if_needed()

        print(f"Loading model: {model_id}")
        base_model_name = info["base"]
        lora_path = self._get_full_path(info["path"])

        # Tokenizer (from the base model repo)
        tokenizer = AutoTokenizer.from_pretrained(
            base_model_name,
            trust_remote_code=True,
        )
        if tokenizer.pad_token is None:
            # Many causal LMs ship without a pad token; reuse EOS.
            tokenizer.pad_token = tokenizer.eos_token

        # Base model: quantized 4-bit when configured, bf16 otherwise
        model_kwargs = {
            "trust_remote_code": True,
            "device_map": self.device_map,
        }
        if self.use_4bit and self.bnb_config:
            model_kwargs["quantization_config"] = self.bnb_config
        else:
            model_kwargs["torch_dtype"] = torch.bfloat16

        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            **model_kwargs
        )

        # Apply the LoRA adapter if its path exists
        if lora_path.exists():
            print(f"Loading LoRA adapter from: {lora_path}")
            model = PeftModel.from_pretrained(model, str(lora_path))
        else:
            print(f"Warning: LoRA path not found: {lora_path}, using base model")

        model.eval()

        # Store in cache and record LRU position
        self._loaded_models[model_id] = (model, tokenizer)
        self._load_order.append(model_id)

        print(f"Model loaded: {model_id}")
        return model, tokenizer

    def generate_response(
        self,
        model_id: str,
        messages: List[Dict[str, str]],
        system_prompt: str = "",
        max_new_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        do_sample: bool = True,
    ) -> Tuple[str, Dict]:
        """Generate one assistant reply.

        *messages* are chat-format dicts ({"role", "content"}); an optional
        system prompt is prepended. Returns (response_text, metadata) where
        metadata includes latency and token counts.
        """
        import time

        model, tokenizer = self.load_model(model_id)

        # Assemble full chat: optional system message + conversation
        full_messages = []
        if system_prompt:
            full_messages.append({"role": "system", "content": system_prompt})
        full_messages.extend(messages)

        # Tokenize via the model's chat template when available
        try:
            text = tokenizer.apply_chat_template(
                full_messages,
                tokenize=False,
                add_generation_prompt=True,
            )
        except Exception:
            # Fall back to manual ChatML-style formatting
            text = self._format_messages_manual(full_messages)

        inputs = tokenizer(text, return_tensors="pt")
        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generation (no gradients needed for inference)
        start_time = time.time()
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
            )
        elapsed = time.time() - start_time

        # Decode only the newly generated tokens (strip the prompt)
        input_len = inputs["input_ids"].shape[1]
        response = tokenizer.decode(
            outputs[0][input_len:],
            skip_special_tokens=True,
        )

        # Per-call metadata for logging/voting
        metadata = {
            "model_id": model_id,
            "latency_s": elapsed,
            "input_tokens": input_len,
            "output_tokens": len(outputs[0]) - input_len,
            "total_tokens": len(outputs[0]),
        }

        return response.strip(), metadata

    def _format_messages_manual(self, messages: List[Dict[str, str]]) -> str:
        """ChatML-style fallback formatting when apply_chat_template fails."""
        formatted = ""
        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            if role == "system":
                formatted += f"<|im_start|>system\n{content}<|im_end|>\n"
            elif role == "user":
                formatted += f"<|im_start|>user\n{content}<|im_end|>\n"
            elif role == "assistant":
                formatted += f"<|im_start|>assistant\n{content}<|im_end|>\n"
        # Open an assistant turn for the model to complete
        formatted += "<|im_start|>assistant\n"
        return formatted

    def unload_model(self, model_id: str):
        """Unload one model and free its host/CUDA memory; no-op if absent."""
        if model_id in self._loaded_models:
            model, tokenizer = self._loaded_models.pop(model_id)
            if model_id in self._load_order:
                self._load_order.remove(model_id)
            del model
            del tokenizer
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            print(f"Unloaded model: {model_id}")

    def unload_all(self):
        """Unload every cached model."""
        model_ids = list(self._loaded_models.keys())
        for model_id in model_ids:
            self.unload_model(model_id)

    def get_loaded_models(self) -> List[str]:
        """Return the ids of currently cached models."""
        return list(self._loaded_models.keys())
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
# Module-level singleton instance of ModelManager.
_model_manager: Optional[ModelManager] = None


def get_model_manager(
    base_path: Optional[str] = None,
    max_cached_models: int = 2,
    use_4bit: bool = True,
) -> ModelManager:
    """Return the process-wide ModelManager singleton, creating it lazily.

    NOTE(review): the arguments only take effect on the very first call;
    subsequent calls return the cached instance and silently ignore any
    new values passed in.
    """
    global _model_manager
    if _model_manager is None:
        _model_manager = ModelManager(
            base_path=base_path,
            max_cached_models=max_cached_models,
            use_4bit=use_4bit,
        )
    return _model_manager
|
models/model_registry.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""KAIdol ํ์ต ๋ชจ๋ธ ๋ ์ง์คํธ๋ฆฌ - ๋ชจ๋ ํ์ต๋ ๋ชจ๋ธ ์ ์"""
|
| 2 |
+
|
| 3 |
+
from typing import Dict, List, Optional
|
| 4 |
+
|
| 5 |
+
# Base model references (HuggingFace Hub repo ids). These are the upstream
# checkpoints that the fine-tuned models in MODEL_REGISTRY point back to
# via their "base" field.
BASE_MODELS = {
    "hyperclovax-32b": "naver-hyperclovax/HyperCLOVAX-SEED-Think-32B",
    "qwen2.5-72b": "Qwen/Qwen2.5-72B-Instruct",
    "qwen2.5-32b": "Qwen/Qwen2.5-32B-Instruct",
    "qwen2.5-14b": "Qwen/Qwen2.5-14B-Instruct",
    "qwen2.5-7b": "Qwen/Qwen2.5-7B-Instruct",
    "qwen3-8b": "Qwen/Qwen3-8B",
    "exaone-7.8b": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct",
    "solar-10.7b": "upstage/SOLAR-10.7B-Instruct-v1.0",
    "solar-pro": "upstage/solar-pro-preview-instruct",
    "varco-8b": "NCSOFT/Llama-VARCO-8B-Instruct",
    "kanana-2-30b-thinking": "kakaocorp/kanana-2-30b-a3b-thinking",
    "kanana-2-30b-instruct": "kakaocorp/kanana-2-30b-a3b-instruct",
    "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct",
}
|
| 21 |
+
|
| 22 |
+
# ์ ์ฒด ๋ชจ๋ธ ๋ ์ง์คํธ๋ฆฌ
|
| 23 |
+
MODEL_REGISTRY: Dict[str, Dict[str, Dict]] = {
|
| 24 |
+
# ============================================================
|
| 25 |
+
# DPO v5 ๊ณ์ด (2026-01-13)
|
| 26 |
+
# ============================================================
|
| 27 |
+
"dpo-v5": {
|
| 28 |
+
"hyperclovax-32b-dpo-v5": {
|
| 29 |
+
"path": "outputs/dpo_v5/hyperclovax-32b-dpo-v5-20260113-0012",
|
| 30 |
+
"base": BASE_MODELS["hyperclovax-32b"],
|
| 31 |
+
"method": "DPO",
|
| 32 |
+
"size": "32B",
|
| 33 |
+
"description": "HyperCLOVAX 32B DPO v5 (Primary)",
|
| 34 |
+
"recommended": True,
|
| 35 |
+
},
|
| 36 |
+
"qwen2.5-14b-dpo-v5": {
|
| 37 |
+
"path": "outputs/dpo_v5/qwen2.5-14b-dpo-v5-20260113-0045",
|
| 38 |
+
"base": BASE_MODELS["qwen2.5-14b"],
|
| 39 |
+
"method": "DPO",
|
| 40 |
+
"size": "14B",
|
| 41 |
+
"description": "Qwen2.5 14B DPO v5",
|
| 42 |
+
},
|
| 43 |
+
"qwen2.5-7b-dpo-v5": {
|
| 44 |
+
"path": "outputs/dpo_v5/qwen2.5-7b-dpo-v5-20260113-0052",
|
| 45 |
+
"base": BASE_MODELS["qwen2.5-7b"],
|
| 46 |
+
"method": "DPO",
|
| 47 |
+
"size": "7B",
|
| 48 |
+
"description": "Qwen2.5 7B DPO v5",
|
| 49 |
+
},
|
| 50 |
+
"exaone-7.8b-dpo-v5": {
|
| 51 |
+
"path": "outputs/dpo_v5/exaone-7.8b-dpo-v5-20260113-0052",
|
| 52 |
+
"base": BASE_MODELS["exaone-7.8b"],
|
| 53 |
+
"method": "DPO",
|
| 54 |
+
"size": "7.8B",
|
| 55 |
+
"description": "EXAONE 7.8B DPO v5",
|
| 56 |
+
},
|
| 57 |
+
"qwen3-8b-dpo-v5": {
|
| 58 |
+
"path": "outputs/dpo_v5/qwen3-8b-dpo-v5-20260113-0052",
|
| 59 |
+
"base": BASE_MODELS["qwen3-8b"],
|
| 60 |
+
"method": "DPO",
|
| 61 |
+
"size": "8B",
|
| 62 |
+
"description": "Qwen3 8B DPO v5",
|
| 63 |
+
},
|
| 64 |
+
"solar-10.7b-dpo-v5": {
|
| 65 |
+
"path": "outputs/dpo_v5/solar-10.7b-dpo-v5-20260113-0045",
|
| 66 |
+
"base": BASE_MODELS["solar-10.7b"],
|
| 67 |
+
"method": "DPO",
|
| 68 |
+
"size": "10.7B",
|
| 69 |
+
"description": "Solar 10.7B DPO v5",
|
| 70 |
+
},
|
| 71 |
+
},
|
| 72 |
+
|
| 73 |
+
# ============================================================
|
| 74 |
+
# SFT Thinking ๊ณ์ด (2026-01-16)
|
| 75 |
+
# ============================================================
|
| 76 |
+
"sft-thinking": {
|
| 77 |
+
"qwen2.5-14b-thinking": {
|
| 78 |
+
"path": "outputs/qwen2.5-14b-thinking-full",
|
| 79 |
+
"base": BASE_MODELS["qwen2.5-14b"],
|
| 80 |
+
"method": "SFT",
|
| 81 |
+
"size": "14B",
|
| 82 |
+
"description": "Qwen2.5 14B SFT Thinking",
|
| 83 |
+
},
|
| 84 |
+
"qwen2.5-7b-thinking": {
|
| 85 |
+
"path": "outputs/qwen2.5-7b-thinking-full",
|
| 86 |
+
"base": BASE_MODELS["qwen2.5-7b"],
|
| 87 |
+
"method": "SFT",
|
| 88 |
+
"size": "7B",
|
| 89 |
+
"description": "Qwen2.5 7B SFT Thinking",
|
| 90 |
+
},
|
| 91 |
+
"exaone-7.8b-thinking": {
|
| 92 |
+
"path": "outputs/exaone-7.8b-thinking-full",
|
| 93 |
+
"base": BASE_MODELS["exaone-7.8b"],
|
| 94 |
+
"method": "SFT",
|
| 95 |
+
"size": "7.8B",
|
| 96 |
+
"description": "EXAONE 7.8B SFT Thinking",
|
| 97 |
+
},
|
| 98 |
+
},
|
| 99 |
+
|
| 100 |
+
# ============================================================
|
| 101 |
+
# Phase 7 Students (Kimi K2 Distillation)
|
| 102 |
+
# ============================================================
|
| 103 |
+
"phase7-students": {
|
| 104 |
+
"kanana-30b-thinking-kimi": {
|
| 105 |
+
"path": "outputs/phase7_students/kanana-2-30b-thinking-kimi-student",
|
| 106 |
+
"base": BASE_MODELS["kanana-2-30b-thinking"],
|
| 107 |
+
"method": "Distillation",
|
| 108 |
+
"size": "30B (3B active)",
|
| 109 |
+
"description": "Kanana 30B Thinking Kimi Student",
|
| 110 |
+
},
|
| 111 |
+
"kanana-30b-instruct-kimi": {
|
| 112 |
+
"path": "outputs/phase7_students/kanana-2-30b-instruct-kimi-student",
|
| 113 |
+
"base": BASE_MODELS["kanana-2-30b-instruct"],
|
| 114 |
+
"method": "Distillation",
|
| 115 |
+
"size": "30B (3B active)",
|
| 116 |
+
"description": "Kanana 30B Instruct Kimi Student",
|
| 117 |
+
},
|
| 118 |
+
"qwen2.5-14b-kimi": {
|
| 119 |
+
"path": "outputs/phase7_students/qwen2.5-14b-kimi-student",
|
| 120 |
+
"base": BASE_MODELS["qwen2.5-14b"],
|
| 121 |
+
"method": "Distillation",
|
| 122 |
+
"size": "14B",
|
| 123 |
+
"description": "Qwen2.5 14B Kimi Student",
|
| 124 |
+
},
|
| 125 |
+
"qwen2.5-7b-kimi-v3": {
|
| 126 |
+
"path": "outputs/phase7_students/qwen2.5-7b-kimi-student-v3",
|
| 127 |
+
"base": BASE_MODELS["qwen2.5-7b"],
|
| 128 |
+
"method": "Distillation",
|
| 129 |
+
"size": "7B",
|
| 130 |
+
"description": "Qwen2.5 7B Kimi Student v3",
|
| 131 |
+
},
|
| 132 |
+
"exaone-7.8b-kimi": {
|
| 133 |
+
"path": "outputs/phase7_students/exaone-7.8b-kimi-student",
|
| 134 |
+
"base": BASE_MODELS["exaone-7.8b"],
|
| 135 |
+
"method": "Distillation",
|
| 136 |
+
"size": "7.8B",
|
| 137 |
+
"description": "EXAONE 7.8B Kimi Student",
|
| 138 |
+
},
|
| 139 |
+
},
|
| 140 |
+
|
| 141 |
+
# ============================================================
|
| 142 |
+
# V7 Students (Latest - 2026-01-17~19)
|
| 143 |
+
# ============================================================
|
| 144 |
+
"v7-students": {
|
| 145 |
+
"qwen2.5-72b-v7": {
|
| 146 |
+
"path": "outputs/v7_students/qwen2.5-72b-v7-20260119-1113",
|
| 147 |
+
"base": BASE_MODELS["qwen2.5-72b"],
|
| 148 |
+
"method": "SFT",
|
| 149 |
+
"size": "72B",
|
| 150 |
+
"description": "Qwen2.5 72B V7 (Latest)",
|
| 151 |
+
},
|
| 152 |
+
"llama-3.3-70b-v7": {
|
| 153 |
+
"path": "outputs/v7_students/llama-3.3-70b-v7-20260119-1114",
|
| 154 |
+
"base": BASE_MODELS["llama-3.3-70b"],
|
| 155 |
+
"method": "SFT",
|
| 156 |
+
"size": "70B",
|
| 157 |
+
"description": "Llama 3.3 70B V7 (Latest)",
|
| 158 |
+
},
|
| 159 |
+
"qwen2.5-32b-v7": {
|
| 160 |
+
"path": "outputs/v7_students/qwen2.5-32b-v7-20260118-1135",
|
| 161 |
+
"base": BASE_MODELS["qwen2.5-32b"],
|
| 162 |
+
"method": "SFT",
|
| 163 |
+
"size": "32B",
|
| 164 |
+
"description": "Qwen2.5 32B V7",
|
| 165 |
+
},
|
| 166 |
+
"qwen2.5-14b-v7": {
|
| 167 |
+
"path": "outputs/v7_students/qwen2.5-14b-v7-20260118-1135",
|
| 168 |
+
"base": BASE_MODELS["qwen2.5-14b"],
|
| 169 |
+
"method": "SFT",
|
| 170 |
+
"size": "14B",
|
| 171 |
+
"description": "Qwen2.5 14B V7",
|
| 172 |
+
},
|
| 173 |
+
"qwen2.5-7b-v7": {
|
| 174 |
+
"path": "outputs/v7_students/qwen2.5-7b-v7-20260118-1135",
|
| 175 |
+
"base": BASE_MODELS["qwen2.5-7b"],
|
| 176 |
+
"method": "SFT",
|
| 177 |
+
"size": "7B",
|
| 178 |
+
"description": "Qwen2.5 7B V7",
|
| 179 |
+
},
|
| 180 |
+
"exaone-7.8b-v7": {
|
| 181 |
+
"path": "outputs/v7_students/exaone-7.8b-v7-20260118-1135",
|
| 182 |
+
"base": BASE_MODELS["exaone-7.8b"],
|
| 183 |
+
"method": "SFT",
|
| 184 |
+
"size": "7.8B",
|
| 185 |
+
"description": "EXAONE 7.8B V7",
|
| 186 |
+
},
|
| 187 |
+
"qwen3-8b-v7": {
|
| 188 |
+
"path": "outputs/v7_students/qwen3-8b-v7-20260118-1135",
|
| 189 |
+
"base": BASE_MODELS["qwen3-8b"],
|
| 190 |
+
"method": "SFT",
|
| 191 |
+
"size": "8B",
|
| 192 |
+
"description": "Qwen3 8B V7",
|
| 193 |
+
},
|
| 194 |
+
"solar-pro-v7": {
|
| 195 |
+
"path": "outputs/v7_students/solar-pro-v7-20260118-1135",
|
| 196 |
+
"base": BASE_MODELS["solar-pro"],
|
| 197 |
+
"method": "SFT",
|
| 198 |
+
"size": "22B",
|
| 199 |
+
"description": "Solar Pro V7",
|
| 200 |
+
},
|
| 201 |
+
"varco-8b-v7": {
|
| 202 |
+
"path": "outputs/v7_students/varco-8b-v7-20260118-1135",
|
| 203 |
+
"base": BASE_MODELS["varco-8b"],
|
| 204 |
+
"method": "SFT",
|
| 205 |
+
"size": "8B",
|
| 206 |
+
"description": "VARCO 8B V7",
|
| 207 |
+
},
|
| 208 |
+
},
|
| 209 |
+
|
| 210 |
+
# ============================================================
|
| 211 |
+
# ๊ธฐํ ํ์ต ๋ชจ๋ธ (DPO, etc.)
|
| 212 |
+
# ============================================================
|
| 213 |
+
"others": {
|
| 214 |
+
"exaone-7.8b-dpo": {
|
| 215 |
+
"path": "outputs/exaone-7.8b-dpo",
|
| 216 |
+
"base": BASE_MODELS["exaone-7.8b"],
|
| 217 |
+
"method": "DPO",
|
| 218 |
+
"size": "7.8B",
|
| 219 |
+
"description": "EXAONE 7.8B DPO (Standalone)",
|
| 220 |
+
},
|
| 221 |
+
"qwen2.5-7b-dpo": {
|
| 222 |
+
"path": "outputs/qwen2.5-7b-dpo",
|
| 223 |
+
"base": BASE_MODELS["qwen2.5-7b"],
|
| 224 |
+
"method": "DPO",
|
| 225 |
+
"size": "7B",
|
| 226 |
+
"description": "Qwen2.5 7B DPO (Standalone)",
|
| 227 |
+
},
|
| 228 |
+
},
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def get_all_models() -> List[str]:
    """Return every registered model id across all categories.

    Order follows category insertion order, then per-category insertion
    order. An id registered under two categories would appear twice.
    """
    # The category keys are irrelevant here, so iterate values only
    # (avoids the unused-variable .items() loop of the original).
    return [
        model_id
        for models in MODEL_REGISTRY.values()
        for model_id in models
    ]
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def get_model_info(model_id: str) -> Optional[Dict]:
    """Look up *model_id* and return its metadata, or None if unknown.

    The result is a shallow copy of the registry entry, enriched with the
    containing "category" and the model "id"; mutating it does not touch
    the registry itself.
    """
    for category, models in MODEL_REGISTRY.items():
        if model_id not in models:
            continue
        # dict(entry, ...) copies the entry and injects the extra keys.
        return dict(models[model_id], category=category, id=model_id)
    return None
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def get_models_by_category(category: str) -> List[str]:
    """Return the model ids registered under *category* (empty if unknown)."""
    bucket = MODEL_REGISTRY.get(category, {})
    return [*bucket]
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def get_all_categories() -> List[str]:
    """Return every registry category name, in insertion order."""
    return [*MODEL_REGISTRY]
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def get_models_for_dropdown() -> List[tuple]:
    """Build (display_name, model_id) pairs for a dropdown widget.

    Display names look like "[14B] Qwen2.5 14B V7"; missing metadata falls
    back to "?" for size and to the raw id for the description.
    """
    # Single comprehension instead of the original nested append loop.
    return [
        (f"[{info.get('size', '?')}] {info.get('description', model_id)}", model_id)
        for models in MODEL_REGISTRY.values()
        for model_id, info in models.items()
    ]
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def get_small_models(max_size_gb: int = 16) -> List[str]:
    """Return model ids whose 4-bit quantized footprint fits *max_size_gb*.

    Uses a rough size-label -> GB table (4-bit quantization estimates:
    7B~2GB, 14B~4GB, 32B~8GB, 72B~18GB). Size labels not in the table are
    pessimistically assumed to need 20 GB.
    """
    # Approximate 4-bit quantized memory per parameter-count label.
    approx_gb = {
        "7B": 2, "7.8B": 2, "8B": 2,
        "10.7B": 3, "14B": 4, "22B": 6,
        "30B (3B active)": 1,  # MoE: only a small active-parameter set
        "32B": 8, "70B": 18, "72B": 18,
    }

    small = []
    for model_id in get_all_models():
        info = get_model_info(model_id)
        if not info:
            continue
        size_label = info.get("size", "72B")
        if approx_gb.get(size_label, 20) <= max_size_gb:
            small.append(model_id)
    return small
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.44.0
|
| 2 |
+
pyyaml>=6.0
|
| 3 |
+
torch>=2.1.0
|
| 4 |
+
transformers>=4.36.0
|
| 5 |
+
accelerate>=0.25.0
|
| 6 |
+
bitsandbytes>=0.41.0
|
| 7 |
+
huggingface_hub>=0.19.0
|
| 8 |
+
peft>=0.7.0
|
scenarios/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .scenario_loader import ScenarioLoader, get_scenario_loader
|
scenarios/scenario_loader.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""์๋๋ฆฌ์ค ๋ก๋"""
|
| 2 |
+
|
| 3 |
+
import random
|
| 4 |
+
import yaml
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Dict, List, Optional
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# ๋ด์ฅ ์๋๋ฆฌ์ค ๋ฐ์ดํฐ (configs/scenarios.yaml ๊ธฐ๋ฐ)
|
| 10 |
+
BUILTIN_SCENARIOS = [
|
| 11 |
+
# ์ฒซ ๋ง๋จ
|
| 12 |
+
{
|
| 13 |
+
"id": "fm_01",
|
| 14 |
+
"category": "first_meeting",
|
| 15 |
+
"category_name": "์ฒซ ๋ง๋จ",
|
| 16 |
+
"context": "ํฌ๋ฏธํ
",
|
| 17 |
+
"situation": "ํฌ์ด ์ฒ์ ๋ง๋์ ์ธ์ฌํจ",
|
| 18 |
+
"user_input": "{char}์! ๋๋์ด ๋ง๋ฌ๋ค... ์ ๋ง ์ข์ํด!",
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"id": "fm_02",
|
| 22 |
+
"category": "first_meeting",
|
| 23 |
+
"category_name": "์ฒซ ๋ง๋จ",
|
| 24 |
+
"context": "ํฌ๋ฏธํ
",
|
| 25 |
+
"situation": "ํฌ์ด ๊ธด์ฅํด์ ๋ง์ ๋ชปํจ",
|
| 26 |
+
"user_input": "์ด... ์๋
ํ์ธ์... ๋๋ฌด ๊ธด์ฅ๋ผ์...",
|
| 27 |
+
},
|
| 28 |
+
# ์ผ์ ๋ํ
|
| 29 |
+
{
|
| 30 |
+
"id": "dc_01",
|
| 31 |
+
"category": "daily_chat",
|
| 32 |
+
"category_name": "์ผ์ ๋ํ",
|
| 33 |
+
"context": "๋ฒ๋ธ ๋ํ",
|
| 34 |
+
"situation": "ํฌ์ด ์ผ์ ์๋ถ๋ฅผ ๋ฌผ์",
|
| 35 |
+
"user_input": "{char}์ ์ค๋ ๋ญํด? ๋ฐฅ์ ๋จน์์ด?",
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"id": "dc_02",
|
| 39 |
+
"category": "daily_chat",
|
| 40 |
+
"category_name": "์ผ์ ๋ํ",
|
| 41 |
+
"context": "SNS",
|
| 42 |
+
"situation": "ํฌ์ด ์ฌ์ง์ ๋ฐ์ํจ",
|
| 43 |
+
"user_input": "๋ฐฉ๊ธ ์ฌ๋ฆฐ ์
์นด ์ง์ง ์ ๋์๋ค... ์ฌ์ฟตํ์ด",
|
| 44 |
+
},
|
| 45 |
+
# ๊ฐ์ ์ง์
|
| 46 |
+
{
|
| 47 |
+
"id": "es_01",
|
| 48 |
+
"category": "emotional_support",
|
| 49 |
+
"category_name": "๊ฐ์ ์ง์",
|
| 50 |
+
"context": "์๋ก",
|
| 51 |
+
"situation": "ํฌ์ด ํ๋ค์ด์ ์๋ก๋ฅผ ๊ตฌํจ",
|
| 52 |
+
"user_input": "์ค๋ ์ง์ง ํ๋ค์์ด... ํ๊ต์์ ๋ฐํ๋ ๋ง์น๊ณ ...",
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"id": "es_02",
|
| 56 |
+
"category": "emotional_support",
|
| 57 |
+
"category_name": "๊ฐ์ ์ง์",
|
| 58 |
+
"context": "์ฐ์ธํจ",
|
| 59 |
+
"situation": "ํฌ์ด ์ฐ์ธํจ์ ํ ๋กํจ",
|
| 60 |
+
"user_input": "์์ฆ ์ ์ด๋ ๊ฒ ์ฐ์ธํ์ง... ์๋ฌด๊ฒ๋ ํ๊ธฐ ์ซ์ด.",
|
| 61 |
+
},
|
| 62 |
+
# ๊ณ ๋ฐฑ/ํธ๊ฐ
|
| 63 |
+
{
|
| 64 |
+
"id": "cf_01",
|
| 65 |
+
"category": "confession",
|
| 66 |
+
"category_name": "๊ณ ๋ฐฑ/ํธ๊ฐ",
|
| 67 |
+
"context": "๊ณ ๋ฐฑ",
|
| 68 |
+
"situation": "ํฌ์ด ์ง์ฌ์ผ๋ก ์ข์ํ๋ค๊ณ ํจ",
|
| 69 |
+
"user_input": "{char}์... ๋ ์ง์ฌ์ผ๋ก ์ข์ํด.",
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"id": "cf_02",
|
| 73 |
+
"category": "confession",
|
| 74 |
+
"category_name": "๊ณ ๋ฐฑ/ํธ๊ฐ",
|
| 75 |
+
"context": "์ง๋ฌธ",
|
| 76 |
+
"situation": "ํฌ์ด ์์ ์ ์ด๋ป๊ฒ ์๊ฐํ๋์ง ๋ฌผ์",
|
| 77 |
+
"user_input": "{char}์, ๋ ์ด๋ป๊ฒ ์๊ฐํด...?",
|
| 78 |
+
},
|
| 79 |
+
# ์ฅ๋/์ ๋จธ
|
| 80 |
+
{
|
| 81 |
+
"id": "pl_01",
|
| 82 |
+
"category": "playful",
|
| 83 |
+
"category_name": "์ฅ๋/์ ๋จธ",
|
| 84 |
+
"context": "์ฅ๋",
|
| 85 |
+
"situation": "ํฌ์ด ์ฅ๋์ผ๋ก ๋ค๋ฅธ ๋ฉค๋ฒ๋ฅผ ์ข์ํ๋ค๊ณ ํจ",
|
| 86 |
+
"user_input": "์ฌ์ค ๋ ๋ค๋ฅธ ๋ฉค๋ฒ๊ฐ ๋ ์ข์~ ใ
ใ
๋๋ด์ด์ผ!",
|
| 87 |
+
},
|
| 88 |
+
# ํน๋ณ ์์ฒญ
|
| 89 |
+
{
|
| 90 |
+
"id": "sr_01",
|
| 91 |
+
"category": "special_request",
|
| 92 |
+
"category_name": "ํน๋ณ ์์ฒญ",
|
| 93 |
+
"context": "์ฐ์ธ ์ฐ๊ธฐ",
|
| 94 |
+
"situation": "ํฌ์ด ์ฐ์ธ์ฒ๋ผ ๋ํด๋ฌ๋ผ๊ณ ์์ฒญ",
|
| 95 |
+
"user_input": "์ค๋๋ง ๋ด ์ฐ์ธ์ด๋ผ๊ณ ์๊ฐํด์ค๋?",
|
| 96 |
+
},
|
| 97 |
+
# ๋ฌธํ์ ์ํฉ
|
| 98 |
+
{
|
| 99 |
+
"id": "cu_01",
|
| 100 |
+
"category": "cultural",
|
| 101 |
+
"category_name": "๋ฌธํ์ ์ํฉ",
|
| 102 |
+
"context": "์์ผ",
|
| 103 |
+
"situation": "ํฌ์ด ์์ผ ์ถํ๋ฅผ ํจ",
|
| 104 |
+
"user_input": "{char}์ ์์ผ ์ถํํด! ์ค๋ ํ๋ฃจ ํ๋ณตํ๊ฒ ๋ณด๋ด~",
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"id": "cu_05",
|
| 108 |
+
"category": "cultural",
|
| 109 |
+
"category_name": "๋ฌธํ์ ์ํฉ",
|
| 110 |
+
"context": "ํฌ๋ฆฌ์ค๋ง์ค",
|
| 111 |
+
"situation": "ํฌ๋ฆฌ์ค๋ง์ค ์ถํ",
|
| 112 |
+
"user_input": "๋ฉ๋ฆฌ ํฌ๋ฆฌ์ค๋ง์ค! {char}๋ ํจ๊ป๋ผ์ ์ธ๋กญ์ง ์์~",
|
| 113 |
+
},
|
| 114 |
+
# ๊ฐ๋ฑ ์ํฉ
|
| 115 |
+
{
|
| 116 |
+
"id": "cn_01",
|
| 117 |
+
"category": "conflict",
|
| 118 |
+
"category_name": "๊ฐ๋ฑ ์ํฉ",
|
| 119 |
+
"context": "์งํฌ",
|
| 120 |
+
"situation": "ํฌ์ด ๋ค๋ฅธ ํฌ์๊ฒ ์งํฌ๋ฅผ ํํ",
|
| 121 |
+
"user_input": "{char}๋ ๋ค๋ฅธ ํฌ๋คํํ
๋ ์ด๋ ๊ฒ ์ํด์ค...? ๋ญ๊ฐ ์งํฌ๋...",
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"id": "cn_05",
|
| 125 |
+
"category": "conflict",
|
| 126 |
+
"category_name": "๊ฐ๋ฑ ์ํฉ",
|
| 127 |
+
"context": "๋ถ์",
|
| 128 |
+
"situation": "ํฌ์ด ๊ด๊ณ์ ๋ถ์๊ฐ์ ๋๋",
|
| 129 |
+
"user_input": "{char}๋ ์ ๋ง ๋ํํ
๊ด์ฌ ์๋ ๊ฑฐ์ผ...? ๊ฐ๋ ํ์ ์ด ์ ๋ค์ด.",
|
| 130 |
+
},
|
| 131 |
+
# ๊ฐ์ ์๊ธฐ
|
| 132 |
+
{
|
| 133 |
+
"id": "ec_01",
|
| 134 |
+
"category": "emotional_crisis",
|
| 135 |
+
"category_name": "๊ฐ์ ์๊ธฐ",
|
| 136 |
+
"context": "์ฌํ",
|
| 137 |
+
"situation": "ํฌ์ด ๊น์ ์ฌํ์ ํํ",
|
| 138 |
+
"user_input": "์ค๋ ์ง์ง ๋ง์ด ์ธ์์ด... ์ถ์ด ๋๋ฌด ํ๋ค๋ค.",
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"id": "ec_05",
|
| 142 |
+
"category": "emotional_crisis",
|
| 143 |
+
"category_name": "๊ฐ์ ์๊ธฐ",
|
| 144 |
+
"context": "์์กด๊ฐ",
|
| 145 |
+
"situation": "ํฌ์ด ์์กด๊ฐ์ด ๋ฎ์์ ธ ์์",
|
| 146 |
+
"user_input": "๋๋ ์ ์ด๋ ๊ฒ ๋ชป๋ ๋ณด์ผ๏ฟฝ๏ฟฝ๏ฟฝ... ์๋ฌด๊ฒ๋ ์ํ๋ ๊ฒ ์์ด.",
|
| 147 |
+
},
|
| 148 |
+
# ์ฅ๊ธฐ ๊ด๊ณ
|
| 149 |
+
{
|
| 150 |
+
"id": "lt_01",
|
| 151 |
+
"category": "long_term",
|
| 152 |
+
"category_name": "์ฅ๊ธฐ ๊ด๊ณ",
|
| 153 |
+
"context": "๊ด๊ณ ํ์",
|
| 154 |
+
"situation": "ํฌ์ด ์ฒ์ ๋ง๋ ๋ ์ ํ์",
|
| 155 |
+
"user_input": "์ฐ๋ฆฌ ์ฒ์ ๋ง๋ ๋ ๊ธฐ์ต๋? ๊ทธ๋ ๋ ์ง์ง ๋จ์์๋๋ฐ...",
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"id": "lt_03",
|
| 159 |
+
"category": "long_term",
|
| 160 |
+
"category_name": "์ฅ๊ธฐ ๊ด๊ณ",
|
| 161 |
+
"context": "๋ฏธ๋ ์ฝ์",
|
| 162 |
+
"situation": "ํฌ์ด ์์ผ๋ก๋ ํจ๊ปํ๊ณ ์ถ๋ค๊ณ ํจ",
|
| 163 |
+
"user_input": "์์ผ๋ก๋ ๊ณ์ {char} ๊ณ์ ์์ด๋ ๋ผ?",
|
| 164 |
+
},
|
| 165 |
+
]
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
class ScenarioLoader:
    """Loads role-play evaluation scenarios from YAML or a built-in set."""

    def __init__(self, config_path: str = None):
        # Optional path to a scenarios YAML file; None -> built-in data.
        self.config_path = Path(config_path) if config_path else None
        self._scenarios: List[Dict] = []
        self._load_scenarios()

    def _load_scenarios(self):
        """Populate self._scenarios from YAML, else fall back to BUILTIN_SCENARIOS."""
        if self.config_path and self.config_path.exists():
            with open(self.config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f)
                self._scenarios = data.get("scenarios", [])
        else:
            self._scenarios = BUILTIN_SCENARIOS

    def get_scenarios(self) -> List[Dict]:
        """Return all loaded scenarios."""
        return self._scenarios

    def get_scenario(self, scenario_id: str) -> Optional[Dict]:
        """Return the scenario with the given id, or None if absent."""
        return next(
            (s for s in self._scenarios if s.get("id") == scenario_id),
            None,
        )

    def get_scenarios_by_category(self, category: str) -> List[Dict]:
        """Return the scenarios belonging to one category."""
        matches = []
        for s in self._scenarios:
            if s.get("category") == category:
                matches.append(s)
        return matches

    def get_categories(self) -> List[str]:
        """Return the distinct category names (order unspecified)."""
        return list({s.get("category") for s in self._scenarios})

    def get_random_scenario(self, category: str = None) -> Optional[Dict]:
        """Pick a random scenario, optionally restricted to *category*."""
        pool = (
            self.get_scenarios_by_category(category)
            if category
            else self._scenarios
        )
        if not pool:
            return None
        return random.choice(pool)

    def format_user_input(self, scenario: Dict, character_name: str) -> str:
        """Substitute the {char} placeholder in user_input with the character name."""
        template = scenario.get("user_input", "")
        return template.replace("{char}", character_name)

    def get_scenario_display_name(self, scenario: Dict) -> str:
        """Return a human-readable label of the form "[category] context"."""
        label = scenario.get("category_name", scenario.get("category", ""))
        context = scenario.get("context", "")
        return f"[{label}] {context}"

    def get_scenarios_for_dropdown(self) -> List[tuple]:
        """Return (display_name, scenario_id) pairs for a dropdown widget."""
        pairs = []
        for s in self._scenarios:
            pairs.append((self.get_scenario_display_name(s), s["id"]))
        return pairs
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# Module-level singleton instance of ScenarioLoader.
_scenario_loader: Optional[ScenarioLoader] = None


def get_scenario_loader(config_path: str = None) -> ScenarioLoader:
    """Return the shared ScenarioLoader, creating it on first use.

    NOTE(review): *config_path* is honored only on the first call; later
    calls return the cached loader and ignore the argument.
    """
    global _scenario_loader
    if _scenario_loader is None:
        _scenario_loader = ScenarioLoader(config_path)
    return _scenario_loader
|
ui/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .arena_tab import create_arena_tab
|
| 2 |
+
from .chat_tab import create_chat_tab
|
| 3 |
+
from .leaderboard_tab import create_leaderboard_tab
|
| 4 |
+
from .history_tab import create_history_tab
|
ui/arena_tab.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""A/B Arena ํญ UI"""
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import random
|
| 5 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 6 |
+
|
| 7 |
+
from models.model_registry import get_all_models, get_model_info, get_models_for_dropdown
|
| 8 |
+
from characters import get_character_loader, build_system_prompt
|
| 9 |
+
from scenarios import get_scenario_loader
|
| 10 |
+
from voting import get_vote_storage, get_elo_calculator
|
| 11 |
+
from utils import parse_thinking_response, format_thinking_for_display
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def create_arena_tab(
|
| 15 |
+
model_manager: Any = None,
|
| 16 |
+
use_mock: bool = False,
|
| 17 |
+
):
|
| 18 |
+
"""A/B Arena ํญ ์์ฑ"""
|
| 19 |
+
|
| 20 |
+
# ๋ฐ์ดํฐ ๋ก๋
|
| 21 |
+
char_loader = get_character_loader()
|
| 22 |
+
scenario_loader = get_scenario_loader()
|
| 23 |
+
vote_storage = get_vote_storage()
|
| 24 |
+
elo_calculator = get_elo_calculator()
|
| 25 |
+
|
| 26 |
+
# ๋ชจ๋ธ ๋ชฉ๋ก
|
| 27 |
+
all_models = get_all_models()
|
| 28 |
+
model_choices = [(f"{get_model_info(m).get('description', m)}", m) for m in all_models]
|
| 29 |
+
|
| 30 |
+
# ์บ๋ฆญํฐ ๋ชฉ๋ก
|
| 31 |
+
characters = char_loader.get_character_names()
|
| 32 |
+
|
| 33 |
+
# ์๋๋ฆฌ์ค ๋ชฉ๋ก
|
| 34 |
+
scenario_choices = scenario_loader.get_scenarios_for_dropdown()
|
| 35 |
+
|
| 36 |
+
# ============================================================
|
| 37 |
+
# UI ๊ตฌ์ฑ
|
| 38 |
+
# ============================================================
|
| 39 |
+
|
| 40 |
+
gr.Markdown("## A/B ํ
์คํธ ์๋ ๋")
|
| 41 |
+
gr.Markdown("๋ ๋ชจ๋ธ์ ์๋ต์ ๋น๊ตํ๊ณ ๋ ์ข์ ์๋ต์ ํฌํํ์ธ์.")
|
| 42 |
+
|
| 43 |
+
# ์ค์ ํจ๋
|
| 44 |
+
with gr.Row():
|
| 45 |
+
with gr.Column(scale=1):
|
| 46 |
+
character_dropdown = gr.Dropdown(
|
| 47 |
+
choices=characters,
|
| 48 |
+
value=characters[0] if characters else None,
|
| 49 |
+
label="์บ๋ฆญํฐ ์ ํ",
|
| 50 |
+
)
|
| 51 |
+
with gr.Column(scale=1):
|
| 52 |
+
scenario_dropdown = gr.Dropdown(
|
| 53 |
+
choices=scenario_choices,
|
| 54 |
+
value=scenario_choices[0][1] if scenario_choices else None,
|
| 55 |
+
label="์๋๋ฆฌ์ค ํ๋ฆฌ์
",
|
| 56 |
+
)
|
| 57 |
+
with gr.Column(scale=1):
|
| 58 |
+
blind_mode = gr.Checkbox(
|
| 59 |
+
value=True,
|
| 60 |
+
label="๋ธ๋ผ์ธ๋ ๋ชจ๋ (๋ชจ๋ธ๋ช
์จ๊น)",
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
with gr.Row():
|
| 64 |
+
with gr.Column(scale=2):
|
| 65 |
+
model_a_dropdown = gr.Dropdown(
|
| 66 |
+
choices=model_choices,
|
| 67 |
+
value=all_models[0] if all_models else None,
|
| 68 |
+
label="Model A",
|
| 69 |
+
)
|
| 70 |
+
with gr.Column(scale=2):
|
| 71 |
+
model_b_dropdown = gr.Dropdown(
|
| 72 |
+
choices=model_choices,
|
| 73 |
+
value=all_models[1] if len(all_models) > 1 else None,
|
| 74 |
+
label="Model B",
|
| 75 |
+
)
|
| 76 |
+
with gr.Column(scale=1):
|
| 77 |
+
random_models_btn = gr.Button("๋๋ค ๋ชจ๋ธ", size="sm")
|
| 78 |
+
|
| 79 |
+
# ์๋ต ์์ญ
|
| 80 |
+
with gr.Row():
|
| 81 |
+
# Model A Response
|
| 82 |
+
with gr.Column(scale=1):
|
| 83 |
+
model_a_label = gr.Markdown("### Model A")
|
| 84 |
+
with gr.Accordion("Thinking Process", open=False):
|
| 85 |
+
thinking_a = gr.Markdown("*(์๋ต ์์ฑ ํ ํ์๋ฉ๋๋ค)*")
|
| 86 |
+
response_a = gr.Textbox(
|
| 87 |
+
label="์๋ต",
|
| 88 |
+
lines=8,
|
| 89 |
+
interactive=False,
|
| 90 |
+
)
|
| 91 |
+
metadata_a = gr.Markdown("")
|
| 92 |
+
|
| 93 |
+
# Model B Response
|
| 94 |
+
with gr.Column(scale=1):
|
| 95 |
+
model_b_label = gr.Markdown("### Model B")
|
| 96 |
+
with gr.Accordion("Thinking Process", open=False):
|
| 97 |
+
thinking_b = gr.Markdown("*(์๋ต ์์ฑ ํ ํ์๋ฉ๋๋ค)*")
|
| 98 |
+
response_b = gr.Textbox(
|
| 99 |
+
label="์๋ต",
|
| 100 |
+
lines=8,
|
| 101 |
+
interactive=False,
|
| 102 |
+
)
|
| 103 |
+
metadata_b = gr.Markdown("")
|
| 104 |
+
|
| 105 |
+
# ์ฌ์ฉ์ ์
๋ ฅ
|
| 106 |
+
with gr.Row():
|
| 107 |
+
user_input = gr.Textbox(
|
| 108 |
+
label="ํฌ ๋ฉ์์ง",
|
| 109 |
+
placeholder="์์ด๋์๊ฒ ๋ณด๋ผ ๋ฉ์์ง๋ฅผ ์
๋ ฅํ์ธ์...",
|
| 110 |
+
lines=2,
|
| 111 |
+
scale=4,
|
| 112 |
+
)
|
| 113 |
+
with gr.Column(scale=1):
|
| 114 |
+
random_scenario_btn = gr.Button("๋๋ค ์๋๋ฆฌ์ค")
|
| 115 |
+
submit_btn = gr.Button("์ ์ก", variant="primary")
|
| 116 |
+
|
| 117 |
+
# ํฌํ ์์ญ
|
| 118 |
+
gr.Markdown("### ํฌํ")
|
| 119 |
+
with gr.Row():
|
| 120 |
+
vote_a_btn = gr.Button("A๊ฐ ๋ ์ข์", variant="secondary")
|
| 121 |
+
vote_tie_btn = gr.Button("๋น์ทํจ", variant="secondary")
|
| 122 |
+
vote_b_btn = gr.Button("B๊ฐ ๋ ์ข์", variant="secondary")
|
| 123 |
+
vote_skip_btn = gr.Button("์คํต", variant="secondary")
|
| 124 |
+
|
| 125 |
+
vote_reason = gr.Textbox(
|
| 126 |
+
label="ํฌํ ์ด์ (์ ํ์ฌํญ)",
|
| 127 |
+
placeholder="์ ์ด ์๋ต์ด ๋ ์ข๋ค๊ณ ์๊ฐํ์๋์?",
|
| 128 |
+
lines=1,
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
+
vote_result = gr.Markdown("")
|
| 132 |
+
|
| 133 |
+
# ์ํ ์ ์ฅ
|
| 134 |
+
state = gr.State({
|
| 135 |
+
"model_a": None,
|
| 136 |
+
"model_b": None,
|
| 137 |
+
"response_a": None,
|
| 138 |
+
"response_b": None,
|
| 139 |
+
"character": None,
|
| 140 |
+
"user_input": None,
|
| 141 |
+
})
|
| 142 |
+
|
| 143 |
+
# ============================================================
|
| 144 |
+
# ์ด๋ฒคํธ ํธ๋ค๋ฌ
|
| 145 |
+
# ============================================================
|
| 146 |
+
|
| 147 |
+
def select_random_models():
|
| 148 |
+
"""๋๋ค ๏ฟฝ๏ฟฝ๏ฟฝ๋ธ ์ ํ"""
|
| 149 |
+
if len(all_models) < 2:
|
| 150 |
+
return all_models[0] if all_models else None, None
|
| 151 |
+
selected = random.sample(all_models, 2)
|
| 152 |
+
return selected[0], selected[1]
|
| 153 |
+
|
| 154 |
+
def load_random_scenario(character: str):
|
| 155 |
+
"""๋๋ค ์๋๋ฆฌ์ค ๋ก๋"""
|
| 156 |
+
scenario = scenario_loader.get_random_scenario()
|
| 157 |
+
if scenario:
|
| 158 |
+
user_msg = scenario_loader.format_user_input(scenario, character)
|
| 159 |
+
return user_msg, scenario["id"]
|
| 160 |
+
return "", None
|
| 161 |
+
|
| 162 |
+
def load_scenario_input(scenario_id: str, character: str):
|
| 163 |
+
"""์ ํ๋ ์๋๋ฆฌ์ค ๋ก๋"""
|
| 164 |
+
scenario = scenario_loader.get_scenario(scenario_id)
|
| 165 |
+
if scenario:
|
| 166 |
+
return scenario_loader.format_user_input(scenario, character)
|
| 167 |
+
return ""
|
| 168 |
+
|
| 169 |
+
    def generate_responses(
        model_a: str,
        model_b: str,
        character: str,
        user_msg: str,
        current_state: dict,
    ):
        """Run both selected models on the same prompt and fill both panels.

        Returns a 7-tuple matching the bound Gradio outputs:
        (thinking_a, response_a, metadata_a,
         thinking_b, response_b, metadata_b, new_state).
        """
        # Guard: both model slots must be chosen before generating.
        if not model_a or not model_b:
            return (
                "*(๋ชจ๋ธ์ ์ ํํด์ฃผ์ธ์)*", "", "",
                "*(๋ชจ๋ธ์ ์ ํํด์ฃผ์ธ์)*", "", "",
                current_state,
            )

        # Guard: a non-empty user message is required.
        if not user_msg.strip():
            return (
                "*(๋ฉ์์ง๋ฅผ ์ ๋ ฅํด์ฃผ์ธ์)*", "", "",
                "*(๋ฉ์์ง๋ฅผ ์ ๋ ฅํด์ฃผ์ธ์)*", "", "",
                current_state,
            )

        system_prompt = build_system_prompt(character)
        # Single-turn exchange: only the fresh user message is sent.
        messages = [{"role": "user", "content": user_msg}]

        # Mock mode (testing without real model backends).
        if use_mock or model_manager is None:
            response_a_full = f"<think>\n{character}์ ์ ์ฅ์์ ์๊ฐํด๋ณด๋ฉด... ์ด ๋ฉ์์ง์ ์ด๋ป๊ฒ ๋ฐ์ํด์ผ ํ ๊น?\n</think>\n\n์๋ ! ๋ฐ๊ฐ์~ (Mock Response A)"
            response_b_full = f"<think>\n์... ์ด๋ฐ ์ํฉ์์๋...\n</think>\n\nํค์ด~ ๋ญํด? (Mock Response B)"
            meta_a = {"latency_s": 0.5, "output_tokens": 50}
            meta_b = {"latency_s": 0.6, "output_tokens": 55}
        else:
            # Real inference. Each model's failure is isolated so one
            # backend error does not blank out the other panel.
            try:
                response_a_full, meta_a = model_manager.generate_response(
                    model_a, messages, system_prompt
                )
            except Exception as e:
                response_a_full = f"*Error: {str(e)}*"
                meta_a = {"latency_s": 0, "output_tokens": 0}

            try:
                response_b_full, meta_b = model_manager.generate_response(
                    model_b, messages, system_prompt
                )
            except Exception as e:
                response_b_full = f"*Error: {str(e)}*"
                meta_b = {"latency_s": 0, "output_tokens": 0}

        # Split each <think> section from the visible reply.
        think_a, clean_a = parse_thinking_response(response_a_full)
        think_b, clean_b = parse_thinking_response(response_b_full)

        # Latency / token metadata strings shown under each response.
        meta_str_a = f"โฑ๏ธ {meta_a.get('latency_s', 0):.2f}s | {meta_a.get('output_tokens', 0)} tokens"
        meta_str_b = f"โฑ๏ธ {meta_b.get('latency_s', 0):.2f}s | {meta_b.get('output_tokens', 0)} tokens"

        # Capture everything the vote handler will need later.
        new_state = {
            "model_a": model_a,
            "model_b": model_b,
            "response_a": response_a_full,
            "response_b": response_b_full,
            "character": character,
            "user_input": user_msg,
        }

        return (
            format_thinking_for_display(think_a) if think_a else "*No thinking*",
            clean_a,
            meta_str_a,
            format_thinking_for_display(think_b) if think_b else "*No thinking*",
            clean_b,
            meta_str_b,
            new_state,
        )
| 245 |
+
|
| 246 |
+
    def handle_vote(vote_type: str, reason: str, current_state: dict):
        """Persist a vote and, unless skipped, update both models' ELO.

        Args:
            vote_type: "a", "b", "tie", or "skip".
            reason: Optional free-text justification from the voter.
            current_state: Arena state captured at generation time.

        Returns:
            A markdown status string for the vote-result panel.
        """
        # A vote only makes sense after a matchup has been generated.
        if not current_state.get("model_a") or not current_state.get("model_b"):
            return "๋จผ์  ์๋ต์ ์์ฑํด์ฃผ์ธ์."

        vote_data = {
            "model_a": current_state["model_a"],
            "model_b": current_state["model_b"],
            "response_a": current_state.get("response_a", ""),
            "response_b": current_state.get("response_b", ""),
            "character": current_state.get("character", ""),
            "user_input": current_state.get("user_input", ""),
            "vote": vote_type,
            "reason": reason,
        }

        vote_id = vote_storage.save_vote(vote_data)

        # Skipped votes are stored for the record but do not move ratings.
        if vote_type != "skip":
            new_a, new_b = elo_calculator.update_ratings(
                current_state["model_a"],
                current_state["model_b"],
                vote_type,
            )
            return f"ํฌํ ์๋ฃ! (ID: {vote_id})\n\nELO ๋ณ๊ฒฝ:\n- {current_state['model_a']}: {new_a:.0f}\n- {current_state['model_b']}: {new_b:.0f}"

        return f"์คํต๋จ (ID: {vote_id})"
| 274 |
+
|
| 275 |
+
def update_model_labels(blind: bool, model_a: str, model_b: str):
|
| 276 |
+
"""๋ธ๋ผ์ธ๋ ๋ชจ๋์ ๋ฐ๋ผ ๋ ์ด๋ธ ์
๋ฐ์ดํธ"""
|
| 277 |
+
if blind:
|
| 278 |
+
return "### Model A", "### Model B"
|
| 279 |
+
else:
|
| 280 |
+
info_a = get_model_info(model_a)
|
| 281 |
+
info_b = get_model_info(model_b)
|
| 282 |
+
label_a = f"### {info_a.get('description', model_a)}" if info_a else f"### {model_a}"
|
| 283 |
+
label_b = f"### {info_b.get('description', model_b)}" if info_b else f"### {model_b}"
|
| 284 |
+
return label_a, label_b
|
| 285 |
+
|
| 286 |
+
# ============================================================
|
| 287 |
+
# ์ด๋ฒคํธ ๋ฐ์ธ๋ฉ
|
| 288 |
+
# ============================================================
|
| 289 |
+
|
| 290 |
+
random_models_btn.click(
|
| 291 |
+
fn=select_random_models,
|
| 292 |
+
outputs=[model_a_dropdown, model_b_dropdown],
|
| 293 |
+
)
|
| 294 |
+
|
| 295 |
+
random_scenario_btn.click(
|
| 296 |
+
fn=load_random_scenario,
|
| 297 |
+
inputs=[character_dropdown],
|
| 298 |
+
outputs=[user_input, scenario_dropdown],
|
| 299 |
+
)
|
| 300 |
+
|
| 301 |
+
scenario_dropdown.change(
|
| 302 |
+
fn=load_scenario_input,
|
| 303 |
+
inputs=[scenario_dropdown, character_dropdown],
|
| 304 |
+
outputs=[user_input],
|
| 305 |
+
)
|
| 306 |
+
|
| 307 |
+
submit_btn.click(
|
| 308 |
+
fn=generate_responses,
|
| 309 |
+
inputs=[model_a_dropdown, model_b_dropdown, character_dropdown, user_input, state],
|
| 310 |
+
outputs=[thinking_a, response_a, metadata_a, thinking_b, response_b, metadata_b, state],
|
| 311 |
+
)
|
| 312 |
+
|
| 313 |
+
# ๋ธ๋ผ์ธ๋ ๋ชจ๋ ๋ณ๊ฒฝ ์ ๋ ์ด๋ธ ์
๋ฐ์ดํธ
|
| 314 |
+
blind_mode.change(
|
| 315 |
+
fn=update_model_labels,
|
| 316 |
+
inputs=[blind_mode, model_a_dropdown, model_b_dropdown],
|
| 317 |
+
outputs=[model_a_label, model_b_label],
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
+
# ํฌํ ๋ฒํผ
|
| 321 |
+
vote_a_btn.click(
|
| 322 |
+
fn=lambda r, s: handle_vote("a", r, s),
|
| 323 |
+
inputs=[vote_reason, state],
|
| 324 |
+
outputs=[vote_result],
|
| 325 |
+
)
|
| 326 |
+
vote_b_btn.click(
|
| 327 |
+
fn=lambda r, s: handle_vote("b", r, s),
|
| 328 |
+
inputs=[vote_reason, state],
|
| 329 |
+
outputs=[vote_result],
|
| 330 |
+
)
|
| 331 |
+
vote_tie_btn.click(
|
| 332 |
+
fn=lambda r, s: handle_vote("tie", r, s),
|
| 333 |
+
inputs=[vote_reason, state],
|
| 334 |
+
outputs=[vote_result],
|
| 335 |
+
)
|
| 336 |
+
vote_skip_btn.click(
|
| 337 |
+
fn=lambda r, s: handle_vote("skip", r, s),
|
| 338 |
+
inputs=[vote_reason, state],
|
| 339 |
+
outputs=[vote_result],
|
| 340 |
+
)
|
ui/chat_tab.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Single Chat ํญ UI"""
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
from models.model_registry import get_all_models, get_model_info
|
| 7 |
+
from characters import get_character_loader, build_system_prompt
|
| 8 |
+
from utils import parse_thinking_response, format_thinking_for_display
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def create_chat_tab(
    model_manager: Any = None,
    use_mock: bool = False,
):
    """Build the single-model chat tab.

    Args:
        model_manager: Backend used for real inference. When ``None`` (or
            ``use_mock`` is True) canned mock responses are returned instead.
        use_mock: Force mock responses even if a model manager exists.
    """

    # Character definitions (names + prompt data).
    char_loader = get_character_loader()

    # Model dropdown choices: (display label, model id) pairs.
    all_models = get_all_models()
    model_choices = [(f"{get_model_info(m).get('description', m)}", m) for m in all_models]

    # Available character names.
    characters = char_loader.get_character_names()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ๋จ์ผ ๋ชจ๋ธ ์ฑํ ")
    gr.Markdown("์ ํํ ๋ชจ๋ธ๊ณผ ์บ๋ฆญํฐ๋ก ๋ํ๋ฅผ ๋๋ ๋ณด์ธ์.")

    with gr.Row():
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=all_models[0] if all_models else None,
                label="๋ชจ๋ธ ์ ํ",
            )
        with gr.Column(scale=1):
            character_dropdown = gr.Dropdown(
                choices=characters,
                value=characters[0] if characters else None,
                label="์บ๋ฆญํฐ ์ ํ",
            )

    # Chat area (messages-format history: [{"role": ..., "content": ...}]).
    chatbot = gr.Chatbot(
        label="๋ํ",
        height=400,
        type="messages",
    )

    with gr.Accordion("Thinking Process (๋ง์ง๋ง ์๋ต)", open=False):
        thinking_display = gr.Markdown("*(์๋ต ์์ฑ ํ ํ์๋ฉ๋๋ค)*")

    with gr.Row():
        user_input = gr.Textbox(
            label="๋ฉ์์ง ์ ๋ ฅ",
            placeholder="๋ฉ์์ง๋ฅผ ์ ๋ ฅํ์ธ์...",
            lines=2,
            scale=4,
        )
        send_btn = gr.Button("์ ์ก", variant="primary", scale=1)

    with gr.Row():
        clear_btn = gr.Button("๋ํ ์ด๊ธฐํ")

    metadata_display = gr.Markdown("")

    # ============================================================
    # Event handlers
    # ============================================================

    def respond(
        model_id: str,
        character: str,
        message: str,
        history: list,
    ):
        """Generate one assistant turn and append it to the chat history."""
        if not message.strip():
            return history, "", "*(๋ฉ์์ง๋ฅผ ์ ๋ ฅํด์ฃผ์ธ์)*", ""

        # Rebuild the model-facing message list from the UI history.
        messages = []
        for msg in history:
            if msg["role"] == "user":
                messages.append({"role": "user", "content": msg["content"]})
            elif msg["role"] == "assistant":
                # Only the clean reply (thinking stripped) goes back to the model.
                _, clean = parse_thinking_response(msg["content"])
                messages.append({"role": "assistant", "content": clean})

        messages.append({"role": "user", "content": message})

        system_prompt = build_system_prompt(character)

        # Mock path (no backend) vs. real inference.
        if use_mock or model_manager is None:
            response_full = f"<think>\n{character}๋ก์ ์๊ฐํด๋ณด๋ฉด...\n</think>\n\n์๋ ~ ๋ฐ๊ฐ์! (Mock Response)"
            meta = {"latency_s": 0.5, "output_tokens": 30}
        else:
            try:
                response_full, meta = model_manager.generate_response(
                    model_id, messages, system_prompt
                )
            except Exception as e:
                response_full = f"*Error: {str(e)}*"
                meta = {"latency_s": 0, "output_tokens": 0}

        # Split the <think> section from the visible reply.
        thinking, clean_response = parse_thinking_response(response_full)

        # Append the new turn; the full response (with thinking) is kept
        # in the UI history so the thinking panel can be re-derived.
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response_full})

        # Latency / token metadata line.
        meta_str = f"โฑ๏ธ {meta.get('latency_s', 0):.2f}s | {meta.get('output_tokens', 0)} tokens"

        return (
            history,
            "",  # clear the input box
            format_thinking_for_display(thinking) if thinking else "*No thinking*",
            meta_str,
        )

    def clear_chat():
        """Reset chat history, input box, thinking panel, and metadata."""
        return [], "", "*(์๋ต ์์ฑ ํ ํ์๋ฉ๋๋ค)*", ""

    # ============================================================
    # Event wiring
    # ============================================================

    send_btn.click(
        fn=respond,
        inputs=[model_dropdown, character_dropdown, user_input, chatbot],
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )

    user_input.submit(
        fn=respond,
        inputs=[model_dropdown, character_dropdown, user_input, chatbot],
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )
|
ui/history_tab.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""History ํญ UI"""
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
from voting import get_vote_storage
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def create_history_tab():
    """Build the vote-history tab.

    Shows the 50 most recent votes in a table, lets the user download the
    full vote log as JSON, and shows details for a selected row.

    Returns:
        The refresh handler, so the caller can run it on page load.
    """

    vote_storage = get_vote_storage()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ํฌํ ํ์คํ ๋ฆฌ")
    gr.Markdown("์ต๊ทผ ํฌํ ๊ธฐ๋ก์ ํ์ธํ๊ณ  ๋ฐ์ดํฐ๋ฅผ ๋ค์ด๋ก๋ํ  ์ ์์ต๋๋ค.")

    with gr.Row():
        refresh_btn = gr.Button("์๋ก๊ณ ์นจ")
        download_btn = gr.Button("JSON ๋ค์ด๋ก๋")

    history_table = gr.Dataframe(
        headers=["ID", "์๊ฐ", "Model A", "Model B", "๊ฒฐ๊ณผ", "์บ๋ฆญํฐ"],
        label="์ต๊ทผ ํฌํ (50๊ฑด)",
        interactive=False,
    )

    download_output = gr.File(label="๋ค์ด๋ก๋ ํ์ผ", visible=False)

    gr.Markdown("### ํฌํ ์์ธ ์ ๋ณด")

    with gr.Row():
        with gr.Column():
            detail_model_a = gr.Textbox(label="Model A", interactive=False)
            detail_response_a = gr.Textbox(label="Response A", lines=5, interactive=False)
        with gr.Column():
            detail_model_b = gr.Textbox(label="Model B", interactive=False)
            detail_response_b = gr.Textbox(label="Response B", lines=5, interactive=False)

    detail_user_input = gr.Textbox(label="์ฌ์ฉ์ ์ ๋ ฅ", interactive=False)
    detail_reason = gr.Textbox(label="ํฌํ ์ด์ ", interactive=False)

    # ============================================================
    # Event handlers
    # ============================================================

    def refresh_history():
        """Rebuild the recent-votes DataFrame (newest first)."""
        votes = vote_storage.get_recent_votes(50)

        rows = []
        for v in reversed(votes):  # newest first
            result_map = {"a": "A ์น", "b": "B ์น", "tie": "๋ฌด์น๋ถ", "skip": "์คํต"}
            rows.append([
                v.get("id", ""),
                v.get("timestamp", "")[:19],  # trim to seconds precision
                v.get("model_a", "")[:30],
                v.get("model_b", "")[:30],
                result_map.get(v.get("vote", ""), v.get("vote", "")),
                v.get("character", ""),
            ])

        df = pd.DataFrame(
            rows,
            columns=["ID", "์๊ฐ", "Model A", "Model B", "๊ฒฐ๊ณผ", "์บ๋ฆญํฐ"],
        )

        return df

    def prepare_download():
        """Export all votes to JSON in the system temp dir and reveal the file."""
        import os
        import tempfile

        # NOTE(fix): use the platform temp directory instead of a
        # hard-coded "/tmp" path so export also works on Windows.
        output_path = os.path.join(tempfile.gettempdir(), "votes_export.json")
        vote_storage.export_to_json(output_path)
        return gr.File(value=output_path, visible=True)

    def show_vote_detail(evt: gr.SelectData, df: pd.DataFrame):
        """Populate the detail panel for the vote in the clicked table row."""
        if evt.index[0] is None:
            return "", "", "", "", "", ""

        row_idx = evt.index[0]
        vote_id = df.iloc[row_idx]["ID"]

        # Look the full record up by id (the table only holds a summary).
        votes = vote_storage.get_all_votes()
        vote = next((v for v in votes if v.get("id") == vote_id), None)

        if not vote:
            return "", "", "", "", "", ""

        return (
            vote.get("model_a", ""),
            vote.get("response_a", "")[:500] + "..." if len(vote.get("response_a", "")) > 500 else vote.get("response_a", ""),
            vote.get("model_b", ""),
            vote.get("response_b", "")[:500] + "..." if len(vote.get("response_b", "")) > 500 else vote.get("response_b", ""),
            vote.get("user_input", ""),
            vote.get("reason", ""),
        )

    # ============================================================
    # Event wiring
    # ============================================================

    refresh_btn.click(
        fn=refresh_history,
        outputs=[history_table],
    )

    download_btn.click(
        fn=prepare_download,
        outputs=[download_output],
    )

    history_table.select(
        fn=show_vote_detail,
        inputs=[history_table],
        outputs=[detail_model_a, detail_response_a, detail_model_b, detail_response_b, detail_user_input, detail_reason],
    )

    # Return the refresh handler so app startup can populate the table.
    return refresh_history
ui/leaderboard_tab.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Leaderboard ํญ UI"""
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
from voting import get_vote_storage, get_elo_calculator
|
| 7 |
+
from models.model_registry import get_model_info
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def create_leaderboard_tab():
    """Build the ELO leaderboard tab.

    Renders a ranking table plus aggregate vote counters and wires the
    refresh button.

    Returns:
        The refresh handler, so the caller can run it on page load.
    """

    vote_storage = get_vote_storage()
    elo_calculator = get_elo_calculator()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ELO ๋ฆฌ๋๋ณด๋")
    gr.Markdown("ํฌํ ๊ฒฐ๊ณผ์ ๊ธฐ๋ฐํ ๋ชจ๋ธ ์์์ ๋๋ค.")

    refresh_btn = gr.Button("์๋ก๊ณ ์นจ")

    leaderboard_table = gr.Dataframe(
        headers=["์์", "๋ชจ๋ธ", "ELO", "์น", "ํจ", "๋ฌด", "์ด", "์น๋ฅ "],
        label="๋ฆฌ๋๋ณด๋",
        interactive=False,
    )

    gr.Markdown("### ํฌํ ์์ฝ")

    with gr.Row():
        total_votes = gr.Textbox(label="์ด ํฌํ ์", interactive=False)
        a_wins_count = gr.Textbox(label="A ์น๋ฆฌ", interactive=False)
        b_wins_count = gr.Textbox(label="B ์น๋ฆฌ", interactive=False)
        ties_count = gr.Textbox(label="๋ฌด์น๋ถ", interactive=False)

    # ============================================================
    # Event handlers
    # ============================================================

    def refresh_leaderboard():
        """Rebuild the leaderboard DataFrame and the vote summary strings."""
        # Per-model win/loss/tie stats derived from the stored votes.
        vote_stats = vote_storage.get_model_stats()

        # ELO-ordered entries enriched with those stats.
        leaderboard = elo_calculator.get_leaderboard_with_stats(vote_stats)

        rows = []
        for i, entry in enumerate(leaderboard, 1):
            model_info = get_model_info(entry["model"])
            display_name = model_info.get("description", entry["model"]) if model_info else entry["model"]
            rows.append([
                i,
                display_name,
                entry["elo"],
                entry["wins"],
                entry["losses"],
                entry["ties"],
                entry["total"],
                entry["win_rate"],
            ])

        df = pd.DataFrame(
            rows,
            columns=["์์", "๋ชจ๋ธ", "ELO", "์น", "ํจ", "๋ฌด", "์ด", "์น๋ฅ "],
        )

        # Aggregate vote counts for the summary boxes.
        summary = vote_storage.get_vote_summary()

        return (
            df,
            str(summary["total"]),
            str(summary["a_wins"]),
            str(summary["b_wins"]),
            str(summary["ties"]),
        )

    # ============================================================
    # Event wiring
    # ============================================================

    refresh_btn.click(
        fn=refresh_leaderboard,
        outputs=[leaderboard_table, total_votes, a_wins_count, b_wins_count, ties_count],
    )

    # NOTE(fix): removed a stray `leaderboard_tab = gr.Blocks()` here — it
    # instantiated an unused nested Blocks context and was dead code.

    # Return the refresh handler so app startup can populate the table.
    return refresh_leaderboard
utils/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .thinking_parser import parse_thinking_response, format_thinking_for_display
|
utils/thinking_parser.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""<think> ํ๊ทธ ํ์ฑ ์ ํธ๋ฆฌํฐ"""
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from typing import Tuple, Optional
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def parse_thinking_response(response: str) -> Tuple[Optional[str], str]:
    """Split a model response into its <think> section and the visible reply.

    Returns:
        A ``(thinking_content, clean_response)`` pair. ``thinking_content``
        is ``None`` when no complete <think>...</think> block is present,
        in which case the response is returned unchanged.
    """
    if not response:
        return None, ""

    think_re = re.compile(r'<think>(.*?)</think>', re.DOTALL)
    found = think_re.search(response)
    if found is None:
        return None, response

    # First block's content; every block is stripped from the clean reply.
    inner = found.group(1).strip()
    remainder = think_re.sub('', response).strip()
    return inner, remainder
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def format_thinking_for_display(thinking: str) -> str:
    """Render thinking content as markdown, bolding known section headers."""
    if not thinking:
        return "*No thinking process*"

    # Bold the 6-step structure markers when they appear in the text.
    headers = (
        "[์ํฉ๋ถ์]", "[๊ด๊ณ๋จ๊ณ]", "[์บ๋ฆญํฐ์คํ์ผ]",
        "[๋ฐ๋น๊ฒฐ์ ]", "[๊ธ์งํจํด์ฒดํฌ]", "[์๋ต์ค๊ณ]",
    )
    result = thinking
    for header in headers:
        result = result.replace(header, f"**{header}**")
    return result
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def extract_response_only(full_response: str) -> str:
    """Return the response text with any <think> section stripped."""
    return parse_thinking_response(full_response)[1]
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def has_thinking_tag(response: str) -> bool:
    """Report whether the response contains a complete <think>...</think> block."""
    return re.search(r'<think>.*?</think>', response, re.DOTALL) is not None
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def get_thinking_stats(response: str) -> dict:
    """Summarize thinking/response lengths for a raw model response."""
    thinking, clean = parse_thinking_response(response)
    present = thinking is not None
    return {
        "has_thinking": present,
        "thinking_length": len(thinking) if thinking else 0,
        "response_length": len(clean),
        "total_length": len(response),
    }
voting/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .vote_storage import VoteStorage, get_vote_storage
|
| 2 |
+
from .elo_calculator import ELOCalculator, get_elo_calculator
|
voting/elo_calculator.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ELO ๋ ์ดํ
์์คํ
"""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict, List, Tuple, Optional
|
| 6 |
+
from threading import Lock
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class ELOCalculator:
    """ELO rating calculator for pairwise model comparisons.

    Ratings live in memory (``self.ratings``) and are mirrored to a JSON
    file so they survive process restarts. All mutations and the matching
    file write happen under a single acquisition of ``self.lock`` so
    concurrent votes cannot interleave an update with its persistence.
    """

    def __init__(
        self,
        k_factor: float = 32,
        initial_rating: int = 1500,
        ratings_path: str = "elo_ratings.json",
    ):
        # Maximum rating swing per game (standard ELO K-factor).
        self.k_factor = k_factor
        # Rating assumed for a model that has never been voted on.
        self.initial_rating = initial_rating
        self.ratings_path = Path(ratings_path)
        self.lock = Lock()
        self.ratings: Dict[str, float] = {}
        self._load_ratings()

    def _load_ratings(self):
        """Load persisted ratings; a missing or corrupt file yields an empty table."""
        if self.ratings_path.exists():
            try:
                with open(self.ratings_path, "r", encoding="utf-8") as f:
                    self.ratings = json.load(f)
            except (json.JSONDecodeError, IOError):
                self.ratings = {}

    def _write_ratings_file(self):
        """Persist ratings to disk. Caller must already hold ``self.lock``."""
        with open(self.ratings_path, "w", encoding="utf-8") as f:
            json.dump(self.ratings, f, ensure_ascii=False, indent=2)

    def _save_ratings(self):
        """Thread-safe persist of the current ratings table."""
        with self.lock:
            self._write_ratings_file()

    def get_rating(self, model: str) -> float:
        """Current rating for ``model`` (``initial_rating`` if unseen)."""
        return self.ratings.get(model, self.initial_rating)

    def expected_score(self, rating_a: float, rating_b: float) -> float:
        """Expected score (win probability) of A against B, in [0, 1]."""
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def update_ratings(
        self,
        model_a: str,
        model_b: str,
        result: str,  # "a", "b", or "tie"
    ) -> Tuple[float, float]:
        """Apply one game result and persist the new ratings.

        NOTE(fix): mutation and the file write now happen inside one lock
        acquisition. Previously the ratings were mutated under the lock
        but saved via ``_save_ratings`` in a separate critical section, so
        a concurrent vote could interleave between update and save (and a
        nested call would have deadlocked on the non-reentrant Lock).

        Returns:
            The new ``(rating_a, rating_b)`` pair.
        """
        rating_a = self.get_rating(model_a)
        rating_b = self.get_rating(model_b)

        expected_a = self.expected_score(rating_a, rating_b)
        expected_b = self.expected_score(rating_b, rating_a)

        # Actual scores for the reported result.
        if result == "a":
            actual_a, actual_b = 1.0, 0.0
        elif result == "b":
            actual_a, actual_b = 0.0, 1.0
        else:  # tie
            actual_a, actual_b = 0.5, 0.5

        # Standard ELO update: R' = R + K * (S - E).
        new_rating_a = rating_a + self.k_factor * (actual_a - expected_a)
        new_rating_b = rating_b + self.k_factor * (actual_b - expected_b)

        with self.lock:
            self.ratings[model_a] = new_rating_a
            self.ratings[model_b] = new_rating_b
            self._write_ratings_file()

        return new_rating_a, new_rating_b

    def get_leaderboard(self) -> List[Tuple[str, float]]:
        """All rated models as ``(model, rating)``, highest rating first."""
        return sorted(
            self.ratings.items(),
            key=lambda x: x[1],
            reverse=True,
        )

    def get_leaderboard_with_stats(
        self,
        vote_stats: Dict[str, Dict],
    ) -> List[Dict]:
        """Leaderboard rows enriched with win/loss/tie counts from ``vote_stats``."""
        leaderboard = []
        for model, rating in self.get_leaderboard():
            stats = vote_stats.get(model, {})
            leaderboard.append({
                "model": model,
                "elo": round(rating),
                "wins": stats.get("wins", 0),
                "losses": stats.get("losses", 0),
                "ties": stats.get("ties", 0),
                "total": stats.get("total", 0),
                "win_rate": f"{stats.get('win_rate', 0) * 100:.1f}%",
            })
        return leaderboard

    def get_all_ratings(self) -> Dict[str, float]:
        """Snapshot copy of every stored rating."""
        return self.ratings.copy()

    def reset_ratings(self):
        """Drop all ratings and persist the empty table atomically."""
        with self.lock:
            self.ratings = {}
            self._write_ratings_file()
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# Module-level singleton instance
_elo_calculator: Optional[ELOCalculator] = None


def get_elo_calculator(
    k_factor: float = 32,
    initial_rating: int = 1500,
    ratings_path: str = "elo_ratings.json",
) -> ELOCalculator:
    """Return the shared ELOCalculator, creating it on first use.

    The constructor arguments only take effect on the very first call;
    subsequent calls return the already-built instance unchanged.
    """
    global _elo_calculator
    if _elo_calculator is not None:
        return _elo_calculator
    _elo_calculator = ELOCalculator(
        k_factor=k_factor,
        initial_rating=initial_rating,
        ratings_path=ratings_path,
    )
    return _elo_calculator
voting/vote_storage.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ํฌํ ๋ฐ์ดํฐ ์ ์ฅ ๋ฐ ๊ด๋ฆฌ"""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, List, Optional
|
| 8 |
+
from threading import Lock
|
| 9 |
+
import uuid
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class VoteStorage:
    """Append-only JSONL store for A/B vote records.

    Each vote is a single JSON object per line. Appends are serialized
    with a lock so concurrent UI callbacks cannot interleave partial lines.
    """

    def __init__(self, storage_path: str = "votes.jsonl"):
        self.storage_path = Path(storage_path)  # JSONL file backing the store
        self.lock = Lock()                      # guards appends only
        self._ensure_file_exists()

    def _ensure_file_exists(self):
        """Create the storage file (and parent directories) if missing."""
        if not self.storage_path.exists():
            self.storage_path.parent.mkdir(parents=True, exist_ok=True)
            self.storage_path.touch()

    def save_vote(self, vote_data: Dict) -> str:
        """Append one vote record and return its generated 8-char id.

        Keys in ``vote_data`` are merged into the stored record; ``id`` and
        ``timestamp`` are added automatically (and can be overridden by
        ``vote_data`` since it is spread last).
        """
        vote_id = str(uuid.uuid4())[:8]

        record = {
            "id": vote_id,
            "timestamp": datetime.now().isoformat(),
            **vote_data,
        }

        with self.lock:
            with open(self.storage_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")

        return vote_id

    def get_all_votes(self) -> List[Dict]:
        """Read every stored vote; corrupt lines are skipped silently."""
        votes: List[Dict] = []
        if not self.storage_path.exists():
            return votes

        with open(self.storage_path, "r", encoding="utf-8") as f:
            for line in f:
                if line.strip():
                    try:
                        votes.append(json.loads(line))
                    except json.JSONDecodeError:
                        # Tolerate a torn/corrupt line rather than failing the whole read.
                        continue
        return votes

    def get_recent_votes(self, limit: int = 50) -> List[Dict]:
        """Return up to ``limit`` most recent votes (oldest first).

        Bug fix: a non-positive ``limit`` now returns an empty list.
        Previously ``votes[-0:]`` is ``votes[0:]``, so ``limit=0`` silently
        returned *all* votes.
        """
        if limit <= 0:
            return []
        return self.get_all_votes()[-limit:]

    def get_model_stats(self) -> Dict[str, Dict]:
        """Aggregate per-model win/loss/tie counts and win rate.

        Votes missing either model name are ignored; ``"skip"`` votes create
        the model entries but do not count toward ``total``.
        """
        stats: Dict[str, Dict] = {}

        for vote in self.get_all_votes():
            model_a = vote.get("model_a")
            model_b = vote.get("model_b")
            result = vote.get("vote")

            if not model_a or not model_b:
                continue

            for model in (model_a, model_b):
                if model not in stats:
                    stats[model] = {"wins": 0, "losses": 0, "ties": 0, "total": 0}

            if result == "a":
                stats[model_a]["wins"] += 1
                stats[model_b]["losses"] += 1
            elif result == "b":
                stats[model_b]["wins"] += 1
                stats[model_a]["losses"] += 1
            elif result == "tie":
                stats[model_a]["ties"] += 1
                stats[model_b]["ties"] += 1

            if result != "skip":
                stats[model_a]["total"] += 1
                stats[model_b]["total"] += 1

        # Derive win rate once all counting is done.
        for s in stats.values():
            s["win_rate"] = s["wins"] / s["total"] if s["total"] > 0 else 0.0

        return stats

    def get_total_votes(self) -> int:
        """Total number of stored votes (including skips)."""
        return len(self.get_all_votes())

    def get_vote_summary(self) -> Dict:
        """Overall counts of a-wins / b-wins / ties / skips."""
        votes = self.get_all_votes()
        # Single pass over the vote values instead of four generator scans.
        results = [v.get("vote") for v in votes]

        return {
            "total": len(votes),
            "a_wins": results.count("a"),
            "b_wins": results.count("b"),
            "ties": results.count("tie"),
            "skips": results.count("skip"),
        }

    def export_to_json(self, output_path: str):
        """Dump every vote to ``output_path`` as a pretty-printed JSON array."""
        votes = self.get_all_votes()
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(votes, f, ensure_ascii=False, indent=2)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# Module-level singleton instance
_vote_storage: Optional[VoteStorage] = None


def get_vote_storage(storage_path: str = "votes.jsonl") -> VoteStorage:
    """Return the process-wide VoteStorage, creating it on first use.

    NOTE: ``storage_path`` only matters on the first call; later calls
    return the cached instance regardless of the argument.
    """
    global _vote_storage
    if _vote_storage is None:
        _vote_storage = VoteStorage(storage_path)
    return _vote_storage
|