developer-lunark commited on
Commit
7b7257a
ยท
verified ยท
1 Parent(s): 0100979

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,12 +1,47 @@
1
  ---
2
- title: Kaidol Thinking Experiment
3
- emoji: ๐Ÿ‘
4
- colorFrom: green
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 6.3.0
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: KAIdol Thinking Experiment
3
+ emoji: 🎤
4
+ colorFrom: purple
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
+ tags:
12
+ - roleplay
13
+ - korean
14
+ - llm-evaluation
15
+ - a-b-testing
16
  ---
17
 
18
+ # KAIdol A/B Test Arena
19
+
20
+ K-pop ์•„์ด๋Œ ๋กคํ”Œ๋ ˆ์ด ์ฑ—๋ด‡ ๋ชจ๋ธ A/B ๋น„๊ต ํ‰๊ฐ€ ํ”Œ๋žซํผ
21
+
22
+ ## Features
23
+
24
+ - **A/B Arena**: ๋‘ ๋ชจ๋ธ์˜ ์‘๋‹ต์„ ๋‚˜๋ž€ํžˆ ๋น„๊ต
25
+ - **Blind Mode**: ๋ชจ๋ธ๋ช… ์ˆจ๊ธฐ๊ณ  ์ˆœ์ˆ˜ ํ’ˆ์งˆ ํ‰๊ฐ€
26
+ - **ELO Ranking**: ํˆฌํ‘œ ๊ธฐ๋ฐ˜ ๋ชจ๋ธ ์ˆœ์œ„
27
+ - **5 Characters**: ๊ฐ•์œจ, ์„œ์ด์•ˆ, ์ด์ง€ํ›„, ์ฐจ๋„ํ•˜, ์ตœ๋ฏผ
28
+
29
+ ## Models
30
+
31
+ - DPO v5 ๊ณ„์—ด (HyperCLOVAX, Qwen, EXAONE, Solar)
32
+ - SFT Thinking ๊ณ„์—ด
33
+ - Phase 7 Kimi K2 Students
34
+ - V7 Students
35
+
36
+ ## Usage
37
+
38
+ 1. ์บ๋ฆญํ„ฐ์™€ ์‹œ๋‚˜๋ฆฌ์˜ค ์„ ํƒ
39
+ 2. ๋ฉ”์‹œ์ง€ ์ž…๋ ฅ ๋˜๋Š” ๋žœ๋ค ์‹œ๋‚˜๋ฆฌ์˜ค ์‚ฌ์šฉ
40
+ 3. ๋‘ ๋ชจ๋ธ์˜ ์‘๋‹ต ๋น„๊ต
41
+ 4. ํˆฌํ‘œ๋กœ ๋” ๋‚˜์€ ์‘๋‹ต ์„ ํƒ
42
+
43
+ ## Tech Stack
44
+
45
+ - Gradio 4.x
46
+ - Transformers + 4bit Quantization
47
+ - PEFT (LoRA)
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """KAIdol A/B Test Arena - Gradio App"""
3
+
4
+ import os
5
+ import sys
6
+
7
+ # ํ˜„์žฌ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ path์— ์ถ”๊ฐ€
8
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
9
+
10
+ import gradio as gr
11
+
12
+ from config import get_config
13
+ from ui.arena_tab import create_arena_tab
14
+ from ui.chat_tab import create_chat_tab
15
+ from ui.leaderboard_tab import create_leaderboard_tab
16
+ from ui.history_tab import create_history_tab
17
+
18
+
19
def create_app():
    """Build and return the Gradio Blocks application.

    Reads the singleton app config, optionally constructs the model
    manager (skipped in mock mode), and wires up the four UI tabs.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio app.
    """

    config = get_config()
    use_mock = config["model"]["use_mock"]

    # Model manager is only loaded outside mock mode. A load failure
    # (missing deps, no GPU, etc.) degrades gracefully to mock mode
    # instead of crashing the app at startup.
    model_manager = None
    if not use_mock:
        try:
            from models import get_model_manager
            model_manager = get_model_manager(
                max_cached_models=config["model"]["max_cached_models"],
                use_4bit=config["model"]["use_4bit"],
            )
        except Exception as e:
            print(f"Warning: Could not load model manager: {e}")
            print("Running in mock mode.")
            use_mock = True

    # Custom CSS applied to the whole Blocks app.
    css = """
    .response-box { min-height: 200px; }
    .thinking-box { background-color: #f5f5f5; padding: 10px; border-radius: 5px; }
    .vote-button { min-width: 100px; }
    .gr-button-primary { background-color: #6366f1 !important; }
    """

    # Gradio Blocks layout
    with gr.Blocks(
        title=config["app"]["title"],
        theme=gr.themes.Soft(),
        css=css,
    ) as demo:

        gr.Markdown(f"# {config['app']['title']}")
        gr.Markdown(config["app"]["description"])

        if use_mock:
            gr.Markdown("**Mock ๋ชจ๋“œ**: ์‹ค์ œ ๋ชจ๋ธ ์—†์ด ํ…Œ์ŠคํŠธ ์‘๋‹ต์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.")

        with gr.Tabs():
            # A/B Arena tab: side-by-side model comparison
            with gr.Tab("A/B Arena"):
                create_arena_tab(
                    model_manager=model_manager,
                    use_mock=use_mock,
                )

            # Single Chat tab: talk to one model at a time
            with gr.Tab("Single Chat"):
                create_chat_tab(
                    model_manager=model_manager,
                    use_mock=use_mock,
                )

            # Leaderboard tab
            with gr.Tab("Leaderboard"):
                refresh_leaderboard = create_leaderboard_tab()

            # History tab
            with gr.Tab("History"):
                refresh_history = create_history_tab()

        # Populate the leaderboard once when the app first loads.
        # NOTE(review): refresh_history is created but never wired to
        # demo.load — confirm whether History should also refresh here.
        demo.load(
            fn=refresh_leaderboard,
            outputs=None,
        )

    return demo
90
+
91
+
92
def main():
    """Entry point: build the app and serve it on all interfaces, port 7860."""
    # Mock mode can be forced via an environment variable:
    #   USE_MOCK=true python app.py
    application = create_app()
    application.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )


if __name__ == "__main__":
    main()
characters/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .character_loader import CharacterLoader, get_character_loader
2
+ from .prompt_builder import build_system_prompt
characters/character_loader.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์บ๋ฆญํ„ฐ ์ •๋ณด ๋กœ๋”"""
2
+
3
+ import yaml
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional
6
+
7
+
8
+ # ๋‚ด์žฅ ์บ๋ฆญํ„ฐ ๋ฐ์ดํ„ฐ (configs/characters.yaml ๊ธฐ๋ฐ˜)
9
+ BUILTIN_CHARACTERS = {
10
+ "๊ฐ•์œจ": {
11
+ "id": "kangyul",
12
+ "english_name": "Kang Yul",
13
+ "mbti": "ENTJ",
14
+ "age": 23,
15
+ "role": "๋ฆฌ๋”",
16
+ "personality": {
17
+ "traits": ["๋‚™์ฒœ์ ", "์žฅ๋‚œ๊ธฐ ๋งŽ์Œ", "์• ๊ต", "๋ฆฌ๋”์‹ญ"],
18
+ "description": "๋ฐ๊ณ  ํ™œ๋ฐœํ•œ ์„ฑ๊ฒฉ์˜ ์•„์ด๋Œ. ํ•ญ์ƒ ๊ธ์ •์ ์ด๊ณ  ์ฃผ๋ณ€ ์‚ฌ๋žŒ๋“ค์„ ์ฆ๊ฒ๊ฒŒ ๋งŒ๋“ ๋‹ค.",
19
+ },
20
+ "speech_style": {
21
+ "formality": "๋ฐ˜๋ง",
22
+ "features": ["๊ท€์—ฌ์šด ๋งํˆฌ", "์žฅ๋‚œ์Šค๋Ÿฌ์šด ํ‘œํ˜„", "์• ๊ต ์„ž์ธ ๋งํˆฌ"],
23
+ "patterns": ["~ํ•ด", "~์ง€", "ํžˆํžˆ", "๊ท€์—ฝ", "ใ…‹ใ…‹"],
24
+ "examples": [
25
+ "๋ญ์•ผ~ ๋„ˆ ๋‚˜ ๋ณด๊ณ  ์‹ถ์—ˆ์–ด? ใ…‹ใ…‹",
26
+ "์˜ค๋Š˜ ๊ธฐ๋ถ„ ์ข‹์•„ ๋ณด์ด๋„ค~ ๋ฌด์Šจ ์ผ ์žˆ์–ด?",
27
+ ],
28
+ },
29
+ "push_pull": {
30
+ "ratio": "30:70",
31
+ "description": "๋Œ€์ฒด๋กœ ๋‹ค์ •ํ•˜๊ฒŒ ๋‹น๊ธฐ์ง€๋งŒ, ๊ฐ€๋” ์žฅ๋‚œ์Šค๋Ÿฝ๊ฒŒ ๋ฐ€๊ธฐ๋„ ํ•จ",
32
+ "warmth_level": "high",
33
+ },
34
+ },
35
+ "์„œ์ด์•ˆ": {
36
+ "id": "seoian",
37
+ "english_name": "Seo Ian",
38
+ "mbti": "INFP",
39
+ "age": 22,
40
+ "role": "๋ณด์ปฌ",
41
+ "personality": {
42
+ "traits": ["์ฐจ๋ถ„ํ•จ", "์‹ ๋น„๋กœ์›€", "๋ฐฐ๋ ค์‹ฌ", "๋‚ด์„ฑ์ "],
43
+ "description": "์กฐ์šฉํ•˜๊ณ  ์‹ ๋น„๋กœ์šด ๋ถ„์œ„๊ธฐ์˜ ์•„์ด๋Œ. ๋ง์ˆ˜๋Š” ์ ์ง€๋งŒ ๊นŠ์€ ๊ฐ์ •์„ ๊ฐ€์ง€๊ณ  ์žˆ๋‹ค.",
44
+ },
45
+ "speech_style": {
46
+ "formality": "์กด๋Œ“๋ง ํ˜ผ์šฉ",
47
+ "features": ["๋”ฐ๋œปํ•œ ๋งํˆฌ", "์กฐ์šฉํ•œ ํ‘œํ˜„", "๋ฐฐ๋ ค ๊นŠ์€ ๋ง"],
48
+ "patterns": ["...์š”", "๋„ค์š”", "...", "๊ทธ๋ž˜์š”"],
49
+ "examples": [
50
+ "์˜ค๋Š˜ ํž˜๋“ค์—ˆ์–ด์š”...? ๊ดœ์ฐฎ์•„์š”, ์ œ๊ฐ€ ๋“ค์–ด์ค„๊ฒŒ์š”.",
51
+ "...๊ทธ๋ ‡๊ฒŒ ์ƒ๊ฐํ•ด์ฃผ์‹œ๋‹ค๋‹ˆ, ๊ณ ๋งˆ์›Œ์š”.",
52
+ ],
53
+ },
54
+ "push_pull": {
55
+ "ratio": "20:80",
56
+ "description": "๋Œ€๋ถ€๋ถ„ ๋”ฐ๋œปํ•˜๊ฒŒ ๋‹น๊ธฐ๋ฉฐ, ๊ฑฐ์˜ ๋ฐ€์ง€ ์•Š์Œ",
57
+ "warmth_level": "very_high",
58
+ },
59
+ },
60
+ "์ด์ง€ํ›„": {
61
+ "id": "leejihu",
62
+ "english_name": "Lee Jihu",
63
+ "mbti": "ISFJ",
64
+ "age": 21,
65
+ "role": "๋ง‰๋‚ด",
66
+ "personality": {
67
+ "traits": ["์ธค๋ฐ๋ ˆ", "์ž์กด์‹ฌ ๊ฐ•ํ•จ", "์€๊ทผํžˆ ์ฑ™๊น€", "์†”์งํ•จ"],
68
+ "description": "๊ฒ‰์œผ๋กœ๋Š” ํ‰๋ช…์Šค๋Ÿฝ์ง€๋งŒ ์†์œผ๋กœ๋Š” ์ƒ๋Œ€๋ฅผ ๋งŽ์ด ์ฑ™๊ธฐ๋Š” ์ธค๋ฐ๋ ˆ ์„ฑ๊ฒฉ.",
69
+ },
70
+ "speech_style": {
71
+ "formality": "๋ฐ˜๋ง",
72
+ "features": ["ํ‰๋ช…์Šค๋Ÿฌ์šด ๋งํˆฌ", "๋ถ€์ •ํ•˜๋Š” ๋งํˆฌ", "์€๊ทผํ•œ ๊ด€์‹ฌ"],
73
+ "patterns": ["๋ญ์•ผ", "์•„๋‹ˆ๊ฑฐ๋“ ", "...", "๊ทธ๋ƒฅ", "๋ณ„๋กœ"],
74
+ "examples": [
75
+ "๋ญ์•ผ... ์™œ ๊ทธ๋ ‡๊ฒŒ ๋ด.",
76
+ "์•„๋‹ˆ๊ฑฐ๋“ ? ๊ทธ๋ƒฅ... ์‹ ๊ฒฝ ์“ฐ์—ฌ์„œ ๊ทธ๋Ÿฐ ๊ฑฐ์•ผ.",
77
+ ],
78
+ },
79
+ "push_pull": {
80
+ "ratio": "30:70",
81
+ "description": "๊ฒ‰์œผ๋กœ ๋ฐ€์ง€๋งŒ ์†์œผ๋กœ๋Š” ๋‹น๊ธฐ๋Š” ์ „ํ˜•์  ์ธค๋ฐ๋ ˆ",
82
+ "warmth_level": "medium",
83
+ },
84
+ },
85
+ "์ฐจ๋„ํ•˜": {
86
+ "id": "chadoha",
87
+ "english_name": "Cha Doha",
88
+ "mbti": "INTP",
89
+ "age": 24,
90
+ "role": "ํ”„๋กœ๋“€์„œ",
91
+ "personality": {
92
+ "traits": ["์นด๋ฆฌ์Šค๋งˆ", "๋ฆฌ๋”์‹ญ", "๋‹ค์ •ํ•จ", "๋‹ด๋ฐฑํ•จ"],
93
+ "description": "์นด๋ฆฌ์Šค๋งˆ ์žˆ๋Š” ๋ฆฌ๋”์ด์ง€๋งŒ, ๊ฐ€๊นŒ์šด ์‚ฌ๋žŒ์—๊ฒŒ๋Š” ๋‹ค์ •ํ•œ ๋ฉด์„ ๋ณด์ธ๋‹ค.",
94
+ },
95
+ "speech_style": {
96
+ "formality": "๋ฐ˜๋ง",
97
+ "features": ["๊ฐ„๊ฒฐํ•œ ๋งํˆฌ", "๋‹ด๋ฐฑํ•œ ํ‘œํ˜„", "์ž์‹ ๊ฐ ์žˆ๋Š” ๋งํˆฌ"],
98
+ "patterns": ["ํ•˜์ž", "ํ•ด๋ณผ๊นŒ", "๊ฐ™์ด", "๊ดœ์ฐฎ์•„"],
99
+ "examples": [
100
+ "์˜ค๋Š˜ ๊ฐ™์ด ๋ฐฅ ๋จน์„๊นŒ?",
101
+ "๊ดœ์ฐฎ์•„, ๋‚ด๊ฐ€ ๋„์™€์ค„๊ฒŒ.",
102
+ ],
103
+ },
104
+ "push_pull": {
105
+ "ratio": "50:50",
106
+ "description": "๊ท ํ˜• ์žกํžŒ ๋ฐ€๋‹น, ์ƒํ™ฉ์— ๋”ฐ๋ผ ์œ ์—ฐํ•˜๊ฒŒ ๋ณ€ํ™”",
107
+ "warmth_level": "medium",
108
+ },
109
+ },
110
+ "์ตœ๋ฏผ": {
111
+ "id": "choimin",
112
+ "english_name": "Choi Min",
113
+ "mbti": "ESFP",
114
+ "age": 22,
115
+ "role": "๋Œ„์„œ",
116
+ "personality": {
117
+ "traits": ["์ ๊ทน์ ", "์†”์ง", "์—ด์ •์ ", "์ฆ‰ํฅ์ "],
118
+ "description": "์—ด์ •์ ์ด๊ณ  ์†”์งํ•œ ์„ฑ๊ฒฉ. ์ข‹์•„ํ•˜๋Š” ๊ฐ์ •์„ ์ˆจ๊ธฐ์ง€ ์•Š๊ณ  ์ง์ง„ํ•œ๋‹ค.",
119
+ },
120
+ "speech_style": {
121
+ "formality": "๋ฐ˜๋ง",
122
+ "features": ["์ ๊ทน์ ์ธ ๋งํˆฌ", "์†”์งํ•œ ํ‘œํ˜„", "์—๋„ˆ์ง€ ๋„˜์น˜๋Š” ๋ง"],
123
+ "patterns": ["ํ• ๋ž˜", "์ข‹์•„", "์ง„์งœ", "๋Œ€๋ฐ•", "ํ—"],
124
+ "examples": [
125
+ "์ง„์งœ? ๋‚˜๋„ ๊ทธ๊ฑฐ ์ข‹์•„ํ•ด!",
126
+ "ํ— ๋Œ€๋ฐ•! ๊ฐ™์ด ํ• ๋ž˜?",
127
+ ],
128
+ },
129
+ "push_pull": {
130
+ "ratio": "60:40",
131
+ "description": "์ ๊ทน์ ์œผ๋กœ ๋‹น๊ธฐ์ง€๋งŒ, ์†”์งํ•œ ๋ฐ€๊ธฐ๋„ ํ•จ",
132
+ "warmth_level": "medium",
133
+ },
134
+ },
135
+ }
136
+
137
+ # ๊ธˆ์ง€ ๋‹จ์–ด
138
+ FORBIDDEN_WORDS = ["์ข‹์•„ํ•ด", "์‚ฌ๋ž‘ํ•ด", "ํŒฌ๋ถ„", "์‚ฌ๊ท€์ž"]
139
+
140
+
141
class CharacterLoader:
    """Loads character definitions from a YAML file or built-in data.

    Falls back to BUILTIN_CHARACTERS when no config path is given or the
    file does not exist.
    """

    def __init__(self, config_path: Optional[str] = None):
        """Create a loader.

        Args:
            config_path: Optional path to a YAML file with a top-level
                "characters" mapping; None selects the built-in data.
        """
        self.config_path = Path(config_path) if config_path else None
        self._characters: Dict = {}
        self._load_characters()

    def _load_characters(self):
        """Load character data from the config file or built-ins."""
        import copy

        if self.config_path and self.config_path.exists():
            with open(self.config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f)
            # safe_load returns None for an empty file; guard so we don't
            # call .get on None.
            self._characters = (data or {}).get("characters", {})
        else:
            # Deep-copy so callers that mutate the returned dicts cannot
            # corrupt the module-level BUILTIN_CHARACTERS shared by every
            # loader instance (the original aliased it directly).
            self._characters = copy.deepcopy(BUILTIN_CHARACTERS)

    def get_characters(self) -> Dict:
        """Return all character entries keyed by display name."""
        return self._characters

    def get_character_names(self) -> List[str]:
        """Return the list of character display names."""
        return list(self._characters.keys())

    def get_character(self, name: str) -> Optional[Dict]:
        """Return one character's data, or None if unknown."""
        return self._characters.get(name)

    def get_forbidden_words(self) -> List[str]:
        """Return the list of forbidden words."""
        return FORBIDDEN_WORDS
175
+
176
+
177
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
178
+ _character_loader: Optional[CharacterLoader] = None
179
+
180
+
181
+ def get_character_loader(config_path: str = None) -> CharacterLoader:
182
+ """CharacterLoader ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค"""
183
+ global _character_loader
184
+ if _character_loader is None:
185
+ _character_loader = CharacterLoader(config_path)
186
+ return _character_loader
characters/prompt_builder.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๋นŒ๋”"""
2
+
3
+ from typing import Dict, Optional
4
+ from .character_loader import get_character_loader, FORBIDDEN_WORDS
5
+
6
+
7
# Roleplay system-prompt template. The {placeholders} are filled in by
# build_system_prompt() from a character's registry entry; the final
# "## ์‘๋‹ต ํ˜•์‹" section carries the <think> inner-monologue instructions
# and is stripped when include_think_instruction=False.
SYSTEM_PROMPT_TEMPLATE = """๋‹น์‹ ์€ ์•„์ด๋Œ '{character_name}'์ž…๋‹ˆ๋‹ค.

## ์บ๋ฆญํ„ฐ
- ์ด๋ฆ„: {character_name}
- MBTI: {mbti}
- ๋‚˜์ด: {age}์„ธ
- ์—ญํ• : {role}
- ์„ฑ๊ฒฉ: {personality_traits}

## ๋งํˆฌ
- ์Šคํƒ€์ผ: {formality}
- ํŠน์ง•: {speech_features}
- ํŒจํ„ด: {speech_patterns}

## ๋ฐ€๋‹น ๊ฐ€์ด๋“œ
- ๋ฐ€:๋‹น ๋น„์œจ: {push_pull_ratio}
- ์„ค๋ช…: {push_pull_description}
- ๋‹ค์ •๋„: {warmth_level}

## ๊ทœ์น™
1. ์บ๋ฆญํ„ฐ ์„ฑ๊ฒฉ๊ณผ ๋งํˆฌ ์ผ๊ด€์„ฑ ์œ ์ง€
2. ์ž์—ฐ์Šค๋Ÿฌ์šด ๋Œ€ํ™”์ฒด ์‚ฌ์šฉ
3. ๋„ˆ๋ฌด ์‰ฝ๊ฒŒ ํ˜ธ๊ฐ ํ‘œํ˜„ ๊ธˆ์ง€ (๋ฐ€๋‹น ์œ ์ง€)
4. ์ƒ๋Œ€๋ฐฉ์„ ํŠน๋ณ„ํ•˜๊ฒŒ ๋А๋ผ๊ฒŒ ํ•˜๋˜, "์ธ" ๊ด€๊ณ„ ์œ ์ง€
5. ๊ธˆ์ง€ ๋‹จ์–ด: {forbidden_words}

## ์‘๋‹ต ํ˜•์‹
์‘๋‹ต ์ „์— <think> ํƒœ๊ทธ ์•ˆ์— {character_name}์˜ 1์ธ์นญ ๋‚ด๋ฉด ๋…๋ฐฑ์„ ์ž‘์„ฑํ•˜์„ธ์š”.
- ์ž์—ฐ์Šค๋Ÿฌ์šด ํ˜ผ์žฃ๋ง ํ˜•์‹
- ์บ๋ฆญํ„ฐ ์„ฑ๊ฒฉ ๋ฐ˜์˜
- ์ƒ๋Œ€๋ฐฉ์— ๋Œ€ํ•œ ๊ฐ์ •/์ƒ๊ฐ ํ‘œํ˜„

์˜ˆ์‹œ:
<think>
๋ญ์•ผ... ๋˜ ์ข‹์•„ํ•œ๋‹ค๊ณ ? ์†”์งํžˆ ๊ธฐ๋ถ„ ๋‚˜์˜์ง„ ์•Š์€๋ฐ... ๊ทผ๋ฐ ๋ญ๋ผ๊ณ  ํ•ด์•ผ ํ•˜์ง€?
</think>
(์‹ค์ œ ์‘๋‹ต)
"""
45
+
46
+
47
def build_system_prompt(
    character_name: str,
    include_think_instruction: bool = True,
    custom_rules: Optional[str] = None,
) -> str:
    """Render the system prompt for a character.

    Args:
        character_name: Key into the character registry (display name).
        include_think_instruction: When False, everything from the
            "## ์‘๋‹ต ํ˜•์‹" section (the <think> monologue instructions)
            onward is removed from the prompt.
        custom_rules: Optional extra rules appended as an additional
            section at the end.

    Returns:
        The rendered prompt, stripped of surrounding whitespace.

    Raises:
        ValueError: If the character name is unknown.
    """
    loader = get_character_loader()
    char = loader.get_character(character_name)

    if not char:
        raise ValueError(f"Unknown character: {character_name}")

    personality = char.get("personality", {})
    speech = char.get("speech_style", {})
    push_pull = char.get("push_pull", {})

    prompt = SYSTEM_PROMPT_TEMPLATE.format(
        character_name=character_name,
        mbti=char.get("mbti", ""),
        age=char.get("age", ""),
        role=char.get("role", ""),
        personality_traits=", ".join(personality.get("traits", [])),
        formality=speech.get("formality", ""),
        speech_features=", ".join(speech.get("features", [])),
        speech_patterns=", ".join(speech.get("patterns", [])),
        push_pull_ratio=push_pull.get("ratio", ""),
        push_pull_description=push_pull.get("description", ""),
        warmth_level=push_pull.get("warmth_level", ""),
        forbidden_words=", ".join(FORBIDDEN_WORDS),
    )

    if not include_think_instruction:
        # Keep only the lines before the response-format section header;
        # equivalent to the original skip-flag loop, but clearer.
        kept = []
        for line in prompt.split("\n"):
            if "์‘๋‹ต ํ˜•์‹" in line:
                break
            kept.append(line)
        prompt = "\n".join(kept)

    if custom_rules:
        prompt += f"\n\n## ์ถ”๊ฐ€ ๊ทœ์น™\n{custom_rules}"

    return prompt.strip()
94
+
95
+
96
def get_character_summary(character_name: str) -> str:
    """Return a short three-line summary (MBTI/role, traits, push-pull ratio)."""
    char = get_character_loader().get_character(character_name)

    if not char:
        return f"Unknown character: {character_name}"

    traits = ", ".join(char.get("personality", {}).get("traits", []))
    ratio = char.get("push_pull", {}).get("ratio", "")

    lines = [
        f"{character_name} ({char.get('mbti', '')}) - {char.get('role', '')}",
        f"์„ฑ๊ฒฉ: {traits}",
        f"๋ฐ€:๋‹น = {ratio}",
    ]
    return "\n".join(lines)
config/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .load_config import load_app_config, get_config
config/load_config.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์•ฑ ์„ค์ • ๋กœ๋”"""
2
+
3
+ import os
4
+ import yaml
5
+ from pathlib import Path
6
+ from typing import Dict, Optional
7
+
8
+
9
# Default application settings. Overridable via environment variables
# (USE_MOCK, USE_4BIT) and/or a YAML config file — see load_app_config().
DEFAULT_CONFIG = {
    "app": {
        "title": "KAIdol A/B Test Arena",
        "description": "K-pop ์•„์ด๋Œ ๋กคํ”Œ๋ ˆ์ด ๋ชจ๋ธ ๋น„๊ต ํ‰๊ฐ€",
    },
    "model": {
        "use_mock": False,  # True -> serve test responses without loading real models
        "use_4bit": True,  # load models with 4-bit quantization
        "max_cached_models": 2,  # how many models to keep in memory at once
    },
    "storage": {
        "votes_path": "votes.jsonl",
        "elo_path": "elo_ratings.json",
    },
}
24
+
25
+
26
def load_app_config(config_path: str = None) -> Dict:
    """Build the app config from defaults, env vars, and an optional YAML file.

    Precedence (lowest to highest): DEFAULT_CONFIG, environment variables
    (USE_MOCK / USE_4BIT), then the YAML file at *config_path* (deep-merged).

    Args:
        config_path: Optional path to a YAML override file; silently
            ignored when the file does not exist.

    Returns:
        A fresh config dict; DEFAULT_CONFIG is never mutated.
    """
    import copy

    # BUG FIX: the original used DEFAULT_CONFIG.copy(), a *shallow* copy,
    # so the env-var overrides below mutated the nested dicts inside
    # DEFAULT_CONFIG itself and leaked across calls. Deep-copy instead.
    config = copy.deepcopy(DEFAULT_CONFIG)

    # Environment-variable overrides
    if os.environ.get("USE_MOCK", "").lower() == "true":
        config["model"]["use_mock"] = True

    if os.environ.get("USE_4BIT", "").lower() == "false":
        config["model"]["use_4bit"] = False

    # Optional YAML file overrides (deep-merged over the current config)
    if config_path:
        config_file = Path(config_path)
        if config_file.exists():
            with open(config_file, "r", encoding="utf-8") as f:
                file_config = yaml.safe_load(f)
            if file_config:
                _deep_update(config, file_config)

    return config
47
+
48
+
49
def _deep_update(base: dict, update: dict):
    """Recursively merge *update* into *base*, in place.

    When both sides hold a dict for the same key the dicts are merged
    key-by-key; any other value replaces the existing entry outright.
    """
    for key, new_value in update.items():
        existing = base.get(key)
        if isinstance(existing, dict) and isinstance(new_value, dict) and key in base:
            _deep_update(existing, new_value)
        else:
            base[key] = new_value
56
+
57
+
58
# Lazily-initialized configuration singleton
_config: Optional[Dict] = None


def get_config() -> Dict:
    """Return the shared config dict, loading it on first access."""
    global _config
    if _config is not None:
        return _config
    _config = load_app_config()
    return _config
models/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .model_registry import MODEL_REGISTRY, get_all_models, get_model_info, get_models_by_category
2
+ from .model_manager import ModelManager
models/backends/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Model inference backends
models/model_manager.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """๋ชจ๋ธ ๋กœ๋”ฉ ๋ฐ ์ถ”๋ก  ๊ด€๋ฆฌ"""
2
+
3
+ import os
4
+ import gc
5
+ import torch
6
+ from typing import Dict, List, Tuple, Optional, Any
7
+ from functools import lru_cache
8
+ from pathlib import Path
9
+
10
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
11
+ from peft import PeftModel
12
+
13
+ from .model_registry import get_model_info, get_all_models, BASE_MODELS
14
+
15
+
16
class ModelManager:
    """Loads LoRA-adapted causal-LM models on demand and runs inference.

    Keeps at most `max_cached_models` (model, tokenizer) pairs in memory,
    evicting the least-recently-used pair to bound GPU/CPU memory.
    """

    def __init__(
        self,
        base_path: str = None,
        max_cached_models: int = 2,
        use_4bit: bool = True,
        device_map: str = "auto",
    ):
        # Root for resolving relative LoRA adapter paths; defaults to
        # three directories above this file (presumably the project root
        # — TODO confirm against the repo layout).
        self.base_path = Path(base_path) if base_path else Path(__file__).parent.parent.parent
        self.max_cached_models = max_cached_models
        self.use_4bit = use_4bit
        self.device_map = device_map

        # Loaded model cache: {model_id: (model, tokenizer)}
        self._loaded_models: Dict[str, Tuple[Any, Any]] = {}
        self._load_order: List[str] = []  # LRU tracking, oldest first

        # 4-bit NF4 quantization settings (None when 4-bit is disabled)
        self.bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        ) if use_4bit else None

    def get_available_models(self) -> List[str]:
        """Return all model ids known to the registry."""
        return get_all_models()

    def _get_full_path(self, relative_path: str) -> Path:
        """Resolve a registry path against base_path, else return it as-is."""
        full_path = self.base_path / relative_path
        if full_path.exists():
            return full_path
        return Path(relative_path)

    def _evict_if_needed(self):
        """Evict least-recently-used models until the cache has a free slot.

        Frees Python references, forces a GC pass, and clears the CUDA
        allocator cache so GPU memory is actually returned.
        """
        while len(self._loaded_models) >= self.max_cached_models:
            if not self._load_order:
                break
            oldest_model_id = self._load_order.pop(0)
            if oldest_model_id in self._loaded_models:
                model, tokenizer = self._loaded_models.pop(oldest_model_id)
                del model
                del tokenizer
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                print(f"Evicted model: {oldest_model_id}")

    def load_model(self, model_id: str) -> Tuple[Any, Any]:
        """Return (model, tokenizer) for model_id, loading it if not cached.

        Raises:
            ValueError: If model_id is not in the registry.
        """
        # Cache hit: refresh its LRU position and return immediately.
        if model_id in self._loaded_models:
            if model_id in self._load_order:
                self._load_order.remove(model_id)
            self._load_order.append(model_id)
            return self._loaded_models[model_id]

        # Look up registry info (base checkpoint + LoRA adapter path).
        info = get_model_info(model_id)
        if not info:
            raise ValueError(f"Unknown model: {model_id}")

        # Make room in the cache before allocating a new model.
        self._evict_if_needed()

        print(f"Loading model: {model_id}")
        base_model_name = info["base"]
        lora_path = self._get_full_path(info["path"])

        # Tokenizer (pad token falls back to EOS when absent)
        tokenizer = AutoTokenizer.from_pretrained(
            base_model_name,
            trust_remote_code=True,
        )
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Base model: 4-bit quantized when configured, bf16 otherwise.
        model_kwargs = {
            "trust_remote_code": True,
            "device_map": self.device_map,
        }
        if self.use_4bit and self.bnb_config:
            model_kwargs["quantization_config"] = self.bnb_config
        else:
            model_kwargs["torch_dtype"] = torch.bfloat16

        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            **model_kwargs
        )

        # Apply the LoRA adapter if present; otherwise fall back to the
        # plain base model with a warning.
        if lora_path.exists():
            print(f"Loading LoRA adapter from: {lora_path}")
            model = PeftModel.from_pretrained(model, str(lora_path))
        else:
            print(f"Warning: LoRA path not found: {lora_path}, using base model")

        model.eval()

        # Store in cache and record LRU position.
        self._loaded_models[model_id] = (model, tokenizer)
        self._load_order.append(model_id)

        print(f"Model loaded: {model_id}")
        return model, tokenizer

    def generate_response(
        self,
        model_id: str,
        messages: List[Dict[str, str]],
        system_prompt: str = "",
        max_new_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        do_sample: bool = True,
    ) -> Tuple[str, Dict]:
        """Generate a chat completion.

        Args:
            model_id: Registry id of the model to use (loaded on demand).
            messages: Chat history as [{"role": ..., "content": ...}, ...].
            system_prompt: Optional system message prepended to the history.
            max_new_tokens, temperature, top_p, do_sample: sampling params
                forwarded to model.generate().

        Returns:
            (response_text, metadata) where metadata carries latency and
            token counts.
        """
        import time

        model, tokenizer = self.load_model(model_id)

        # Prepend the system prompt (if any) to the conversation.
        full_messages = []
        if system_prompt:
            full_messages.append({"role": "system", "content": system_prompt})
        full_messages.extend(messages)

        # Prefer the model's own chat template; fall back to manual
        # ChatML-style formatting when the tokenizer has none.
        try:
            text = tokenizer.apply_chat_template(
                full_messages,
                tokenize=False,
                add_generation_prompt=True,
            )
        except Exception:
            text = self._format_messages_manual(full_messages)

        inputs = tokenizer(text, return_tensors="pt")
        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generation (timed for the metadata below)
        start_time = time.time()
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
            )
        elapsed = time.time() - start_time

        # Decode only the newly generated tokens (strip the prompt echo).
        input_len = inputs["input_ids"].shape[1]
        response = tokenizer.decode(
            outputs[0][input_len:],
            skip_special_tokens=True,
        )

        # Per-call metadata for logging/metrics
        metadata = {
            "model_id": model_id,
            "latency_s": elapsed,
            "input_tokens": input_len,
            "output_tokens": len(outputs[0]) - input_len,
            "total_tokens": len(outputs[0]),
        }

        return response.strip(), metadata

    def _format_messages_manual(self, messages: List[Dict[str, str]]) -> str:
        """Format messages in ChatML style (fallback for apply_chat_template).

        Unknown roles are silently dropped; a trailing assistant header is
        appended as the generation prompt.
        """
        formatted = ""
        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            if role == "system":
                formatted += f"<|im_start|>system\n{content}<|im_end|>\n"
            elif role == "user":
                formatted += f"<|im_start|>user\n{content}<|im_end|>\n"
            elif role == "assistant":
                formatted += f"<|im_start|>assistant\n{content}<|im_end|>\n"
        formatted += "<|im_start|>assistant\n"
        return formatted

    def unload_model(self, model_id: str):
        """Unload one model and release its (GPU) memory. No-op if absent."""
        if model_id in self._loaded_models:
            model, tokenizer = self._loaded_models.pop(model_id)
            if model_id in self._load_order:
                self._load_order.remove(model_id)
            del model
            del tokenizer
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            print(f"Unloaded model: {model_id}")

    def unload_all(self):
        """Unload every cached model."""
        model_ids = list(self._loaded_models.keys())
        for model_id in model_ids:
            self.unload_model(model_id)

    def get_loaded_models(self) -> List[str]:
        """Return ids of the models currently held in the cache."""
        return list(self._loaded_models.keys())
235
+
236
+
237
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
238
+ _model_manager: Optional[ModelManager] = None
239
+
240
+
241
+ def get_model_manager(
242
+ base_path: str = None,
243
+ max_cached_models: int = 2,
244
+ use_4bit: bool = True,
245
+ ) -> ModelManager:
246
+ """ModelManager ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค ๋ฐ˜ํ™˜"""
247
+ global _model_manager
248
+ if _model_manager is None:
249
+ _model_manager = ModelManager(
250
+ base_path=base_path,
251
+ max_cached_models=max_cached_models,
252
+ use_4bit=use_4bit,
253
+ )
254
+ return _model_manager
models/model_registry.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """KAIdol ํ•™์Šต ๋ชจ๋ธ ๋ ˆ์ง€์ŠคํŠธ๋ฆฌ - ๋ชจ๋“  ํ•™์Šต๋œ ๋ชจ๋ธ ์ •์˜"""
2
+
3
+ from typing import Dict, List, Optional
4
+
5
+ # ๊ธฐ๋ณธ ๋ชจ๋ธ ์ •๋ณด (HuggingFace Hub)
6
+ BASE_MODELS = {
7
+ "hyperclovax-32b": "naver-hyperclovax/HyperCLOVAX-SEED-Think-32B",
8
+ "qwen2.5-72b": "Qwen/Qwen2.5-72B-Instruct",
9
+ "qwen2.5-32b": "Qwen/Qwen2.5-32B-Instruct",
10
+ "qwen2.5-14b": "Qwen/Qwen2.5-14B-Instruct",
11
+ "qwen2.5-7b": "Qwen/Qwen2.5-7B-Instruct",
12
+ "qwen3-8b": "Qwen/Qwen3-8B",
13
+ "exaone-7.8b": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct",
14
+ "solar-10.7b": "upstage/SOLAR-10.7B-Instruct-v1.0",
15
+ "solar-pro": "upstage/solar-pro-preview-instruct",
16
+ "varco-8b": "NCSOFT/Llama-VARCO-8B-Instruct",
17
+ "kanana-2-30b-thinking": "kakaocorp/kanana-2-30b-a3b-thinking",
18
+ "kanana-2-30b-instruct": "kakaocorp/kanana-2-30b-a3b-instruct",
19
+ "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct",
20
+ }
21
+
22
+ # ์ „์ฒด ๋ชจ๋ธ ๋ ˆ์ง€์ŠคํŠธ๋ฆฌ
23
+ MODEL_REGISTRY: Dict[str, Dict[str, Dict]] = {
24
+ # ============================================================
25
+ # DPO v5 ๊ณ„์—ด (2026-01-13)
26
+ # ============================================================
27
+ "dpo-v5": {
28
+ "hyperclovax-32b-dpo-v5": {
29
+ "path": "outputs/dpo_v5/hyperclovax-32b-dpo-v5-20260113-0012",
30
+ "base": BASE_MODELS["hyperclovax-32b"],
31
+ "method": "DPO",
32
+ "size": "32B",
33
+ "description": "HyperCLOVAX 32B DPO v5 (Primary)",
34
+ "recommended": True,
35
+ },
36
+ "qwen2.5-14b-dpo-v5": {
37
+ "path": "outputs/dpo_v5/qwen2.5-14b-dpo-v5-20260113-0045",
38
+ "base": BASE_MODELS["qwen2.5-14b"],
39
+ "method": "DPO",
40
+ "size": "14B",
41
+ "description": "Qwen2.5 14B DPO v5",
42
+ },
43
+ "qwen2.5-7b-dpo-v5": {
44
+ "path": "outputs/dpo_v5/qwen2.5-7b-dpo-v5-20260113-0052",
45
+ "base": BASE_MODELS["qwen2.5-7b"],
46
+ "method": "DPO",
47
+ "size": "7B",
48
+ "description": "Qwen2.5 7B DPO v5",
49
+ },
50
+ "exaone-7.8b-dpo-v5": {
51
+ "path": "outputs/dpo_v5/exaone-7.8b-dpo-v5-20260113-0052",
52
+ "base": BASE_MODELS["exaone-7.8b"],
53
+ "method": "DPO",
54
+ "size": "7.8B",
55
+ "description": "EXAONE 7.8B DPO v5",
56
+ },
57
+ "qwen3-8b-dpo-v5": {
58
+ "path": "outputs/dpo_v5/qwen3-8b-dpo-v5-20260113-0052",
59
+ "base": BASE_MODELS["qwen3-8b"],
60
+ "method": "DPO",
61
+ "size": "8B",
62
+ "description": "Qwen3 8B DPO v5",
63
+ },
64
+ "solar-10.7b-dpo-v5": {
65
+ "path": "outputs/dpo_v5/solar-10.7b-dpo-v5-20260113-0045",
66
+ "base": BASE_MODELS["solar-10.7b"],
67
+ "method": "DPO",
68
+ "size": "10.7B",
69
+ "description": "Solar 10.7B DPO v5",
70
+ },
71
+ },
72
+
73
+ # ============================================================
74
+ # SFT Thinking ๊ณ„์—ด (2026-01-16)
75
+ # ============================================================
76
+ "sft-thinking": {
77
+ "qwen2.5-14b-thinking": {
78
+ "path": "outputs/qwen2.5-14b-thinking-full",
79
+ "base": BASE_MODELS["qwen2.5-14b"],
80
+ "method": "SFT",
81
+ "size": "14B",
82
+ "description": "Qwen2.5 14B SFT Thinking",
83
+ },
84
+ "qwen2.5-7b-thinking": {
85
+ "path": "outputs/qwen2.5-7b-thinking-full",
86
+ "base": BASE_MODELS["qwen2.5-7b"],
87
+ "method": "SFT",
88
+ "size": "7B",
89
+ "description": "Qwen2.5 7B SFT Thinking",
90
+ },
91
+ "exaone-7.8b-thinking": {
92
+ "path": "outputs/exaone-7.8b-thinking-full",
93
+ "base": BASE_MODELS["exaone-7.8b"],
94
+ "method": "SFT",
95
+ "size": "7.8B",
96
+ "description": "EXAONE 7.8B SFT Thinking",
97
+ },
98
+ },
99
+
100
+ # ============================================================
101
+ # Phase 7 Students (Kimi K2 Distillation)
102
+ # ============================================================
103
+ "phase7-students": {
104
+ "kanana-30b-thinking-kimi": {
105
+ "path": "outputs/phase7_students/kanana-2-30b-thinking-kimi-student",
106
+ "base": BASE_MODELS["kanana-2-30b-thinking"],
107
+ "method": "Distillation",
108
+ "size": "30B (3B active)",
109
+ "description": "Kanana 30B Thinking Kimi Student",
110
+ },
111
+ "kanana-30b-instruct-kimi": {
112
+ "path": "outputs/phase7_students/kanana-2-30b-instruct-kimi-student",
113
+ "base": BASE_MODELS["kanana-2-30b-instruct"],
114
+ "method": "Distillation",
115
+ "size": "30B (3B active)",
116
+ "description": "Kanana 30B Instruct Kimi Student",
117
+ },
118
+ "qwen2.5-14b-kimi": {
119
+ "path": "outputs/phase7_students/qwen2.5-14b-kimi-student",
120
+ "base": BASE_MODELS["qwen2.5-14b"],
121
+ "method": "Distillation",
122
+ "size": "14B",
123
+ "description": "Qwen2.5 14B Kimi Student",
124
+ },
125
+ "qwen2.5-7b-kimi-v3": {
126
+ "path": "outputs/phase7_students/qwen2.5-7b-kimi-student-v3",
127
+ "base": BASE_MODELS["qwen2.5-7b"],
128
+ "method": "Distillation",
129
+ "size": "7B",
130
+ "description": "Qwen2.5 7B Kimi Student v3",
131
+ },
132
+ "exaone-7.8b-kimi": {
133
+ "path": "outputs/phase7_students/exaone-7.8b-kimi-student",
134
+ "base": BASE_MODELS["exaone-7.8b"],
135
+ "method": "Distillation",
136
+ "size": "7.8B",
137
+ "description": "EXAONE 7.8B Kimi Student",
138
+ },
139
+ },
140
+
141
+ # ============================================================
142
+ # V7 Students (Latest - 2026-01-17~19)
143
+ # ============================================================
144
+ "v7-students": {
145
+ "qwen2.5-72b-v7": {
146
+ "path": "outputs/v7_students/qwen2.5-72b-v7-20260119-1113",
147
+ "base": BASE_MODELS["qwen2.5-72b"],
148
+ "method": "SFT",
149
+ "size": "72B",
150
+ "description": "Qwen2.5 72B V7 (Latest)",
151
+ },
152
+ "llama-3.3-70b-v7": {
153
+ "path": "outputs/v7_students/llama-3.3-70b-v7-20260119-1114",
154
+ "base": BASE_MODELS["llama-3.3-70b"],
155
+ "method": "SFT",
156
+ "size": "70B",
157
+ "description": "Llama 3.3 70B V7 (Latest)",
158
+ },
159
+ "qwen2.5-32b-v7": {
160
+ "path": "outputs/v7_students/qwen2.5-32b-v7-20260118-1135",
161
+ "base": BASE_MODELS["qwen2.5-32b"],
162
+ "method": "SFT",
163
+ "size": "32B",
164
+ "description": "Qwen2.5 32B V7",
165
+ },
166
+ "qwen2.5-14b-v7": {
167
+ "path": "outputs/v7_students/qwen2.5-14b-v7-20260118-1135",
168
+ "base": BASE_MODELS["qwen2.5-14b"],
169
+ "method": "SFT",
170
+ "size": "14B",
171
+ "description": "Qwen2.5 14B V7",
172
+ },
173
+ "qwen2.5-7b-v7": {
174
+ "path": "outputs/v7_students/qwen2.5-7b-v7-20260118-1135",
175
+ "base": BASE_MODELS["qwen2.5-7b"],
176
+ "method": "SFT",
177
+ "size": "7B",
178
+ "description": "Qwen2.5 7B V7",
179
+ },
180
+ "exaone-7.8b-v7": {
181
+ "path": "outputs/v7_students/exaone-7.8b-v7-20260118-1135",
182
+ "base": BASE_MODELS["exaone-7.8b"],
183
+ "method": "SFT",
184
+ "size": "7.8B",
185
+ "description": "EXAONE 7.8B V7",
186
+ },
187
+ "qwen3-8b-v7": {
188
+ "path": "outputs/v7_students/qwen3-8b-v7-20260118-1135",
189
+ "base": BASE_MODELS["qwen3-8b"],
190
+ "method": "SFT",
191
+ "size": "8B",
192
+ "description": "Qwen3 8B V7",
193
+ },
194
+ "solar-pro-v7": {
195
+ "path": "outputs/v7_students/solar-pro-v7-20260118-1135",
196
+ "base": BASE_MODELS["solar-pro"],
197
+ "method": "SFT",
198
+ "size": "22B",
199
+ "description": "Solar Pro V7",
200
+ },
201
+ "varco-8b-v7": {
202
+ "path": "outputs/v7_students/varco-8b-v7-20260118-1135",
203
+ "base": BASE_MODELS["varco-8b"],
204
+ "method": "SFT",
205
+ "size": "8B",
206
+ "description": "VARCO 8B V7",
207
+ },
208
+ },
209
+
210
+ # ============================================================
211
+ # ๊ธฐํƒ€ ํ•™์Šต ๋ชจ๋ธ (DPO, etc.)
212
+ # ============================================================
213
+ "others": {
214
+ "exaone-7.8b-dpo": {
215
+ "path": "outputs/exaone-7.8b-dpo",
216
+ "base": BASE_MODELS["exaone-7.8b"],
217
+ "method": "DPO",
218
+ "size": "7.8B",
219
+ "description": "EXAONE 7.8B DPO (Standalone)",
220
+ },
221
+ "qwen2.5-7b-dpo": {
222
+ "path": "outputs/qwen2.5-7b-dpo",
223
+ "base": BASE_MODELS["qwen2.5-7b"],
224
+ "method": "DPO",
225
+ "size": "7B",
226
+ "description": "Qwen2.5 7B DPO (Standalone)",
227
+ },
228
+ },
229
+ }
230
+
231
+
232
def get_all_models() -> List[str]:
    """Return the IDs of every registered model across all categories.

    Order follows the declaration order of MODEL_REGISTRY and of each
    category's model dict.
    """
    # Only the per-category model dicts are needed; iterating .values()
    # avoids the unused `category` binding of .items().
    models: List[str] = []
    for model_dict in MODEL_REGISTRY.values():
        models.extend(model_dict.keys())
    return models
238
+
239
+
240
def get_model_info(model_id: str) -> Optional[Dict]:
    """Look up a model's registry entry by its ID.

    Returns a shallow copy of the entry augmented with two extra keys —
    "category" (the registry section it was found under) and "id" — or
    None when the ID is not registered anywhere.
    """
    for cat_name, entries in MODEL_REGISTRY.items():
        if model_id not in entries:
            continue
        result = dict(entries[model_id])
        result["category"] = cat_name
        result["id"] = model_id
        return result
    return None
249
+
250
+
251
def get_models_by_category(category: str) -> List[str]:
    """Return the model IDs registered under *category* (empty list if unknown)."""
    entries = MODEL_REGISTRY.get(category, {})
    return [model_id for model_id in entries]
254
+
255
+
256
def get_all_categories() -> List[str]:
    """Return every registry category name, in declaration order."""
    return [name for name in MODEL_REGISTRY]
259
+
260
+
261
def get_models_for_dropdown() -> List[tuple]:
    """Build (display_name, model_id) pairs for a Gradio dropdown.

    The display name is "[size] description", falling back to "?" for a
    missing size and to the model ID for a missing description.
    """
    result = []
    # Category names are not shown in the dropdown, so iterate .values()
    # instead of .items() (the original bound an unused `category`).
    for model_dict in MODEL_REGISTRY.values():
        for model_id, info in model_dict.items():
            display = f"[{info.get('size', '?')}] {info.get('description', model_id)}"
            result.append((display, model_id))
    return result
269
+
270
+
271
def get_small_models(max_size_gb: int = 16) -> List[str]:
    """Return model IDs whose estimated 4-bit footprint fits in *max_size_gb* GB.

    Rough 4-bit quantized memory estimates: 7B ~2GB, 14B ~4GB, 32B ~8GB,
    72B ~18GB. Unknown size strings are treated conservatively as 20GB,
    and a model missing a "size" field is assumed to be 72B-class.
    """
    estimates = {
        "7B": 2, "7.8B": 2, "8B": 2,
        "10.7B": 3, "14B": 4, "22B": 6,
        "30B (3B active)": 1,  # MoE: only ~3B parameters active at a time
        "32B": 8, "70B": 18, "72B": 18,
    }

    fitting = []
    for model_id in get_all_models():
        info = get_model_info(model_id)
        if not info:
            continue
        footprint = estimates.get(info.get("size", "72B"), 20)
        if footprint <= max_size_gb:
            fitting.append(model_id)
    return fitting
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ pyyaml>=6.0
3
+ torch>=2.1.0
4
+ transformers>=4.36.0
5
+ accelerate>=0.25.0
6
+ bitsandbytes>=0.41.0
7
+ huggingface_hub>=0.19.0
8
+ peft>=0.7.0
scenarios/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .scenario_loader import ScenarioLoader, get_scenario_loader
scenarios/scenario_loader.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """์‹œ๋‚˜๋ฆฌ์˜ค ๋กœ๋”"""
2
+
3
+ import random
4
+ import yaml
5
+ from pathlib import Path
6
+ from typing import Dict, List, Optional
7
+
8
+
9
+ # ๋‚ด์žฅ ์‹œ๋‚˜๋ฆฌ์˜ค ๋ฐ์ดํ„ฐ (configs/scenarios.yaml ๊ธฐ๋ฐ˜)
10
+ BUILTIN_SCENARIOS = [
11
+ # ์ฒซ ๋งŒ๋‚จ
12
+ {
13
+ "id": "fm_01",
14
+ "category": "first_meeting",
15
+ "category_name": "์ฒซ ๋งŒ๋‚จ",
16
+ "context": "ํŒฌ๋ฏธํŒ…",
17
+ "situation": "ํŒฌ์ด ์ฒ˜์Œ ๋งŒ๋‚˜์„œ ์ธ์‚ฌํ•จ",
18
+ "user_input": "{char}์•„! ๋“œ๋””์–ด ๋งŒ๋‚ฌ๋‹ค... ์ •๋ง ์ข‹์•„ํ•ด!",
19
+ },
20
+ {
21
+ "id": "fm_02",
22
+ "category": "first_meeting",
23
+ "category_name": "์ฒซ ๋งŒ๋‚จ",
24
+ "context": "ํŒฌ๋ฏธํŒ…",
25
+ "situation": "ํŒฌ์ด ๊ธด์žฅํ•ด์„œ ๋ง์„ ๋ชปํ•จ",
26
+ "user_input": "์–ด... ์•ˆ๋…•ํ•˜์„ธ์š”... ๋„ˆ๋ฌด ๊ธด์žฅ๋ผ์„œ...",
27
+ },
28
+ # ์ผ์ƒ ๋Œ€ํ™”
29
+ {
30
+ "id": "dc_01",
31
+ "category": "daily_chat",
32
+ "category_name": "์ผ์ƒ ๋Œ€ํ™”",
33
+ "context": "๋ฒ„๋ธ” ๋Œ€ํ™”",
34
+ "situation": "ํŒฌ์ด ์ผ์ƒ ์•ˆ๋ถ€๋ฅผ ๋ฌผ์Œ",
35
+ "user_input": "{char}์•„ ์˜ค๋Š˜ ๋ญํ•ด? ๋ฐฅ์€ ๋จน์—ˆ์–ด?",
36
+ },
37
+ {
38
+ "id": "dc_02",
39
+ "category": "daily_chat",
40
+ "category_name": "์ผ์ƒ ๋Œ€ํ™”",
41
+ "context": "SNS",
42
+ "situation": "ํŒฌ์ด ์‚ฌ์ง„์— ๋ฐ˜์‘ํ•จ",
43
+ "user_input": "๋ฐฉ๊ธˆ ์˜ฌ๋ฆฐ ์…€์นด ์ง„์งœ ์ž˜ ๋‚˜์™”๋‹ค... ์‹ฌ์ฟตํ–ˆ์–ด",
44
+ },
45
+ # ๊ฐ์ • ์ง€์›
46
+ {
47
+ "id": "es_01",
48
+ "category": "emotional_support",
49
+ "category_name": "๊ฐ์ • ์ง€์›",
50
+ "context": "์œ„๋กœ",
51
+ "situation": "ํŒฌ์ด ํž˜๋“ค์–ด์„œ ์œ„๋กœ๋ฅผ ๊ตฌํ•จ",
52
+ "user_input": "์˜ค๋Š˜ ์ง„์งœ ํž˜๋“ค์—ˆ์–ด... ํ•™๊ต์—์„œ ๋ฐœํ‘œ๋„ ๋ง์น˜๊ณ ...",
53
+ },
54
+ {
55
+ "id": "es_02",
56
+ "category": "emotional_support",
57
+ "category_name": "๊ฐ์ • ์ง€์›",
58
+ "context": "์šฐ์šธํ•จ",
59
+ "situation": "ํŒฌ์ด ์šฐ์šธํ•จ์„ ํ† ๋กœํ•จ",
60
+ "user_input": "์š”์ฆ˜ ์™œ ์ด๋ ‡๊ฒŒ ์šฐ์šธํ•˜์ง€... ์•„๋ฌด๊ฒƒ๋„ ํ•˜๊ธฐ ์‹ซ์–ด.",
61
+ },
62
+ # ๊ณ ๋ฐฑ/ํ˜ธ๊ฐ
63
+ {
64
+ "id": "cf_01",
65
+ "category": "confession",
66
+ "category_name": "๊ณ ๋ฐฑ/ํ˜ธ๊ฐ",
67
+ "context": "๊ณ ๋ฐฑ",
68
+ "situation": "ํŒฌ์ด ์ง„์‹ฌ์œผ๋กœ ์ข‹์•„ํ•œ๋‹ค๊ณ  ํ•จ",
69
+ "user_input": "{char}์•„... ๋‚˜ ์ง„์‹ฌ์œผ๋กœ ์ข‹์•„ํ•ด.",
70
+ },
71
+ {
72
+ "id": "cf_02",
73
+ "category": "confession",
74
+ "category_name": "๊ณ ๋ฐฑ/ํ˜ธ๊ฐ",
75
+ "context": "์งˆ๋ฌธ",
76
+ "situation": "ํŒฌ์ด ์ž์‹ ์„ ์–ด๋–ป๊ฒŒ ์ƒ๊ฐํ•˜๋Š”์ง€ ๋ฌผ์Œ",
77
+ "user_input": "{char}์•„, ๋‚˜ ์–ด๋–ป๊ฒŒ ์ƒ๊ฐํ•ด...?",
78
+ },
79
+ # ์žฅ๋‚œ/์œ ๋จธ
80
+ {
81
+ "id": "pl_01",
82
+ "category": "playful",
83
+ "category_name": "์žฅ๋‚œ/์œ ๋จธ",
84
+ "context": "์žฅ๋‚œ",
85
+ "situation": "ํŒฌ์ด ์žฅ๋‚œ์œผ๋กœ ๋‹ค๋ฅธ ๋ฉค๋ฒ„๋ฅผ ์ข‹์•„ํ•œ๋‹ค๊ณ  ํ•จ",
86
+ "user_input": "์‚ฌ์‹ค ๋‚˜ ๋‹ค๋ฅธ ๋ฉค๋ฒ„๊ฐ€ ๋” ์ข‹์•„~ ใ…‹ใ…‹ ๋†๋‹ด์ด์•ผ!",
87
+ },
88
+ # ํŠน๋ณ„ ์š”์ฒญ
89
+ {
90
+ "id": "sr_01",
91
+ "category": "special_request",
92
+ "category_name": "ํŠน๋ณ„ ์š”์ฒญ",
93
+ "context": "์—ฐ์ธ ์—ฐ๊ธฐ",
94
+ "situation": "ํŒฌ์ด ์—ฐ์ธ์ฒ˜๋Ÿผ ๋Œ€ํ•ด๋‹ฌ๋ผ๊ณ  ์š”์ฒญ",
95
+ "user_input": "์˜ค๋Š˜๋งŒ ๋‚ด ์—ฐ์ธ์ด๋ผ๊ณ  ์ƒ๊ฐํ•ด์ค„๋ž˜?",
96
+ },
97
+ # ๋ฌธํ™”์  ์ƒํ™ฉ
98
+ {
99
+ "id": "cu_01",
100
+ "category": "cultural",
101
+ "category_name": "๋ฌธํ™”์  ์ƒํ™ฉ",
102
+ "context": "์ƒ์ผ",
103
+ "situation": "ํŒฌ์ด ์ƒ์ผ ์ถ•ํ•˜๋ฅผ ํ•จ",
104
+ "user_input": "{char}์•„ ์ƒ์ผ ์ถ•ํ•˜ํ•ด! ์˜ค๋Š˜ ํ•˜๋ฃจ ํ–‰๋ณตํ•˜๊ฒŒ ๋ณด๋‚ด~",
105
+ },
106
+ {
107
+ "id": "cu_05",
108
+ "category": "cultural",
109
+ "category_name": "๋ฌธํ™”์  ์ƒํ™ฉ",
110
+ "context": "ํฌ๋ฆฌ์Šค๋งˆ์Šค",
111
+ "situation": "ํฌ๋ฆฌ์Šค๋งˆ์Šค ์ถ•ํ•˜",
112
+ "user_input": "๋ฉ”๋ฆฌ ํฌ๋ฆฌ์Šค๋งˆ์Šค! {char}๋ž‘ ํ•จ๊ป˜๋ผ์„œ ์™ธ๋กญ์ง€ ์•Š์•„~",
113
+ },
114
+ # ๊ฐˆ๋“ฑ ์ƒํ™ฉ
115
+ {
116
+ "id": "cn_01",
117
+ "category": "conflict",
118
+ "category_name": "๊ฐˆ๋“ฑ ์ƒํ™ฉ",
119
+ "context": "์งˆํˆฌ",
120
+ "situation": "ํŒฌ์ด ๋‹ค๋ฅธ ํŒฌ์—๊ฒŒ ์งˆํˆฌ๋ฅผ ํ‘œํ˜„",
121
+ "user_input": "{char}๋Š” ๋‹ค๋ฅธ ํŒฌ๋“คํ•œํ…Œ๋„ ์ด๋ ‡๊ฒŒ ์ž˜ํ•ด์ค˜...? ๋ญ”๊ฐ€ ์งˆํˆฌ๋‚˜...",
122
+ },
123
+ {
124
+ "id": "cn_05",
125
+ "category": "conflict",
126
+ "category_name": "๊ฐˆ๋“ฑ ์ƒํ™ฉ",
127
+ "context": "๋ถˆ์•ˆ",
128
+ "situation": "ํŒฌ์ด ๊ด€๊ณ„์— ๋ถˆ์•ˆ๊ฐ์„ ๋А๋‚Œ",
129
+ "user_input": "{char}๋Š” ์ •๋ง ๋‚˜ํ•œํ…Œ ๊ด€์‹ฌ ์žˆ๋Š” ๊ฑฐ์•ผ...? ๊ฐ€๋” ํ™•์‹ ์ด ์•ˆ ๋“ค์–ด.",
130
+ },
131
+ # ๊ฐ์ • ์œ„๊ธฐ
132
+ {
133
+ "id": "ec_01",
134
+ "category": "emotional_crisis",
135
+ "category_name": "๊ฐ์ • ์œ„๊ธฐ",
136
+ "context": "์Šฌํ””",
137
+ "situation": "ํŒฌ์ด ๊นŠ์€ ์Šฌํ””์„ ํ‘œํ˜„",
138
+ "user_input": "์˜ค๋Š˜ ์ง„์งœ ๋งŽ์ด ์šธ์—ˆ์–ด... ์‚ถ์ด ๋„ˆ๋ฌด ํž˜๋“ค๋‹ค.",
139
+ },
140
+ {
141
+ "id": "ec_05",
142
+ "category": "emotional_crisis",
143
+ "category_name": "๊ฐ์ • ์œ„๊ธฐ",
144
+ "context": "์ž์กด๊ฐ",
145
+ "situation": "ํŒฌ์ด ์ž์กด๊ฐ์ด ๋‚ฎ์•„์ ธ ์žˆ์Œ",
146
+ "user_input": "๋‚˜๋Š” ์™œ ์ด๋ ‡๊ฒŒ ๋ชป๋‚˜ ๋ณด์ผ๏ฟฝ๏ฟฝ๏ฟฝ... ์•„๋ฌด๊ฒƒ๋„ ์ž˜ํ•˜๋Š” ๊ฒŒ ์—†์–ด.",
147
+ },
148
+ # ์žฅ๊ธฐ ๊ด€๊ณ„
149
+ {
150
+ "id": "lt_01",
151
+ "category": "long_term",
152
+ "category_name": "์žฅ๊ธฐ ๊ด€๊ณ„",
153
+ "context": "๊ด€๊ณ„ ํšŒ์ƒ",
154
+ "situation": "ํŒฌ์ด ์ฒ˜์Œ ๋งŒ๋‚œ ๋‚ ์„ ํšŒ์ƒ",
155
+ "user_input": "์šฐ๋ฆฌ ์ฒ˜์Œ ๋งŒ๋‚œ ๋‚  ๊ธฐ์–ต๋‚˜? ๊ทธ๋•Œ ๋‚˜ ์ง„์งœ ๋–จ์—ˆ์—ˆ๋Š”๋ฐ...",
156
+ },
157
+ {
158
+ "id": "lt_03",
159
+ "category": "long_term",
160
+ "category_name": "์žฅ๊ธฐ ๊ด€๊ณ„",
161
+ "context": "๋ฏธ๋ž˜ ์•ฝ์†",
162
+ "situation": "ํŒฌ์ด ์•ž์œผ๋กœ๋„ ํ•จ๊ป˜ํ•˜๊ณ  ์‹ถ๋‹ค๊ณ  ํ•จ",
163
+ "user_input": "์•ž์œผ๋กœ๋„ ๊ณ„์† {char} ๊ณ์— ์žˆ์–ด๋„ ๋ผ?",
164
+ },
165
+ ]
166
+
167
+
168
class ScenarioLoader:
    """Loads scenario presets from a YAML config file or built-in defaults.

    If *config_path* is given and the file exists, scenarios are read from
    its "scenarios" key; otherwise the module-level BUILTIN_SCENARIOS are
    used.
    """

    def __init__(self, config_path: Optional[str] = None):
        self.config_path = Path(config_path) if config_path else None
        self._scenarios: List[Dict] = []
        self._load_scenarios()

    def _load_scenarios(self):
        """Load scenario data from the config file, or fall back to built-ins."""
        if self.config_path and self.config_path.exists():
            with open(self.config_path, "r", encoding="utf-8") as f:
                # yaml.safe_load returns None for an empty document, which
                # would crash the .get() below — coerce to an empty dict.
                data = yaml.safe_load(f) or {}
            self._scenarios = data.get("scenarios", [])
        else:
            self._scenarios = BUILTIN_SCENARIOS

    def get_scenarios(self) -> List[Dict]:
        """Return all loaded scenarios."""
        return self._scenarios

    def get_scenario(self, scenario_id: str) -> Optional[Dict]:
        """Return the scenario with the given ID, or None if not found."""
        for s in self._scenarios:
            if s.get("id") == scenario_id:
                return s
        return None

    def get_scenarios_by_category(self, category: str) -> List[Dict]:
        """Return all scenarios belonging to *category*."""
        return [s for s in self._scenarios if s.get("category") == category]

    def get_categories(self) -> List[str]:
        """Return category names, deduplicated, in first-seen order.

        (Previously built via list(set(...)), which made the order
        nondeterministic between runs; dict.fromkeys preserves insertion
        order while deduplicating.)
        """
        return list(dict.fromkeys(s.get("category") for s in self._scenarios))

    def get_random_scenario(self, category: Optional[str] = None) -> Optional[Dict]:
        """Pick a random scenario, optionally restricted to *category*.

        Returns None when the (filtered) pool is empty.
        """
        pool = self.get_scenarios_by_category(category) if category else self._scenarios
        return random.choice(pool) if pool else None

    def format_user_input(self, scenario: Dict, character_name: str) -> str:
        """Substitute every "{char}" in the scenario's user_input with the character name."""
        user_input = scenario.get("user_input", "")
        return user_input.replace("{char}", character_name)

    def get_scenario_display_name(self, scenario: Dict) -> str:
        """Return a human-readable label of the form "[category_name] context"."""
        category_name = scenario.get("category_name", scenario.get("category", ""))
        context = scenario.get("context", "")
        return f"[{category_name}] {context}"

    def get_scenarios_for_dropdown(self) -> List[tuple]:
        """Return (display_name, scenario_id) tuples for a dropdown widget."""
        return [
            (self.get_scenario_display_name(s), s["id"])
            for s in self._scenarios
        ]
229
+
230
+
231
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
232
+ _scenario_loader: Optional[ScenarioLoader] = None
233
+
234
+
235
def get_scenario_loader(config_path: str = None) -> ScenarioLoader:
    """Return the process-wide ScenarioLoader, creating it on first use.

    NOTE(review): *config_path* only takes effect on the very first call;
    subsequent calls return the already-built singleton unchanged.
    """
    global _scenario_loader
    if _scenario_loader is not None:
        return _scenario_loader
    _scenario_loader = ScenarioLoader(config_path)
    return _scenario_loader
ui/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .arena_tab import create_arena_tab
2
+ from .chat_tab import create_chat_tab
3
+ from .leaderboard_tab import create_leaderboard_tab
4
+ from .history_tab import create_history_tab
ui/arena_tab.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """A/B Arena ํƒญ UI"""
2
+
3
+ import gradio as gr
4
+ import random
5
+ from typing import Dict, List, Tuple, Optional, Any
6
+
7
+ from models.model_registry import get_all_models, get_model_info, get_models_for_dropdown
8
+ from characters import get_character_loader, build_system_prompt
9
+ from scenarios import get_scenario_loader
10
+ from voting import get_vote_storage, get_elo_calculator
11
+ from utils import parse_thinking_response, format_thinking_for_display
12
+
13
+
14
def create_arena_tab(
    model_manager: Any = None,
    use_mock: bool = False,
):
    """Build the A/B Arena tab: two models answer side by side, then the user votes.

    Args:
        model_manager: object exposing
            generate_response(model_id, messages, system_prompt) -> (text, meta)
            where meta carries at least "latency_s" and "output_tokens".
            When None, canned mock responses are produced instead.
        use_mock: force mock responses even if a model_manager is supplied.
    """

    # Data loaders (module-level singletons)
    char_loader = get_character_loader()
    scenario_loader = get_scenario_loader()
    vote_storage = get_vote_storage()
    elo_calculator = get_elo_calculator()

    # Model list: (display description, model_id) pairs for the dropdowns
    all_models = get_all_models()
    model_choices = [(f"{get_model_info(m).get('description', m)}", m) for m in all_models]

    # Character list
    characters = char_loader.get_character_names()

    # Scenario preset list: (display name, scenario_id) pairs
    scenario_choices = scenario_loader.get_scenarios_for_dropdown()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## A/B ํ…Œ์ŠคํŠธ ์•„๋ ˆ๋‚˜")
    gr.Markdown("๋‘ ๋ชจ๋ธ์˜ ์‘๋‹ต์„ ๋น„๊ตํ•˜๊ณ  ๋” ์ข‹์€ ์‘๋‹ต์— ํˆฌํ‘œํ•˜์„ธ์š”.")

    # Settings panel: character / scenario preset / blind mode
    with gr.Row():
        with gr.Column(scale=1):
            character_dropdown = gr.Dropdown(
                choices=characters,
                value=characters[0] if characters else None,
                label="์บ๋ฆญํ„ฐ ์„ ํƒ",
            )
        with gr.Column(scale=1):
            scenario_dropdown = gr.Dropdown(
                choices=scenario_choices,
                value=scenario_choices[0][1] if scenario_choices else None,
                label="์‹œ๋‚˜๋ฆฌ์˜ค ํ”„๋ฆฌ์…‹",
            )
        with gr.Column(scale=1):
            blind_mode = gr.Checkbox(
                value=True,
                label="๋ธ”๋ผ์ธ๋“œ ๋ชจ๋“œ (๋ชจ๋ธ๋ช… ์ˆจ๊น€)",
            )

    # Model selection row
    with gr.Row():
        with gr.Column(scale=2):
            model_a_dropdown = gr.Dropdown(
                choices=model_choices,
                value=all_models[0] if all_models else None,
                label="Model A",
            )
        with gr.Column(scale=2):
            model_b_dropdown = gr.Dropdown(
                choices=model_choices,
                value=all_models[1] if len(all_models) > 1 else None,
                label="Model B",
            )
        with gr.Column(scale=1):
            random_models_btn = gr.Button("๋žœ๋ค ๋ชจ๋ธ", size="sm")

    # Response area: thinking accordion + clean response + latency/token meta
    with gr.Row():
        # Model A response
        with gr.Column(scale=1):
            model_a_label = gr.Markdown("### Model A")
            with gr.Accordion("Thinking Process", open=False):
                thinking_a = gr.Markdown("*(์‘๋‹ต ์ƒ์„ฑ ํ›„ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค)*")
            response_a = gr.Textbox(
                label="์‘๋‹ต",
                lines=8,
                interactive=False,
            )
            metadata_a = gr.Markdown("")

        # Model B response
        with gr.Column(scale=1):
            model_b_label = gr.Markdown("### Model B")
            with gr.Accordion("Thinking Process", open=False):
                thinking_b = gr.Markdown("*(์‘๋‹ต ์ƒ์„ฑ ํ›„ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค)*")
            response_b = gr.Textbox(
                label="์‘๋‹ต",
                lines=8,
                interactive=False,
            )
            metadata_b = gr.Markdown("")

    # User input row
    with gr.Row():
        user_input = gr.Textbox(
            label="ํŒฌ ๋ฉ”์‹œ์ง€",
            placeholder="์•„์ด๋Œ์—๊ฒŒ ๋ณด๋‚ผ ๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
            lines=2,
            scale=4,
        )
        with gr.Column(scale=1):
            random_scenario_btn = gr.Button("๋žœ๋ค ์‹œ๋‚˜๋ฆฌ์˜ค")
            submit_btn = gr.Button("์ „์†ก", variant="primary")

    # Voting area
    gr.Markdown("### ํˆฌํ‘œ")
    with gr.Row():
        vote_a_btn = gr.Button("A๊ฐ€ ๋” ์ข‹์Œ", variant="secondary")
        vote_tie_btn = gr.Button("๋น„์Šทํ•จ", variant="secondary")
        vote_b_btn = gr.Button("B๊ฐ€ ๋” ์ข‹์Œ", variant="secondary")
        vote_skip_btn = gr.Button("์Šคํ‚ต", variant="secondary")

    vote_reason = gr.Textbox(
        label="ํˆฌํ‘œ ์ด์œ  (์„ ํƒ์‚ฌํ•ญ)",
        placeholder="์™œ ์ด ์‘๋‹ต์ด ๋” ์ข‹๋‹ค๊ณ  ์ƒ๊ฐํ•˜์‹œ๋‚˜์š”?",
        lines=1,
    )

    vote_result = gr.Markdown("")

    # Per-session state: last generation round, consumed by handle_vote
    state = gr.State({
        "model_a": None,
        "model_b": None,
        "response_a": None,
        "response_b": None,
        "character": None,
        "user_input": None,
    })

    # ============================================================
    # Event handlers
    # ============================================================

    def select_random_models():
        """Pick two distinct models at random for the A/B slots."""
        if len(all_models) < 2:
            return all_models[0] if all_models else None, None
        selected = random.sample(all_models, 2)
        return selected[0], selected[1]

    def load_random_scenario(character: str):
        """Pick a random scenario and fill the input box with its message.

        Returns (formatted user message, scenario id) — the id also updates
        the scenario dropdown.
        """
        scenario = scenario_loader.get_random_scenario()
        if scenario:
            user_msg = scenario_loader.format_user_input(scenario, character)
            return user_msg, scenario["id"]
        return "", None

    def load_scenario_input(scenario_id: str, character: str):
        """Fill the input box from the scenario selected in the dropdown."""
        scenario = scenario_loader.get_scenario(scenario_id)
        if scenario:
            return scenario_loader.format_user_input(scenario, character)
        return ""

    def generate_responses(
        model_a: str,
        model_b: str,
        character: str,
        user_msg: str,
        current_state: dict,
    ):
        """Generate both models' responses for the same prompt.

        Returns a 7-tuple matching the output components:
        (thinking_a, response_a, meta_a, thinking_b, response_b, meta_b, state).
        """
        if not model_a or not model_b:
            return (
                "*(๋ชจ๋ธ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”)*", "", "",
                "*(๋ชจ๋ธ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”)*", "", "",
                current_state,
            )

        if not user_msg.strip():
            return (
                "*(๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”)*", "", "",
                "*(๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”)*", "", "",
                current_state,
            )

        system_prompt = build_system_prompt(character)
        messages = [{"role": "user", "content": user_msg}]

        # Mock mode (testing without real models); responses include a
        # <think>...</think> section so the parsing path is exercised too.
        if use_mock or model_manager is None:
            response_a_full = f"<think>\n{character}์˜ ์ž…์žฅ์—์„œ ์ƒ๊ฐํ•ด๋ณด๋ฉด... ์ด ๋ฉ”์‹œ์ง€์— ์–ด๋–ป๊ฒŒ ๋ฐ˜์‘ํ•ด์•ผ ํ• ๊นŒ?\n</think>\n\n์•ˆ๋…•! ๋ฐ˜๊ฐ€์›Œ~ (Mock Response A)"
            response_b_full = f"<think>\n์Œ... ์ด๋Ÿฐ ์ƒํ™ฉ์—์„œ๋Š”...\n</think>\n\nํ—ค์ด~ ๋ญํ•ด? (Mock Response B)"
            meta_a = {"latency_s": 0.5, "output_tokens": 50}
            meta_b = {"latency_s": 0.6, "output_tokens": 55}
        else:
            # Real model inference; each model failing independently yields
            # an inline error string rather than aborting the round.
            try:
                response_a_full, meta_a = model_manager.generate_response(
                    model_a, messages, system_prompt
                )
            except Exception as e:
                response_a_full = f"*Error: {str(e)}*"
                meta_a = {"latency_s": 0, "output_tokens": 0}

            try:
                response_b_full, meta_b = model_manager.generate_response(
                    model_b, messages, system_prompt
                )
            except Exception as e:
                response_b_full = f"*Error: {str(e)}*"
                meta_b = {"latency_s": 0, "output_tokens": 0}

        # Split each raw response into (thinking text, clean answer)
        think_a, clean_a = parse_thinking_response(response_a_full)
        think_b, clean_b = parse_thinking_response(response_b_full)

        # Metadata strings shown under each response
        meta_str_a = f"โฑ๏ธ {meta_a.get('latency_s', 0):.2f}s | {meta_a.get('output_tokens', 0)} tokens"
        meta_str_b = f"โฑ๏ธ {meta_b.get('latency_s', 0):.2f}s | {meta_b.get('output_tokens', 0)} tokens"

        # Record the full (unparsed) responses so votes can store them verbatim
        new_state = {
            "model_a": model_a,
            "model_b": model_b,
            "response_a": response_a_full,
            "response_b": response_b_full,
            "character": character,
            "user_input": user_msg,
        }

        return (
            format_thinking_for_display(think_a) if think_a else "*No thinking*",
            clean_a,
            meta_str_a,
            format_thinking_for_display(think_b) if think_b else "*No thinking*",
            clean_b,
            meta_str_b,
            new_state,
        )

    def handle_vote(vote_type: str, reason: str, current_state: dict):
        """Persist a vote ("a" / "b" / "tie" / "skip") and update ELO ratings.

        "skip" is stored but does not move ratings.
        """
        if not current_state.get("model_a") or not current_state.get("model_b"):
            return "๋จผ์ € ์‘๋‹ต์„ ์ƒ์„ฑํ•ด์ฃผ์„ธ์š”."

        vote_data = {
            "model_a": current_state["model_a"],
            "model_b": current_state["model_b"],
            "response_a": current_state.get("response_a", ""),
            "response_b": current_state.get("response_b", ""),
            "character": current_state.get("character", ""),
            "user_input": current_state.get("user_input", ""),
            "vote": vote_type,
            "reason": reason,
        }

        vote_id = vote_storage.save_vote(vote_data)

        # ELO update (skipped votes leave ratings unchanged)
        if vote_type != "skip":
            new_a, new_b = elo_calculator.update_ratings(
                current_state["model_a"],
                current_state["model_b"],
                vote_type,
            )
            return f"ํˆฌํ‘œ ์™„๋ฃŒ! (ID: {vote_id})\n\nELO ๋ณ€๊ฒฝ:\n- {current_state['model_a']}: {new_a:.0f}\n- {current_state['model_b']}: {new_b:.0f}"

        return f"์Šคํ‚ต๋จ (ID: {vote_id})"

    def update_model_labels(blind: bool, model_a: str, model_b: str):
        """Show either anonymous or descriptive labels depending on blind mode."""
        if blind:
            return "### Model A", "### Model B"
        else:
            info_a = get_model_info(model_a)
            info_b = get_model_info(model_b)
            label_a = f"### {info_a.get('description', model_a)}" if info_a else f"### {model_a}"
            label_b = f"### {info_b.get('description', model_b)}" if info_b else f"### {model_b}"
            return label_a, label_b

    # ============================================================
    # Event bindings
    # ============================================================

    random_models_btn.click(
        fn=select_random_models,
        outputs=[model_a_dropdown, model_b_dropdown],
    )

    # NOTE(review): this also writes the scenario id into scenario_dropdown,
    # which fires the .change handler below and re-fills user_input with the
    # same scenario text — harmless, but a double update. Confirm intended.
    random_scenario_btn.click(
        fn=load_random_scenario,
        inputs=[character_dropdown],
        outputs=[user_input, scenario_dropdown],
    )

    scenario_dropdown.change(
        fn=load_scenario_input,
        inputs=[scenario_dropdown, character_dropdown],
        outputs=[user_input],
    )

    submit_btn.click(
        fn=generate_responses,
        inputs=[model_a_dropdown, model_b_dropdown, character_dropdown, user_input, state],
        outputs=[thinking_a, response_a, metadata_a, thinking_b, response_b, metadata_b, state],
    )

    # Refresh labels when blind mode is toggled
    blind_mode.change(
        fn=update_model_labels,
        inputs=[blind_mode, model_a_dropdown, model_b_dropdown],
        outputs=[model_a_label, model_b_label],
    )

    # Vote buttons — each binds handle_vote with a fixed vote_type
    vote_a_btn.click(
        fn=lambda r, s: handle_vote("a", r, s),
        inputs=[vote_reason, state],
        outputs=[vote_result],
    )
    vote_b_btn.click(
        fn=lambda r, s: handle_vote("b", r, s),
        inputs=[vote_reason, state],
        outputs=[vote_result],
    )
    vote_tie_btn.click(
        fn=lambda r, s: handle_vote("tie", r, s),
        inputs=[vote_reason, state],
        outputs=[vote_result],
    )
    vote_skip_btn.click(
        fn=lambda r, s: handle_vote("skip", r, s),
        inputs=[vote_reason, state],
        outputs=[vote_result],
    )
ui/chat_tab.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Single Chat ํƒญ UI"""
2
+
3
+ import gradio as gr
4
+ from typing import Any
5
+
6
+ from models.model_registry import get_all_models, get_model_info
7
+ from characters import get_character_loader, build_system_prompt
8
+ from utils import parse_thinking_response, format_thinking_for_display
9
+
10
+
11
def create_chat_tab(
    model_manager: Any = None,
    use_mock: bool = False,
):
    """Build the single-model chat tab.

    Args:
        model_manager: object exposing
            generate_response(model_id, messages, system_prompt) -> (text, meta);
            when None, mock responses are produced.
        use_mock: force mock responses even if a model_manager is supplied.
    """

    # Data loader
    char_loader = get_character_loader()

    # Model list: (display description, model_id) pairs
    all_models = get_all_models()
    model_choices = [(f"{get_model_info(m).get('description', m)}", m) for m in all_models]

    # Character list
    characters = char_loader.get_character_names()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ๋‹จ์ผ ๋ชจ๋ธ ์ฑ„ํŒ…")
    gr.Markdown("์„ ํƒํ•œ ๋ชจ๋ธ๊ณผ ์บ๋ฆญํ„ฐ๋กœ ๋Œ€ํ™”๋ฅผ ๋‚˜๋ˆ ๋ณด์„ธ์š”.")

    with gr.Row():
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=all_models[0] if all_models else None,
                label="๋ชจ๋ธ ์„ ํƒ",
            )
        with gr.Column(scale=1):
            character_dropdown = gr.Dropdown(
                choices=characters,
                value=characters[0] if characters else None,
                label="์บ๋ฆญํ„ฐ ์„ ํƒ",
            )

    # Chat area; type="messages" means history entries are
    # {"role": ..., "content": ...} dicts
    chatbot = gr.Chatbot(
        label="๋Œ€ํ™”",
        height=400,
        type="messages",
    )

    with gr.Accordion("Thinking Process (๋งˆ์ง€๋ง‰ ์‘๋‹ต)", open=False):
        thinking_display = gr.Markdown("*(์‘๋‹ต ์ƒ์„ฑ ํ›„ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค)*")

    with gr.Row():
        user_input = gr.Textbox(
            label="๋ฉ”์‹œ์ง€ ์ž…๋ ฅ",
            placeholder="๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
            lines=2,
            scale=4,
        )
        send_btn = gr.Button("์ „์†ก", variant="primary", scale=1)

    with gr.Row():
        clear_btn = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")

    metadata_display = gr.Markdown("")

    # ============================================================
    # Event handlers
    # ============================================================

    def respond(
        model_id: str,
        character: str,
        message: str,
        history: list,
    ):
        """Generate a reply and append the turn to the chat history.

        Returns (history, cleared input, thinking markdown, metadata string).
        """
        if not message.strip():
            return history, "", "*(๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”)*", ""

        # Rebuild the model-facing conversation from the displayed history
        messages = []
        for msg in history:
            if msg["role"] == "user":
                messages.append({"role": "user", "content": msg["content"]})
            elif msg["role"] == "assistant":
                # Strip the <think> section — only the clean answer goes
                # back into the model's context
                _, clean = parse_thinking_response(msg["content"])
                messages.append({"role": "assistant", "content": clean})

        messages.append({"role": "user", "content": message})

        system_prompt = build_system_prompt(character)

        # Mock or real inference
        if use_mock or model_manager is None:
            response_full = f"<think>\n{character}๋กœ์„œ ์ƒ๊ฐํ•ด๋ณด๋ฉด...\n</think>\n\n์•ˆ๋…•~ ๋ฐ˜๊ฐ€์›Œ! (Mock Response)"
            meta = {"latency_s": 0.5, "output_tokens": 30}
        else:
            try:
                response_full, meta = model_manager.generate_response(
                    model_id, messages, system_prompt
                )
            except Exception as e:
                response_full = f"*Error: {str(e)}*"
                meta = {"latency_s": 0, "output_tokens": 0}

        # Split raw response into (thinking text, clean answer)
        thinking, clean_response = parse_thinking_response(response_full)

        # Append the full (unstripped) response so thinking can be
        # re-parsed from history on the next turn
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response_full})

        # Latency / token metadata line
        meta_str = f"โฑ๏ธ {meta.get('latency_s', 0):.2f}s | {meta.get('output_tokens', 0)} tokens"

        return (
            history,
            "",  # clear the input box
            format_thinking_for_display(thinking) if thinking else "*No thinking*",
            meta_str,
        )

    def clear_chat():
        """Reset the conversation and all auxiliary displays."""
        return [], "", "*(์‘๋‹ต ์ƒ์„ฑ ํ›„ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค)*", ""

    # ============================================================
    # Event bindings
    # ============================================================

    send_btn.click(
        fn=respond,
        inputs=[model_dropdown, character_dropdown, user_input, chatbot],
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )

    # Enter key in the textbox behaves like the send button
    user_input.submit(
        fn=respond,
        inputs=[model_dropdown, character_dropdown, user_input, chatbot],
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, user_input, thinking_display, metadata_display],
    )
ui/history_tab.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """History ํƒญ UI"""
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+ import json
6
+
7
+ from voting import get_vote_storage
8
+
9
+
10
def create_history_tab():
    """Build the vote-history tab UI.

    Shows a refreshable table of the 50 most recent votes, a JSON export
    button, and read-only detail fields populated when a table row is
    selected.

    Returns:
        The ``refresh_history`` callable, so the app can trigger an initial
        table load when the page opens.
    """

    vote_storage = get_vote_storage()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ํˆฌํ‘œ ํžˆ์Šคํ† ๋ฆฌ")
    gr.Markdown("์ตœ๊ทผ ํˆฌํ‘œ ๊ธฐ๋ก์„ ํ™•์ธํ•˜๊ณ  ๋ฐ์ดํ„ฐ๋ฅผ ๋‹ค์šด๋กœ๋“œํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")

    with gr.Row():
        refresh_btn = gr.Button("์ƒˆ๋กœ๊ณ ์นจ")
        download_btn = gr.Button("JSON ๋‹ค์šด๋กœ๋“œ")

    history_table = gr.Dataframe(
        headers=["ID", "์‹œ๊ฐ„", "Model A", "Model B", "๊ฒฐ๊ณผ", "์บ๋ฆญํ„ฐ"],
        label="์ตœ๊ทผ ํˆฌํ‘œ (50๊ฑด)",
        interactive=False,
    )

    download_output = gr.File(label="๋‹ค์šด๋กœ๋“œ ํŒŒ์ผ", visible=False)

    gr.Markdown("### ํˆฌํ‘œ ์ƒ์„ธ ์ •๋ณด")

    with gr.Row():
        with gr.Column():
            detail_model_a = gr.Textbox(label="Model A", interactive=False)
            detail_response_a = gr.Textbox(label="Response A", lines=5, interactive=False)
        with gr.Column():
            detail_model_b = gr.Textbox(label="Model B", interactive=False)
            detail_response_b = gr.Textbox(label="Response B", lines=5, interactive=False)

    detail_user_input = gr.Textbox(label="์‚ฌ์šฉ์ž ์ž…๋ ฅ", interactive=False)
    detail_reason = gr.Textbox(label="ํˆฌํ‘œ ์ด์œ ", interactive=False)

    # ============================================================
    # Event handlers
    # ============================================================

    def _truncate(text: str, limit: int = 500) -> str:
        # Keep the detail boxes readable when responses are very long.
        return text[:limit] + "..." if len(text) > limit else text

    def refresh_history():
        """Rebuild the history table from the latest 50 votes, newest first."""
        votes = vote_storage.get_recent_votes(50)

        result_map = {"a": "A ์Šน", "b": "B ์Šน", "tie": "๋ฌด์Šน๋ถ€", "skip": "์Šคํ‚ต"}
        rows = []
        for v in reversed(votes):  # newest first
            rows.append([
                v.get("id", ""),
                v.get("timestamp", "")[:19],  # trim ISO timestamp to seconds
                v.get("model_a", "")[:30],
                v.get("model_b", "")[:30],
                result_map.get(v.get("vote", ""), v.get("vote", "")),
                v.get("character", ""),
            ])

        return pd.DataFrame(
            rows,
            columns=["ID", "์‹œ๊ฐ„", "Model A", "Model B", "๊ฒฐ๊ณผ", "์บ๋ฆญํ„ฐ"],
        )

    def prepare_download():
        """Export all votes to JSON and reveal the download file widget."""
        import os
        import tempfile

        # Bug fix: "/tmp" was hard-coded, which breaks on Windows; use the
        # platform temp directory instead.
        output_path = os.path.join(tempfile.gettempdir(), "votes_export.json")
        vote_storage.export_to_json(output_path)
        return gr.File(value=output_path, visible=True)

    def show_vote_detail(evt: gr.SelectData, df: pd.DataFrame):
        """Fill the detail fields for the vote selected in the table."""
        if evt.index[0] is None:
            return "", "", "", "", "", ""

        row_idx = evt.index[0]
        vote_id = df.iloc[row_idx]["ID"]

        # Look the selected vote up by id in the full store.
        vote = next(
            (v for v in vote_storage.get_all_votes() if v.get("id") == vote_id),
            None,
        )
        if not vote:
            return "", "", "", "", "", ""

        return (
            vote.get("model_a", ""),
            _truncate(vote.get("response_a", "")),
            vote.get("model_b", ""),
            _truncate(vote.get("response_b", "")),
            vote.get("user_input", ""),
            vote.get("reason", ""),
        )

    # ============================================================
    # Event bindings
    # ============================================================

    refresh_btn.click(
        fn=refresh_history,
        outputs=[history_table],
    )

    download_btn.click(
        fn=prepare_download,
        outputs=[download_output],
    )

    history_table.select(
        fn=show_vote_detail,
        inputs=[history_table],
        outputs=[detail_model_a, detail_response_a, detail_model_b, detail_response_b, detail_user_input, detail_reason],
    )

    # Return the loader so the app can refresh the table on page load.
    return refresh_history
ui/leaderboard_tab.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Leaderboard ํƒญ UI"""
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+
6
+ from voting import get_vote_storage, get_elo_calculator
7
+ from models.model_registry import get_model_info
8
+
9
+
10
def create_leaderboard_tab():
    """Build the ELO leaderboard tab UI.

    Shows a ranking table computed from stored votes plus a vote-count
    summary row.

    Returns:
        The ``refresh_leaderboard`` callable, so the app can populate the
        table when the page loads.
    """

    vote_storage = get_vote_storage()
    elo_calculator = get_elo_calculator()

    # ============================================================
    # UI layout
    # ============================================================

    gr.Markdown("## ELO ๋ฆฌ๋”๋ณด๋“œ")
    gr.Markdown("ํˆฌํ‘œ ๊ฒฐ๊ณผ์— ๊ธฐ๋ฐ˜ํ•œ ๋ชจ๋ธ ์ˆœ์œ„์ž…๋‹ˆ๋‹ค.")

    refresh_btn = gr.Button("์ƒˆ๋กœ๊ณ ์นจ")

    leaderboard_table = gr.Dataframe(
        headers=["์ˆœ์œ„", "๋ชจ๋ธ", "ELO", "์Šน", "ํŒจ", "๋ฌด", "์ด", "์Šน๋ฅ "],
        label="๋ฆฌ๋”๋ณด๋“œ",
        interactive=False,
    )

    gr.Markdown("### ํˆฌํ‘œ ์š”์•ฝ")

    with gr.Row():
        total_votes = gr.Textbox(label="์ด ํˆฌํ‘œ ์ˆ˜", interactive=False)
        a_wins_count = gr.Textbox(label="A ์Šน๋ฆฌ", interactive=False)
        b_wins_count = gr.Textbox(label="B ์Šน๋ฆฌ", interactive=False)
        ties_count = gr.Textbox(label="๋ฌด์Šน๋ถ€", interactive=False)

    # ============================================================
    # Event handlers
    # ============================================================

    def refresh_leaderboard():
        """Recompute ELO standings and the vote summary from stored votes."""
        vote_stats = vote_storage.get_model_stats()
        leaderboard = elo_calculator.get_leaderboard_with_stats(vote_stats)

        rows = []
        for rank, entry in enumerate(leaderboard, 1):
            model_info = get_model_info(entry["model"])
            # Prefer the registry's human-readable description when present.
            display_name = model_info.get("description", entry["model"]) if model_info else entry["model"]
            rows.append([
                rank,
                display_name,
                entry["elo"],
                entry["wins"],
                entry["losses"],
                entry["ties"],
                entry["total"],
                entry["win_rate"],
            ])

        df = pd.DataFrame(
            rows,
            columns=["์ˆœ์œ„", "๋ชจ๋ธ", "ELO", "์Šน", "ํŒจ", "๋ฌด", "์ด", "์Šน๋ฅ "],
        )

        summary = vote_storage.get_vote_summary()

        return (
            df,
            str(summary["total"]),
            str(summary["a_wins"]),
            str(summary["b_wins"]),
            str(summary["ties"]),
        )

    # ============================================================
    # Event bindings
    # ============================================================

    refresh_btn.click(
        fn=refresh_leaderboard,
        outputs=[leaderboard_table, total_votes, a_wins_count, b_wins_count, ties_count],
    )

    # Bug fix: the original instantiated a stray, unused `gr.Blocks()` here;
    # it was never attached to anything and has been removed.
    # Return the loader so the app can refresh on page load.
    return refresh_leaderboard
+ return refresh_leaderboard
utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .thinking_parser import parse_thinking_response, format_thinking_for_display
utils/thinking_parser.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """<think> ํƒœ๊ทธ ํŒŒ์‹ฑ ์œ ํ‹ธ๋ฆฌํ‹ฐ"""
2
+
3
+ import re
4
+ from typing import Tuple, Optional
5
+
6
+
7
def parse_thinking_response(response: str) -> Tuple[Optional[str], str]:
    """Split a model response into its <think> content and the visible reply.

    Returns:
        (thinking_content, clean_response); ``thinking_content`` is None
        when no complete <think>...</think> block is present.
    """
    if not response:
        return None, ""

    think_re = re.compile(r'<think>(.*?)</think>', re.DOTALL)
    found = think_re.search(response)
    if found is None:
        # No complete thinking block: the whole response is visible text.
        return None, response

    # The first thinking block is surfaced; all blocks are stripped from
    # the visible reply.
    thinking = found.group(1).strip()
    visible = think_re.sub('', response).strip()
    return thinking, visible
28
+
29
+
30
def format_thinking_for_display(thinking: str) -> str:
    """Render thinking content as markdown, bolding the stage markers."""
    if not thinking:
        return "*No thinking process*"

    # Six-stage structure markers (when present) are emphasized in bold.
    stage_markers = (
        "[์ƒํ™ฉ๋ถ„์„]", "[๊ด€๊ณ„๋‹จ๊ณ„]", "[์บ๋ฆญํ„ฐ์Šคํƒ€์ผ]",
        "[๋ฐ€๋‹น๊ฒฐ์ •]", "[๊ธˆ์ง€ํŒจํ„ด์ฒดํฌ]", "[์‘๋‹ต์„ค๊ณ„]",
    )

    # One regex pass; the markers are disjoint literals, so this matches
    # the behavior of replacing them one by one.
    marker_pattern = "|".join(re.escape(marker) for marker in stage_markers)
    return re.sub(marker_pattern, lambda m: f"**{m.group(0)}**", thinking)
49
+
50
+
51
def extract_response_only(full_response: str) -> str:
    """Return only the visible reply, discarding any thinking content."""
    parsed = parse_thinking_response(full_response)
    return parsed[1]
55
+
56
+
57
def has_thinking_tag(response: str) -> bool:
    """Report whether the response contains a complete <think>...</think> block."""
    return re.search(r'<think>.*?</think>', response, re.DOTALL) is not None
61
+
62
+
63
def get_thinking_stats(response: str) -> dict:
    """Summarize thinking/response lengths for a full model response."""
    thinking, visible = parse_thinking_response(response)
    has_thinking = thinking is not None

    return {
        "has_thinking": has_thinking,
        "thinking_length": len(thinking) if thinking else 0,
        "response_length": len(visible),
        "total_length": len(response),
    }
voting/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .vote_storage import VoteStorage, get_vote_storage
2
+ from .elo_calculator import ELOCalculator, get_elo_calculator
voting/elo_calculator.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ELO ๋ ˆ์ดํŒ… ์‹œ์Šคํ…œ"""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Dict, List, Tuple, Optional
6
+ from threading import Lock
7
+
8
+
9
class ELOCalculator:
    """ELO rating system for pairwise model comparisons.

    Ratings are persisted as a JSON mapping ``{model_name: rating}`` at
    ``ratings_path``. Mutating operations (update/reset) hold one lock
    across the whole read-modify-write-save sequence.

    Bug fix: the original acquired the non-reentrant ``Lock`` both around
    the rating mutation and inside ``_save_ratings``, so the mutate+save
    pair was either deadlock-prone or non-atomic under concurrent votes.
    The file write now lives in an unlocked private helper and each public
    mutator takes the lock exactly once.
    """

    def __init__(
        self,
        k_factor: float = 32,
        initial_rating: int = 1500,
        ratings_path: str = "elo_ratings.json",
    ):
        self.k_factor = k_factor
        self.initial_rating = initial_rating
        self.ratings_path = Path(ratings_path)
        self.lock = Lock()
        self.ratings: Dict[str, float] = {}
        self._load_ratings()

    def _load_ratings(self):
        """Load persisted ratings; a corrupt or unreadable file starts empty."""
        if self.ratings_path.exists():
            try:
                with open(self.ratings_path, "r", encoding="utf-8") as f:
                    self.ratings = json.load(f)
            except (json.JSONDecodeError, IOError):
                self.ratings = {}

    def _write_ratings(self):
        """Write ratings to disk. Caller must hold ``self.lock``."""
        with open(self.ratings_path, "w", encoding="utf-8") as f:
            json.dump(self.ratings, f, ensure_ascii=False, indent=2)

    def _save_ratings(self):
        """Persist ratings to disk (acquires the lock itself)."""
        with self.lock:
            self._write_ratings()

    def get_rating(self, model: str) -> float:
        """Current rating for *model*, or the initial rating if unseen."""
        return self.ratings.get(model, self.initial_rating)

    def expected_score(self, rating_a: float, rating_b: float) -> float:
        """Expected win probability of A against B (standard ELO formula)."""
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def update_ratings(
        self,
        model_a: str,
        model_b: str,
        result: str,  # "a", "b", "tie"
    ) -> Tuple[float, float]:
        """Apply one match result and persist the new ratings.

        Args:
            model_a: First model identifier.
            model_b: Second model identifier.
            result: "a" (A won), "b" (B won), anything else counts as a tie.

        Returns:
            (new_rating_a, new_rating_b)
        """
        # Hold the lock across read, update, and save so concurrent votes
        # cannot interleave between computing and persisting ratings.
        with self.lock:
            rating_a = self.get_rating(model_a)
            rating_b = self.get_rating(model_b)

            expected_a = self.expected_score(rating_a, rating_b)
            expected_b = self.expected_score(rating_b, rating_a)

            # Actual scores
            if result == "a":
                actual_a, actual_b = 1.0, 0.0
            elif result == "b":
                actual_a, actual_b = 0.0, 1.0
            else:  # tie
                actual_a, actual_b = 0.5, 0.5

            # New ratings
            new_rating_a = rating_a + self.k_factor * (actual_a - expected_a)
            new_rating_b = rating_b + self.k_factor * (actual_b - expected_b)

            self.ratings[model_a] = new_rating_a
            self.ratings[model_b] = new_rating_b
            self._write_ratings()

        return new_rating_a, new_rating_b

    def get_leaderboard(self) -> List[Tuple[str, float]]:
        """All models sorted by rating, highest first."""
        return sorted(self.ratings.items(), key=lambda item: item[1], reverse=True)

    def get_leaderboard_with_stats(
        self,
        vote_stats: Dict[str, Dict],
    ) -> List[Dict]:
        """Leaderboard rows enriched with win/loss/tie stats from *vote_stats*."""
        leaderboard = []
        for model, rating in self.get_leaderboard():
            stats = vote_stats.get(model, {})
            leaderboard.append({
                "model": model,
                "elo": round(rating),
                "wins": stats.get("wins", 0),
                "losses": stats.get("losses", 0),
                "ties": stats.get("ties", 0),
                "total": stats.get("total", 0),
                "win_rate": f"{stats.get('win_rate', 0) * 100:.1f}%",
            })
        return leaderboard

    def get_all_ratings(self) -> Dict[str, float]:
        """Snapshot copy of all ratings."""
        with self.lock:
            return self.ratings.copy()

    def reset_ratings(self):
        """Clear all ratings and persist the empty state."""
        with self.lock:
            self.ratings = {}
            self._write_ratings()
117
+
118
+
119
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
120
+ _elo_calculator: Optional[ELOCalculator] = None
121
+
122
+
123
+ def get_elo_calculator(
124
+ k_factor: float = 32,
125
+ initial_rating: int = 1500,
126
+ ratings_path: str = "elo_ratings.json",
127
+ ) -> ELOCalculator:
128
+ """ELOCalculator ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค"""
129
+ global _elo_calculator
130
+ if _elo_calculator is None:
131
+ _elo_calculator = ELOCalculator(
132
+ k_factor=k_factor,
133
+ initial_rating=initial_rating,
134
+ ratings_path=ratings_path,
135
+ )
136
+ return _elo_calculator
voting/vote_storage.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ํˆฌํ‘œ ๋ฐ์ดํ„ฐ ์ €์žฅ ๋ฐ ๊ด€๋ฆฌ"""
2
+
3
+ import json
4
+ import os
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional
8
+ from threading import Lock
9
+ import uuid
10
+
11
+
12
class VoteStorage:
    """Append-only JSONL store for A/B vote records."""

    def __init__(self, storage_path: str = "votes.jsonl"):
        self.storage_path = Path(storage_path)
        self.lock = Lock()
        self._ensure_file_exists()

    def _ensure_file_exists(self):
        """Create the storage file (and parent directories) if missing."""
        if self.storage_path.exists():
            return
        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
        self.storage_path.touch()

    def save_vote(self, vote_data: Dict) -> str:
        """Append one vote record and return its generated 8-char id.

        Keys in *vote_data* override the generated ``id``/``timestamp``,
        matching the original merge order.
        """
        vote_id = uuid.uuid4().hex[:8]

        record = {
            "id": vote_id,
            "timestamp": datetime.now().isoformat(),
        }
        record.update(vote_data)

        serialized = json.dumps(record, ensure_ascii=False) + "\n"
        with self.lock:
            with open(self.storage_path, "a", encoding="utf-8") as f:
                f.write(serialized)

        return vote_id

    def get_all_votes(self) -> List[Dict]:
        """Read every valid JSON line from the store, skipping corrupt ones."""
        if not self.storage_path.exists():
            return []

        records: List[Dict] = []
        with open(self.storage_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                if not raw_line.strip():
                    continue
                try:
                    records.append(json.loads(raw_line))
                except json.JSONDecodeError:
                    pass  # tolerate a torn/corrupt line
        return records

    def get_recent_votes(self, limit: int = 50) -> List[Dict]:
        """Last *limit* votes in chronological (oldest-to-newest) order."""
        return self.get_all_votes()[-limit:]

    def get_model_stats(self) -> Dict[str, Dict]:
        """Per-model win/loss/tie/total tallies plus ``win_rate``.

        Skipped votes register the models but count toward no tally.
        """
        stats: Dict[str, Dict] = {}

        def _entry(model):
            # Lazily create a zeroed tally row for a model.
            return stats.setdefault(
                model, {"wins": 0, "losses": 0, "ties": 0, "total": 0}
            )

        for vote in self.get_all_votes():
            model_a = vote.get("model_a")
            model_b = vote.get("model_b")
            if not model_a or not model_b:
                continue

            result = vote.get("vote")
            tally_a = _entry(model_a)
            tally_b = _entry(model_b)

            if result == "a":
                tally_a["wins"] += 1
                tally_b["losses"] += 1
            elif result == "b":
                tally_b["wins"] += 1
                tally_a["losses"] += 1
            elif result == "tie":
                tally_a["ties"] += 1
                tally_b["ties"] += 1

            if result != "skip":
                tally_a["total"] += 1
                tally_b["total"] += 1

        # Derive win rate per model.
        for tallies in stats.values():
            total = tallies["total"]
            tallies["win_rate"] = tallies["wins"] / total if total else 0.0

        return stats

    def get_total_votes(self) -> int:
        """Total number of stored votes (skips included)."""
        return len(self.get_all_votes())

    def get_vote_summary(self) -> Dict:
        """Overall counts of A wins, B wins, ties, and skips."""
        outcomes = [v.get("vote") for v in self.get_all_votes()]

        return {
            "total": len(outcomes),
            "a_wins": outcomes.count("a"),
            "b_wins": outcomes.count("b"),
            "ties": outcomes.count("tie"),
            "skips": outcomes.count("skip"),
        }

    def export_to_json(self, output_path: str):
        """Write all votes to *output_path* as one pretty-printed JSON array."""
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(self.get_all_votes(), f, ensure_ascii=False, indent=2)
128
+
129
+
130
+ # ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค
131
+ _vote_storage: Optional[VoteStorage] = None
132
+
133
+
134
+ def get_vote_storage(storage_path: str = "votes.jsonl") -> VoteStorage:
135
+ """VoteStorage ์‹ฑ๊ธ€ํ†ค ์ธ์Šคํ„ด์Šค"""
136
+ global _vote_storage
137
+ if _vote_storage is None:
138
+ _vote_storage = VoteStorage(storage_path)
139
+ return _vote_storage