fix: resolve cold-start timeout by pre-downloading the VARCO model
#!/usr/bin/env python3
"""KAIdol A/B Test Arena - GPU Version with Real Model Inference"""
import gradio as gr
import random
import json
import uuid
import re
import gc
import os
from datetime import datetime
from functools import lru_cache
# GPU inference (optional imports)
TORCH_AVAILABLE = False
IMPORT_ERROR = None
torch = None
try:
    import torch as _torch
    torch = _torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
    from peft import PeftModel
    TORCH_AVAILABLE = True
    # Debug info
    print("=" * 50)
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU count: {torch.cuda.device_count()}")
        print(f"GPU name: {torch.cuda.get_device_name(0)}")
    else:
        print("CUDA not available at module load time")
    print("=" * 50)
except Exception as e:
    import traceback
    IMPORT_ERROR = f"{type(e).__name__}: {str(e)}"
    print(f"Warning: Import error - {IMPORT_ERROR}")
    traceback.print_exc()
    print("Running in mock mode")
def is_gpu_available():
    """Check GPU availability dynamically"""
    if not TORCH_AVAILABLE:
        return False
    return torch.cuda.is_available()
# For backwards compatibility
GPU_AVAILABLE = is_gpu_available()
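# Note: GPU_AVAILABLE is evaluated once at import time and can go stale if CUDA only
# becomes usable after startup; prefer calling is_gpu_available() for the live status
# (ModelManager.load_model below already does).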
# ============================================================
# Model registry (HF Hub paths)
# ============================================================
MODELS = {
    # DPO v5 (7-14B)
    "qwen2.5-7b-dpo-v5": {
        "hf_repo": "developer-lunark/kaidol-qwen2.5-7b-dpo-v5",
        "base_model": "Qwen/Qwen2.5-7B-Instruct",
        "size": "7B", "method": "DPO", "desc": "Qwen2.5 7B DPO v5"
    },
    "qwen2.5-14b-dpo-v5": {
        "hf_repo": "developer-lunark/kaidol-qwen2.5-14b-dpo-v5",
        "base_model": "Qwen/Qwen2.5-14B-Instruct",
        "size": "14B", "method": "DPO", "desc": "Qwen2.5 14B DPO v5"
    },
    "exaone-7.8b-dpo-v5": {
        "hf_repo": "developer-lunark/kaidol-exaone-7.8b-dpo-v5",
        "base_model": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct",
        "size": "7.8B", "method": "DPO", "desc": "EXAONE 7.8B DPO v5"
    },
    "qwen3-8b-dpo-v5": {
        "hf_repo": "developer-lunark/kaidol-qwen3-8b-dpo-v5",
        "base_model": "Qwen/Qwen3-8B",
        "size": "8B", "method": "DPO", "desc": "Qwen3 8B DPO v5"
    },
    "solar-10.7b-dpo-v5": {
        "hf_repo": "developer-lunark/kaidol-solar-10.7b-dpo-v5",
        "base_model": "upstage/SOLAR-10.7B-Instruct-v1.0",  # Fixed: match adapter training
        "size": "10.7B", "method": "DPO", "desc": "Solar 10.7B DPO v5"
    },
    # V7 Students (7-14B)
    "qwen2.5-7b-v7": {
        "hf_repo": "developer-lunark/kaidol-qwen2.5-7b-v7",
        "base_model": "Qwen/Qwen2.5-7B-Instruct",
        "size": "7B", "method": "SFT", "desc": "Qwen2.5 7B V7"
    },
    "qwen2.5-14b-v7": {
        "hf_repo": "developer-lunark/kaidol-qwen2.5-14b-v7",
        "base_model": "Qwen/Qwen2.5-14B-Instruct",
        "size": "14B", "method": "SFT", "desc": "Qwen2.5 14B V7"
    },
    "exaone-7.8b-v7": {
        "hf_repo": "developer-lunark/kaidol-exaone-7.8b-v7",
        "base_model": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct",
        "size": "7.8B", "method": "SFT", "desc": "EXAONE 7.8B V7"
    },
    "qwen3-8b-v7": {
        "hf_repo": "developer-lunark/kaidol-qwen3-8b-v7",
        "base_model": "Qwen/Qwen3-8B",
        "size": "8B", "method": "SFT", "desc": "Qwen3 8B V7"
    },
    "varco-8b-v7": {
        "hf_repo": "developer-lunark/kaidol-varco-8b-v7",
        "base_model": "NCSOFT/Llama-VARCO-8B-Instruct",
        "size": "8B", "method": "SFT", "desc": "VARCO 8B V7"
    },
    # Phase 7 Kimi Students
    "exaone-7.8b-kimi": {
        "hf_repo": "developer-lunark/kaidol-exaone-7.8b-kimi",
        "base_model": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct",
        "size": "7.8B", "method": "Distill", "desc": "EXAONE 7.8B Kimi"
    },
}
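# Each registry entry pairs a LoRA adapter repo ("hf_repo") with the base checkpoint it
# was trained against ("base_model"); ModelManager.load_model() loads the base model in
# 4-bit and applies the adapter on top, so the two fields must stay in sync.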
# Character info
CHARACTERS = {
    "강율": {
        "mbti": "ENTJ", "role": "leader", "age": 23,
        "traits": "optimistic, playful, full of aegyo",
        "speech": "banmal (casual speech), cute tone, playful expressions",
        "patterns": ["~해", "~지", "히히", "ㅋㅋ"],
        "ratio": "30:70", "warmth": "high"
    },
    "서이안": {
        "mbti": "INFP", "role": "vocalist", "age": 22,
        "traits": "calm, mysterious, considerate",
        "speech": "mixes polite speech, warm tone, quiet expressions",
        "patterns": ["...요", "네요", "...", "그래요"],
        "ratio": "20:80", "warmth": "very_high"
    },
    "이지후": {
        "mbti": "ISFJ", "role": "maknae (youngest)", "age": 21,
        "traits": "tsundere, proud, secretly caring",
        "speech": "banmal, blunt tone, dismissive phrasing",
        "patterns": ["뭐야", "아니거든", "...", "그냥", "별로"],
        "ratio": "30:70", "warmth": "medium"
    },
    "차도하": {
        "mbti": "INTP", "role": "producer", "age": 24,
        "traits": "charismatic, natural leader, affectionate, straightforward",
        "speech": "banmal, concise tone, plain expressions",
        "patterns": ["하자", "해볼까", "같이", "괜찮아"],
        "ratio": "50:50", "warmth": "medium"
    },
    "최민": {
        "mbti": "ESFP", "role": "dancer", "age": 22,
        "traits": "proactive, honest, passionate",
        "speech": "banmal, assertive tone, frank expressions",
        "patterns": ["할래", "좋아", "진짜", "대박", "헐"],
        "ratio": "60:40", "warmth": "medium"
    },
}
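# "ratio" is the push:pull (밀당) balance and "warmth" the affection level; both are only
# consumed by build_system_prompt() below, which interpolates them into the system prompt.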
# Scenario list
SCENARIOS = [
    {"id": "fm_01", "cat": "First meeting", "text": "{char}! We finally meet... I really like you!"},
    {"id": "dc_01", "cat": "Daily chat", "text": "{char}, what are you up to today? Have you eaten?"},
    {"id": "es_01", "cat": "Emotional support", "text": "Today was really rough... I even messed up my presentation at school..."},
    {"id": "cf_01", "cat": "Confession", "text": "{char}... I genuinely like you."},
    {"id": "pl_01", "cat": "Teasing", "text": "Actually I like another member more~ haha just kidding!"},
    {"id": "sr_01", "cat": "Special request", "text": "Just for today, will you pretend we're dating?"},
    {"id": "cn_01", "cat": "Conflict", "text": "{char}, are you this sweet to other fans too...? I'm kind of jealous..."},
    {"id": "ec_01", "cat": "Emotional crisis", "text": "I cried a lot today... Life feels so hard."},
]
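# Scenario texts may contain a "{char}" placeholder; load_scenario() and random_scenario()
# substitute the selected character's name before the message is sent to the models.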
# ============================================================
# Model management
# ============================================================
class ModelManager:
    def __init__(self):
        self.current_model = None
        self.current_model_name = None
        self.tokenizer = None
        self.last_error = None

    def load_model(self, model_name: str):
        """Load model with 4-bit quantization and LoRA adapter"""
        if not is_gpu_available():
            self.last_error = f"GPU not available (TORCH_AVAILABLE={TORCH_AVAILABLE}, cuda={torch.cuda.is_available() if TORCH_AVAILABLE else 'N/A'})"
            return False
        if self.current_model_name == model_name:
            return True  # Already loaded
        # Unload current model
        self.unload_model()
        model_info = MODELS.get(model_name)
        if not model_info:
            self.last_error = f"Model {model_name} not found in registry"
            return False
        try:
            print(f"Loading {model_name}...")
            print(f" Base model: {model_info['base_model']}")
            print(f" LoRA adapter: {model_info['hf_repo']}")
            # 4-bit quantization config
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
            )
            # Load base model
            print(" Loading base model...")
            base_model = AutoModelForCausalLM.from_pretrained(
                model_info["base_model"],
                quantization_config=bnb_config,
                device_map="auto",
                trust_remote_code=True,
            )
            print(" Base model loaded!")
            # Load LoRA adapter
            print(" Loading LoRA adapter...")
            self.current_model = PeftModel.from_pretrained(
                base_model,
                model_info["hf_repo"],
                trust_remote_code=True,
            )
            self.current_model.eval()
            print(" LoRA adapter loaded!")
            # Load tokenizer
            print(" Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_info["base_model"],
                trust_remote_code=True,
            )
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            print(" Tokenizer loaded!")
            self.current_model_name = model_name
            self.last_error = None
            print(f"Loaded {model_name} successfully!")
            return True
        except Exception as e:
            import traceback
            error_msg = f"{type(e).__name__}: {str(e)}"
            print(f"Error loading {model_name}: {error_msg}")
            traceback.print_exc()
            self.last_error = error_msg
            self.unload_model()
            return False

    def unload_model(self):
        """Unload current model to free memory"""
        if self.current_model is not None:
            del self.current_model
            self.current_model = None
        if self.tokenizer is not None:
            del self.tokenizer
            self.tokenizer = None
        self.current_model_name = None
        gc.collect()
        # Use the dynamic check: GPU_AVAILABLE is frozen at import time and may be stale
        if is_gpu_available():
            torch.cuda.empty_cache()

    def generate(self, model_name: str, messages: list, max_new_tokens: int = 512) -> str:
        """Generate response from model"""
        if not self.load_model(model_name):
            return self._mock_response(model_name)
        try:
            # Apply chat template
            text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
            inputs = self.tokenizer(text, return_tensors="pt").to(self.current_model.device)
            with torch.no_grad():
                outputs = self.current_model.generate(
                    **inputs,
                    max_new_tokens=max_new_tokens,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    pad_token_id=self.tokenizer.pad_token_id,
                )
            response = self.tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
            return response.strip()
        except Exception as e:
            print(f"Generation error: {e}")
            return self._mock_response(model_name)

    def _mock_response(self, model_name: str) -> str:
        """Fallback mock response with error info"""
        error_info = f"\nError: {self.last_error}" if self.last_error else ""
        return f"<think>\n[Mock Mode] Model loading failed{error_info}\n</think>\n\nHey~ nice to see you!"
# Global model manager
model_manager = ModelManager()
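# All inference goes through this singleton. If loading fails (no GPU, missing repo,
# out-of-memory, ...), ModelManager.generate() falls back to _mock_response() so the UI
# keeps working in mock mode.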
# ============================================================
# Pre-cache base models (avoid cold-start timeouts)
# ============================================================
def preload_base_models():
    """Pre-download base models to avoid cold start timeout"""
    if not TORCH_AVAILABLE:
        print("Skipping preload: PyTorch not available")
        return
    from huggingface_hub import snapshot_download
    import os
    # Models that need pre-caching (large or slow to download)
    models_to_cache = [
        "NCSOFT/Llama-VARCO-8B-Instruct",  # VARCO - 16GB, often times out on first load
    ]
    print("=" * 50)
    print("Pre-downloading base models (this may take a while)...")
    print("=" * 50)
    for model_id in models_to_cache:
        try:
            print(f" Downloading: {model_id}")
            # Download all model files to HF cache
            cache_dir = snapshot_download(
                repo_id=model_id,
                ignore_patterns=["*.md", "*.txt"],  # Skip docs
            )
            print(f" ✓ Downloaded to: {cache_dir}")
        except Exception as e:
            print(f" ✗ Failed to download {model_id}: {e}")
    print("Pre-download complete!")
    print("=" * 50)
# Run preload at startup
preload_base_models()
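# snapshot_download() puts the files into the standard Hugging Face cache (HF_HOME,
# ~/.cache/huggingface by default), so the later from_pretrained() call for the same
# repo reuses the local copy instead of re-downloading it during a request.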
# ============================================================
# System prompt construction
# ============================================================
def build_system_prompt(character: str) -> str:
    """Build system prompt for character"""
    char_info = CHARACTERS.get(character, {})
    prompt = f"""You are the idol '{character}'.

## Character
- Name: {character}
- MBTI: {char_info.get('mbti', 'UNKNOWN')}
- Personality: {char_info.get('traits', '')}
- Role: {char_info.get('role', '')}
- Age: {char_info.get('age', 20)}

## Speech style
- Style: {char_info.get('speech', '')}
- Frequent expressions: {', '.join(char_info.get('patterns', []))}

## Push-pull guide
- Push:pull ratio: {char_info.get('ratio', '50:50')}
- Warmth: {char_info.get('warmth', 'medium')}

## Rules
1. Keep the character's personality and speech style consistent
2. Use natural, conversational language
3. Do not show affection too easily (keep the push-pull going)
4. Make the other person feel special, but keep things at the flirting stage

## Response format
Before answering, write {character}'s first-person inner monologue inside a <think> tag.
- Natural, talking-to-yourself style
- Reflects the character's personality
- Expresses feelings/thoughts about the other person

Example:
<think>
What... they said they like me again? Honestly it doesn't feel bad... but what am I supposed to say?
</think>
"""
    return prompt
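# Minimal usage sketch (all names exist above): the prompt becomes the system message
# and the visitor's text the user message, e.g.
#   messages = [
#       {"role": "system", "content": build_system_prompt("강율")},
#       {"role": "user", "content": "What are you up to today?"},
#   ]
#   reply = model_manager.generate("qwen2.5-7b-dpo-v5", messages)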
# ============================================================
# Voting / ELO system
# ============================================================
VOTES_FILE = "votes.jsonl"
ELO_FILE = "elo_ratings.json"
def load_elo():
    try:
        with open(ELO_FILE, "r") as f:
            return json.load(f)
    except Exception:
        return {m: 1500 for m in MODELS}
def save_elo(elo):
    with open(ELO_FILE, "w") as f:
        json.dump(elo, f, indent=2)
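# update_elo() below is the standard Elo update with K = 32:
#   expected score  E_a = 1 / (1 + 10 ** ((R_b - R_a) / 400))
#   new rating      R_a' = R_a + K * (S_a - E_a),  S_a = 1 / 0.5 / 0 for win / tie / loss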
def update_elo(elo, model_a, model_b, result):
    K = 32
    ra, rb = elo.get(model_a, 1500), elo.get(model_b, 1500)
    ea = 1 / (1 + 10 ** ((rb - ra) / 400))
    eb = 1 / (1 + 10 ** ((ra - rb) / 400))
    if result == "a":
        sa, sb = 1, 0
    elif result == "b":
        sa, sb = 0, 1
    else:
        sa, sb = 0.5, 0.5
    elo[model_a] = ra + K * (sa - ea)
    elo[model_b] = rb + K * (sb - eb)
    save_elo(elo)
    return elo[model_a], elo[model_b]
def save_vote(data):
    vote = {"id": str(uuid.uuid4())[:8], "timestamp": datetime.now().isoformat(), **data}
    with open(VOTES_FILE, "a") as f:
        f.write(json.dumps(vote, ensure_ascii=False) + "\n")
    return vote["id"]
def load_votes():
    try:
        with open(VOTES_FILE, "r") as f:
            return [json.loads(line) for line in f if line.strip()]
    except Exception:
        return []
def get_leaderboard():
    elo = load_elo()
    votes = load_votes()
    stats = {}
    for v in votes:
        ma, mb, res = v.get("model_a"), v.get("model_b"), v.get("vote")
        if not ma or not mb or res == "skip":
            continue
        for m in [ma, mb]:
            if m not in stats:
                stats[m] = {"wins": 0, "losses": 0, "ties": 0}
        if res == "a":
            stats[ma]["wins"] += 1
            stats[mb]["losses"] += 1
        elif res == "b":
            stats[mb]["wins"] += 1
            stats[ma]["losses"] += 1
        else:
            stats[ma]["ties"] += 1
            stats[mb]["ties"] += 1
    rows = []
    for i, (m, e) in enumerate(sorted(elo.items(), key=lambda x: -x[1]), 1):
        s = stats.get(m, {"wins": 0, "losses": 0, "ties": 0})
        total = s["wins"] + s["losses"] + s["ties"]
        wr = f"{s['wins']/total*100:.1f}%" if total > 0 else "-"
        info = MODELS.get(m, {})
        rows.append([i, info.get("desc", m), info.get("size", "?"), int(e), s["wins"], s["losses"], s["ties"], wr])
    return rows
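# Note: the ELO column is read from elo_ratings.json while wins/losses/ties are recomputed
# from votes.jsonl on every call, so the two can drift if either file is reset on its own.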
# ============================================================
# UI handlers
# ============================================================
model_list = [(f"[{v['size']}] {v['desc']}", k) for k, v in MODELS.items()]
char_list = list(CHARACTERS.keys())
scenario_list = [(f"[{s['cat']}] {s['text'][:30]}...", s['id']) for s in SCENARIOS]
current_state = {"model_a": None, "model_b": None, "resp_a": None, "resp_b": None, "char": None, "input": None}
def random_models():
    selected = random.sample(list(MODELS.keys()), 2)
    return selected[0], selected[1]
def load_scenario(scenario_id, character):
    s = next((x for x in SCENARIOS if x["id"] == scenario_id), None)
    if s:
        return s["text"].replace("{char}", character)
    return ""
def random_scenario(character):
    s = random.choice(SCENARIOS)
    return s["text"].replace("{char}", character), s["id"]
def parse_response(response: str):
    """Parse response to separate thinking and content"""
    think_match = re.search(r'<think>(.*?)</think>', response, re.DOTALL)
    if think_match:
        thinking = think_match.group(1).strip()
        content = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL).strip()
        return thinking, content
    return "", response
def generate(model_a, model_b, character, user_msg, progress=gr.Progress()):
    if not user_msg.strip():
        return "Please enter a message", "", "", "Please enter a message", "", ""
    system_prompt = build_system_prompt(character)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_msg},
    ]
    # Generate from Model A
    progress(0.2, desc=f"Generating with Model A ({model_a})...")
    resp_a = model_manager.generate(model_a, messages)
    think_a, clean_a = parse_response(resp_a)
    # Generate from Model B
    progress(0.6, desc=f"Generating with Model B ({model_b})...")
    resp_b = model_manager.generate(model_b, messages)
    think_b, clean_b = parse_response(resp_b)
    # Update state
    current_state.update({
        "model_a": model_a, "model_b": model_b,
        "resp_a": resp_a, "resp_b": resp_b,
        "char": character, "input": user_msg
    })
    mode = "GPU" if is_gpu_available() else "Mock"  # dynamic check instead of the import-time flag
    return (
        think_a or "(none)", clean_a, f"{mode} | {MODELS[model_a]['size']}",
        think_b or "(none)", clean_b, f"{mode} | {MODELS[model_b]['size']}"
    )
def vote(vote_type, reason):
    if not current_state["model_a"]:
        return "Please generate responses first."
    elo = load_elo()
    vid = save_vote({
        "model_a": current_state["model_a"],
        "model_b": current_state["model_b"],
        "character": current_state["char"],
        "user_input": current_state["input"],
        "vote": vote_type,
        "reason": reason,
    })
    if vote_type != "skip":
        new_a, new_b = update_elo(elo, current_state["model_a"], current_state["model_b"], vote_type)
        return f"Vote recorded! (ID: {vid})\nELO: {current_state['model_a']}={int(new_a)}, {current_state['model_b']}={int(new_b)}"
    return f"Skipped (ID: {vid})"
def refresh_leaderboard():
    return get_leaderboard()
def get_vote_summary():
    votes = load_votes()
    total = len(votes)
    a_wins = sum(1 for v in votes if v.get("vote") == "a")
    b_wins = sum(1 for v in votes if v.get("vote") == "b")
    ties = sum(1 for v in votes if v.get("vote") == "tie")
    return str(total), str(a_wins), str(b_wins), str(ties)
# ============================================================
# Gradio UI
# ============================================================
with gr.Blocks(title="KAIdol A/B Test Arena", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# KAIdol A/B Test Arena")
    gr.Markdown("A/B comparison of K-pop idol roleplay models (11 small student models)")

    # Detailed GPU status
    if IMPORT_ERROR:
        mode_text = f"**Mock mode**: Import Error - {IMPORT_ERROR}"
    elif TORCH_AVAILABLE and torch is not None:
        torch_ver = torch.__version__
        cuda_avail = torch.cuda.is_available()
        cuda_ver = torch.version.cuda if cuda_avail else "N/A"
        gpu_name = torch.cuda.get_device_name(0) if cuda_avail else "N/A"
        mode_text = f"**GPU mode**: {gpu_name} (CUDA {cuda_ver}, PyTorch {torch_ver})" if cuda_avail else f"**Mock mode**: CUDA not available (PyTorch {torch_ver})"
    else:
        mode_text = "**Mock mode**: PyTorch not loaded"
    gr.Markdown(mode_text)

    with gr.Tabs():
        # A/B Arena tab
        with gr.Tab("A/B Arena"):
            with gr.Row():
                character = gr.Dropdown(choices=char_list, value="강율", label="Character")
                scenario = gr.Dropdown(choices=scenario_list, label="Scenario")
            with gr.Row():
                model_a = gr.Dropdown(choices=model_list, value=list(MODELS.keys())[0], label="Model A")
                model_b = gr.Dropdown(choices=model_list, value=list(MODELS.keys())[1], label="Model B")
                random_btn = gr.Button("Random", size="sm")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Model A")
                    with gr.Accordion("Thinking", open=False):
                        think_a = gr.Markdown()
                    resp_a = gr.Textbox(label="Response", lines=5)
                    meta_a = gr.Markdown()
                with gr.Column():
                    gr.Markdown("### Model B")
                    with gr.Accordion("Thinking", open=False):
                        think_b = gr.Markdown()
                    resp_b = gr.Textbox(label="Response", lines=5)
                    meta_b = gr.Markdown()
            user_input = gr.Textbox(label="Message", placeholder="Send a message to the idol...")
            with gr.Row():
                random_scenario_btn = gr.Button("Random scenario")
                submit_btn = gr.Button("Send", variant="primary")
            gr.Markdown("### Vote")
            with gr.Row():
                vote_a = gr.Button("A is better")
                vote_tie = gr.Button("Tie")
                vote_b = gr.Button("B is better")
                vote_skip = gr.Button("Skip")
            vote_reason = gr.Textbox(label="Reason for vote (optional)", placeholder="...")
            vote_result = gr.Markdown()

            # Events
            random_btn.click(random_models, outputs=[model_a, model_b])
            scenario.change(load_scenario, [scenario, character], user_input)
            random_scenario_btn.click(random_scenario, [character], [user_input, scenario])
            submit_btn.click(generate, [model_a, model_b, character, user_input],
                             [think_a, resp_a, meta_a, think_b, resp_b, meta_b])
            vote_a.click(lambda r: vote("a", r), [vote_reason], vote_result)
            vote_b.click(lambda r: vote("b", r), [vote_reason], vote_result)
            vote_tie.click(lambda r: vote("tie", r), [vote_reason], vote_result)
            vote_skip.click(lambda r: vote("skip", r), [vote_reason], vote_result)

        # Leaderboard tab
        with gr.Tab("Leaderboard"):
            gr.Markdown("## ELO Leaderboard")
            refresh_btn = gr.Button("Refresh")
            leaderboard = gr.Dataframe(
                headers=["Rank", "Model", "Size", "ELO", "W", "L", "T", "Win rate"],
                datatype=["number", "str", "str", "number", "number", "number", "number", "str"],
            )
            gr.Markdown("### Vote summary")
            with gr.Row():
                total_v = gr.Textbox(label="Total votes", interactive=False)
                a_wins_v = gr.Textbox(label="A wins", interactive=False)
                b_wins_v = gr.Textbox(label="B wins", interactive=False)
                ties_v = gr.Textbox(label="Ties", interactive=False)

            def refresh():
                lb = refresh_leaderboard()
                summary = get_vote_summary()
                return lb, *summary

            refresh_btn.click(refresh, outputs=[leaderboard, total_v, a_wins_v, b_wins_v, ties_v])

        # Model list tab
        with gr.Tab("Model List"):
            gr.Markdown("## Models under test")
            gr.Markdown(f"{len(MODELS)} models in total")
            model_table = gr.Dataframe(
                headers=["Model ID", "Size", "Method", "Description", "Base Model"],
                value=[[k, v["size"], v["method"], v["desc"], v["base_model"]] for k, v in MODELS.items()],
            )
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)