humanpreference / summarize_votes.py
hongweiyi's picture
Upload folder using huggingface_hub
89b5bcd verified
import argparse
import json
from pathlib import Path
from typing import Dict, List, Optional
def normalize_model_label(label: Optional[str]) -> Optional[str]:
    """Return a canonical form of a model label.

    The label is stripped of surrounding whitespace and lower-cased.

    Args:
        label: Raw label text, or ``None`` when the vote carries no label.

    Returns:
        The cleaned label (e.g. ``"base"`` or ``"lora"``), or ``None`` when
        the input is ``None`` or contains only whitespace.
    """
    if label is None:
        return None
    cleaned = label.strip().lower()
    # An empty string is falsy, so blank labels collapse to None here.
    return cleaned or None
def summarize_votes(votes: List[Dict]) -> Dict[str, float]:
    """Tally votes into base / lora / tie / other counts and percentages.

    Each vote is expected to be a dict whose ``"choice"`` is ``"left"``,
    ``"right"`` or ``"tie"`` (compared case-insensitively, ignoring
    surrounding whitespace). For a left/right vote the winning model is read
    from ``display_left_model`` / ``display_right_model``, falling back to
    ``left_model`` / ``right_model`` when the display key is missing or empty.

    A vote is counted as "other" when it cannot be attributed: the entry is
    not a dict, the choice is missing or unrecognized, or the winning label
    normalizes to something other than ``"base"`` or ``"lora"``.

    Args:
        votes: The vote records to summarize.

    Returns:
        A dict with the counts ``base``, ``lora``, ``tie``, ``other`` and
        ``total``, plus ``base_pct``, ``lora_pct``, ``tie_pct`` and
        ``other_pct`` (percent of ``total``, rounded to two decimals). The
        same keys are present for empty input, with every value zero.
    """
    counts = {"base": 0, "lora": 0, "tie": 0, "other": 0}
    # For each side, the keys holding that side's model label (preferred first).
    side_keys = {
        "left": ("display_left_model", "left_model"),
        "right": ("display_right_model", "right_model"),
    }

    for entry in votes:
        # Malformed records are tallied as unclassified rather than crashing.
        if not isinstance(entry, dict):
            counts["other"] += 1
            continue

        raw_choice = entry.get("choice")
        choice = raw_choice.strip().lower() if isinstance(raw_choice, str) else ""

        if choice == "tie":
            counts["tie"] += 1
            continue
        if choice not in side_keys:
            counts["other"] += 1
            continue

        # Labels are only needed (and only resolved) for left/right votes.
        preferred_key, fallback_key = side_keys[choice]
        label = entry.get(preferred_key) or entry.get(fallback_key)
        winner = normalize_model_label(label) if isinstance(label, str) else None

        if winner in ("base", "lora"):
            counts[winner] += 1
        else:
            counts["other"] += 1

    total = sum(counts.values())

    def pct(value: int) -> float:
        # Guard the division so empty input yields 0.0 instead of raising.
        if total == 0:
            return 0.0
        return round((value / total) * 100.0, 2)

    return {
        "base": counts["base"],
        "base_pct": pct(counts["base"]),
        "lora": counts["lora"],
        "lora_pct": pct(counts["lora"]),
        "tie": counts["tie"],
        "tie_pct": pct(counts["tie"]),
        "other": counts["other"],
        "other_pct": pct(counts["other"]),
        "total": total,
    }
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the vote summarizer.

    Returns:
        The parsed namespace. Its ``votes`` attribute is a ``Path`` to the
        votes JSON file, defaulting to ``votes.json``.
    """
    cli = argparse.ArgumentParser(
        description="Summarize vote results into base/lora/tie counts and percentages.",
    )
    votes_option = {
        "type": Path,
        "default": Path("votes.json"),
        "help": "Path to votes JSON file (default: ./votes.json).",
    }
    cli.add_argument("--votes", **votes_option)
    return cli.parse_args()
def main() -> None:
    """Load the votes file named on the command line and print a summary.

    Prints a short notice instead of a summary when the file is blank or
    when the summary's total is zero.

    Raises:
        FileNotFoundError: If the votes path does not exist.
        ValueError: If the file's JSON content is not an array.
    """
    votes_file = parse_args().votes
    if not votes_file.exists():
        raise FileNotFoundError(f"Votes file not found: {votes_file}")

    raw_text = votes_file.read_text(encoding="utf-8").strip()
    if not raw_text:
        print("No votes recorded.")
        return

    parsed = json.loads(raw_text)
    if not isinstance(parsed, list):
        raise ValueError("Votes file must contain a JSON array.")

    result = summarize_votes(parsed)
    if result["total"] == 0:
        print("No votes classified.")
        return

    report = [
        f"Total votes: {result['total']}",
        f"Base preference: {result['base']} ({result['base_pct']}%)",
        f"Lora preference: {result['lora']} ({result['lora_pct']}%)",
        f"Tie: {result['tie']} ({result['tie_pct']}%)",
    ]
    # The unclassified line is shown only when there is something to report.
    if result["other"]:
        report.append(f"Unclassified: {result['other']} ({result['other_pct']}%)")

    for line in report:
        print(line)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()