#!/usr/bin/env python3
"""
下载并转换 MT-Bench 数据到本地目录
"""
import json
import os
import requests
# Script body: fetch the MT-Bench question file through an HTTP proxy, store
# it under DATA_DIR, then sanity-check the saved copy by counting its records
# and pretty-printing the first one. Failures are reported on stdout rather
# than raised, so the script always runs to completion.

# Target directory for the dataset (created on demand).
DATA_DIR = "/workspace/hanrui/datasets/mtbench"
os.makedirs(DATA_DIR, exist_ok=True)

# Remote location of the MT-Bench questions and the local destination path.
url = "https://raw.githubusercontent.com/lm-sys/FastChat/main/fastchat/llm_judge/data/mt_bench/question.jsonl"
output_file = os.path.join(DATA_DIR, "question.jsonl")

print(f"Downloading MT-Bench questions from {url}")
print(f"Saving to {output_file}")

try:
    # Route both HTTP and HTTPS traffic through the proxy.
    proxies = {
        'http': 'http://10.1.2.1:7890',
        'https': 'http://10.1.2.1:7890',
    }
    response = requests.get(url, proxies=proxies, timeout=30)
    response.raise_for_status()

    # Write the raw bytes so the payload is stored exactly as served.
    with open(output_file, 'wb') as f:
        f.write(response.content)
except (requests.RequestException, OSError) as e:
    # Network/HTTP errors and local write errors both mean the file is not
    # available; tell the user how to fetch it by hand.
    print(f"✗ Download failed: {e}")
    print("\nPlease manually download from:")
    print(f" {url}")
    print("And save to:")
    print(f" {output_file}")
else:
    print("✓ Downloaded successfully")

    # Validate the saved file separately from the download, so a malformed
    # payload is not misreported as a download failure.
    try:
        # Decode explicitly as UTF-8 instead of relying on the locale default;
        # skip blank lines so they are not counted as questions.
        with open(output_file, 'r', encoding='utf-8') as f:
            lines = [line for line in f if line.strip()]
        print(f"✓ Total questions: {len(lines)}")

        if lines:
            # Show the first question as a quick visual check.
            first_question = json.loads(lines[0])
            print("\nFirst question:")
            print(json.dumps(first_question, indent=2))
        else:
            print("✗ Downloaded file contains no questions")
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"✗ Validation failed: {e}")