File size: 1,325 Bytes
7c50656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python3
"""
Download the MT-Bench question file into a local directory and print a quick summary of it.
"""
import json
import os
import requests

# Target directory for the dataset; created up front so the write below
# cannot fail on a missing parent directory.
DATA_DIR = "/workspace/hanrui/datasets/mtbench"
os.makedirs(DATA_DIR, exist_ok=True)

# Location of the MT-Bench question data and where it is stored locally.
url = "https://raw.githubusercontent.com/lm-sys/FastChat/main/fastchat/llm_judge/data/mt_bench/question.jsonl"
output_file = os.path.join(DATA_DIR, "question.jsonl")

print(f"Downloading MT-Bench questions from {url}")
print(f"Saving to {output_file}")

try:
    # Download through the proxy.
    proxies = {
        'http': 'http://10.1.2.1:7890',
        'https': 'http://10.1.2.1:7890',
    }

    response = requests.get(url, proxies=proxies, timeout=30)
    response.raise_for_status()

    # Write the raw bytes unchanged so the local copy matches the upstream file.
    with open(output_file, 'wb') as f:
        f.write(response.content)

except (requests.RequestException, OSError) as e:
    # Only network and filesystem failures are reported as a failed download;
    # problems with the file's content are handled separately below.
    print(f"✗ Download failed: {e}")
    print("\nPlease manually download from:")
    print(f"  {url}")
    print("And save to:")
    print(f"  {output_file}")

else:
    print("✓ Downloaded successfully")

    # Validate the data by reading the saved file back.
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(output_file, 'r', encoding='utf-8') as f:
            # JSONL holds one record per line; blank lines are not records.
            lines = [line for line in f if line.strip()]

        print(f"✓ Total questions: {len(lines)}")

        if lines:
            # Show the first question.
            first_question = json.loads(lines[0])
            print("\nFirst question:")
            print(json.dumps(first_question, indent=2))
        else:
            print("✗ Downloaded file contains no questions")

    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses, so malformed content lands here.
        print(f"✗ Validation failed: {e}")