File size: 3,317 Bytes
2aa06ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/bin/bash
# Quick estimator: how much data can be generated under different budgets.

# Banner followed by the cost figures the script reports as coming from the
# user's test run. The literals are single-quoted so the dollar signs are
# printed verbatim without needing backslash escapes.
# shellcheck disable=SC2016
printf '%s\n' \
    '💰 OpenAI API 预算估算工具' \
    '==========================================' \
    '' \
    '基于您的测试结果:' \
    '  - 2个样本成本: $0.001819' \
    '  - 平均每样本: $0.0009095' \
    ''

# Average token counts per sample, calculated from the test results.
AVG_INPUT_TOKENS=1917
AVG_OUTPUT_TOKENS=2552

# Standard API pricing (gpt-4o-mini), in USD per 1M tokens.
STANDARD_INPUT_PRICE=0.15
STANDARD_OUTPUT_PRICE=0.60

# Batch API pricing (50% off), in USD per 1M tokens.
BATCH_INPUT_PRICE=0.075
BATCH_OUTPUT_PRICE=0.30

#######################################
# Print how many whole samples fit into a budget at the given prices.
# Globals:   AVG_INPUT_TOKENS, AVG_OUTPUT_TOKENS (read)
# Arguments: $1 - budget in USD
#            $2 - input price, USD per 1M tokens
#            $3 - output price, USD per 1M tokens
# Outputs:   the sample count, truncated to an integer, on stdout
# Returns:   python3's exit status
#######################################
samples_for_budget() {
    # Values are handed over as argv instead of being spliced into the Python
    # source, so the program text is fixed and the numbers are parsed as data.
    python3 -c '
import sys
budget, avg_input, avg_output, input_price, output_price = map(float, sys.argv[1:6])
avg_cost_per_sample = (avg_input * input_price / 1_000_000) + (avg_output * output_price / 1_000_000)
print(int(budget / avg_cost_per_sample))
' "$1" "$AVG_INPUT_TOKENS" "$AVG_OUTPUT_TOKENS" "$2" "$3"
}

echo "📊 不同预算对比:"
echo "=========================================="
printf "%-15s %-15s %-15s %-15s\n" "预算" "标准API" "Batch API" "节省"
echo "----------------------------------------"

for BUDGET in 1 5 10 20 50 100; do
    # Stop on a failed calculation (e.g. python3 missing) rather than printing
    # a row with empty cells and a misleading "+0".
    if ! STANDARD_COUNT=$(samples_for_budget "$BUDGET" "$STANDARD_INPUT_PRICE" "$STANDARD_OUTPUT_PRICE") ||
        ! BATCH_COUNT=$(samples_for_budget "$BUDGET" "$BATCH_INPUT_PRICE" "$BATCH_OUTPUT_PRICE"); then
        echo "错误: python3 计算失败 (预算 \$$BUDGET)" >&2
        exit 1
    fi

    # Extra samples the Batch API yields for the same budget.
    SAVINGS=$((BATCH_COUNT - STANDARD_COUNT))

    printf "%-15s %-15s %-15s %-15s\n" "\$$BUDGET" "$STANDARD_COUNT" "$BATCH_COUNT" "+$SAVINGS"
done

echo ""
echo "🎯 推荐配置 (基于 \$10 预算):"
echo "=========================================="

#######################################
# Print the detailed Batch-vs-standard API breakdown for a $10 budget.
# Globals:   AVG_INPUT_TOKENS, AVG_OUTPUT_TOKENS,
#            BATCH_INPUT_PRICE, BATCH_OUTPUT_PRICE,
#            STANDARD_INPUT_PRICE, STANDARD_OUTPUT_PRICE (all read)
# Outputs:   the report on stdout
# Returns:   python3's exit status
#######################################
print_recommendation() {
    # The Python program is a quoted here-doc and receives its inputs through
    # argv, so nothing is expanded by the shell inside the program text and
    # the dollar signs in the f-strings need no escaping.
    python3 - \
        "$AVG_INPUT_TOKENS" "$AVG_OUTPUT_TOKENS" \
        "$BATCH_INPUT_PRICE" "$BATCH_OUTPUT_PRICE" \
        "$STANDARD_INPUT_PRICE" "$STANDARD_OUTPUT_PRICE" <<'PY'
import sys


def number(text):
    """Parse an int when possible so token totals print without a decimal part."""
    try:
        return int(text)
    except ValueError:
        return float(text)


budget = 10.0
avg_input, avg_output = number(sys.argv[1]), number(sys.argv[2])
# Prices arrive in USD per 1M tokens; convert them to USD per token.
(batch_input_price, batch_output_price,
 std_input_price, std_output_price) = (float(p) / 1_000_000 for p in sys.argv[3:7])

# Batch API
batch_cost_per_sample = (avg_input * batch_input_price) + (avg_output * batch_output_price)
batch_samples = int(budget / batch_cost_per_sample)

# Standard API
std_cost_per_sample = (avg_input * std_input_price) + (avg_output * std_output_price)
std_samples = int(budget / std_cost_per_sample)

extra_samples = batch_samples - std_samples
# Guard the ratio: the standard API may not afford a single sample.
extra_pct = f'{extra_samples / std_samples * 100:.1f}%' if std_samples else 'N/A'
# Derive the discount from the configured prices instead of assuming 50%.
discount = 1 - batch_cost_per_sample / std_cost_per_sample

print(f'使用 Batch API:')
print(f'  - 可生成样本数: {batch_samples:,}')
print(f'  - 每样本成本: ${batch_cost_per_sample:.6f}')
print(f'  - 总输入tokens: {batch_samples * avg_input:,}')
print(f'  - 总输出tokens: {batch_samples * avg_output:,}')
print()
print(f'使用标准 API:')
print(f'  - 可生成样本数: {std_samples:,}')
print(f'  - 每样本成本: ${std_cost_per_sample:.6f}')
print()
print(f'💰 节省:')
print(f'  - 多生成样本: {extra_samples:,} ({extra_pct})')
print(f'  - 节省金额: ${budget * discount:.2f} ({discount * 100:.0f}%)')
PY
}

# Make a failed calculation visible on stderr; the static usage tips that
# follow do not depend on it, so the script keeps going.
if ! print_recommendation; then
    echo "错误: python3 计算失败, 无法生成推荐配置" >&2
fi

# Closing section: static usage tips followed by the command for running the
# estimate. The here-doc delimiter is quoted, so the text is emitted literally.
cat <<'EOF'

📝 使用建议:
==========================================
1. 先小规模测试 (100-1000 样本)
2. 确认质量后再大规模生成
3. 使用 --min-score 90+ 保证高质量
4. Batch API 处理时间: 通常几小时内完成

💡 运行估算命令:
   python3 generate_problems_batch.py estimate --num-requests 20000
EOF