dataset-builder / data3 /download_and_process.sh
DouDou
Upload data3/download_and_process.sh with huggingface_hub
30c724d verified
#!/bin/bash
#
# Download and post-process the results of a batch job.
#
# Usage: download_and_process.sh [batch_id]
#
# The batch ID is taken from the first argument or, when no argument is
# given, from batch_id.txt in the current directory. The script then:
#   1. runs the `status` subcommand of generate_problems_batch.py,
#   2. asks for interactive confirmation (a single y/Y keypress),
#   3. runs the `download` subcommand, writing to batch_results_raw.jsonl,
#   4. runs the `process` subcommand, writing to
#      programming_problems_batch.jsonl.
#
# generate_problems_batch.py is invoked by relative path, so the script must
# be run from the directory that contains it.
#
# Exit status: 0 on success or when the user declines the download,
# non-zero when no batch ID is available, the confirmation cannot be read,
# or any python3 step fails.
set -euo pipefail

readonly BATCH_ID_FILE="batch_id.txt"
readonly BATCH_RESULTS_RAW="batch_results_raw.jsonl"
readonly FINAL_OUTPUT="programming_problems_batch.jsonl"
readonly REQUESTS_FILE="batch_requests_full.jsonl"
readonly MODEL="gpt-4o-mini"

# Resolve the batch ID: an explicit argument wins, otherwise fall back to
# the ID file. `${1:-}` keeps `set -u` happy when no argument is passed.
BATCH_ID="${1:-}"
if [[ -z "$BATCH_ID" && -f "$BATCH_ID_FILE" ]]; then
  # Strip all whitespace (newlines, CRs, stray spaces) so the ID can be
  # passed as one quoted argument below.
  BATCH_ID=$(tr -d '[:space:]' < "$BATCH_ID_FILE")
fi

# Covers both "no argument and no file" and "file exists but is empty".
if [[ -z "$BATCH_ID" ]]; then
  echo "❌ 错误: 请提供 Batch ID" >&2
  echo "用法: $0 <batch_id>" >&2
  exit 1
fi

echo "⬇️ 下载和处理批处理结果"
echo "========================================"
echo "Batch ID: $BATCH_ID"
echo ""

# Show the current batch status before asking whether to download.
echo "检查批处理状态..."
python3 generate_problems_batch.py status "$BATCH_ID"
echo ""

# `read` fails when stdin is closed or at EOF (e.g. a non-interactive run).
# Without this check `set -e` would abort silently; report why instead.
if ! read -p "👉 确认下载? (y/n) " -n 1 -r; then
  echo ""
  echo "❌ 错误: 无法读取确认输入" >&2
  exit 1
fi
echo ""
# Anything other than a single y/Y counts as "no" and ends the run cleanly.
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
  exit 0
fi

# Download the raw batch results.
echo ""
echo "⬇️ 下载结果..."
python3 generate_problems_batch.py download "$BATCH_ID" \
  --output "$BATCH_RESULTS_RAW"
echo ""
echo "✅ 原始结果已下载: $BATCH_RESULTS_RAW"

# Turn the raw results into the final output file.
echo ""
echo "📊 处理结果..."
python3 generate_problems_batch.py process \
  --input "$BATCH_RESULTS_RAW" \
  --output "$FINAL_OUTPUT" \
  --model "$MODEL" \
  --requests "$REQUESTS_FILE"

echo ""
echo "========================================"
echo "✅ 完成!"
echo "========================================"
echo "最终结果: $FINAL_OUTPUT"
echo ""
# Hints for inspecting the output; these commands are printed, not run.
echo "查看结果:"
echo " wc -l $FINAL_OUTPUT"
echo " head -1 $FINAL_OUTPUT | python3 -m json.tool"