tools/utils/upload/batch_download.py
Adinosaur's picture
Upload folder using huggingface_hub
1c980b1 verified
import os
import json
from openai import OpenAI
def batch_download_files():
    """Interactively download the output files of completed batch jobs.

    Prompts on stdin for two paths: a JSONL file in which every line is a
    JSON object describing one batch job, and a destination directory.
    For each entry whose ``status`` is ``"completed"`` and that carries an
    ``output_file_id``, the file content is fetched through the
    OpenAI-compatible DashScope endpoint and written to
    ``<output_dir>/<output_file_id>.jsonl``.

    Blank lines in the JSONL file are ignored. An empty destination answer
    means "the current working directory". Progress and per-line problems
    are reported with ``print``; a failure on one line does not prevent the
    remaining lines from being processed.

    Returns:
        None. All results are reported on stdout and written to disk.
    """
    # The API key is taken from the DASHSCOPE_API_KEY environment variable.
    client = OpenAI(
        api_key=os.getenv("DASHSCOPE_API_KEY"),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )

    try:
        # Trim surrounding whitespace that commonly sneaks in when a path
        # is pasted into the terminal.
        input_jsonl = input("请输入包含batch任务状态的jsonl文件路径:").strip()
        output_dir = input("请输入保存文件的目录路径:").strip()

        # os.makedirs("") raises FileNotFoundError, which the handler below
        # would misreport as a missing *input* file. An empty answer is
        # treated as the current directory instead (os.path.join("", name)
        # yields just ``name``), so there is nothing to create.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(input_jsonl, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                record = line.strip()
                # Blank separator/trailing lines are not records; skipping
                # them avoids spurious "JSON parse failed" reports.
                if not record:
                    continue

                try:
                    entry = json.loads(record)

                    # Only completed jobs have an output file to fetch.
                    status = entry.get('status')
                    if status != 'completed':
                        print(f"第 {line_num} 行: 状态未完成(当前状态:{status})")
                        continue

                    file_id = entry.get('output_file_id')
                    if not file_id:
                        print(f"第 {line_num} 行: 缺少output_file_id")
                        continue

                    # Download the file content and store it under a name
                    # derived from the file id.
                    content = client.files.content(file_id=file_id)
                    filename = f"{file_id}.jsonl"
                    save_path = os.path.join(output_dir, filename)
                    content.write_to_file(save_path)
                    print(f"成功保存: {filename} -> {save_path}")

                except json.JSONDecodeError:
                    print(f"第 {line_num} 行: JSON解析失败")
                except Exception as e:
                    # Best-effort batch: report the failure and move on to
                    # the next line rather than aborting the whole run.
                    print(f"第 {line_num} 行: 发生错误 - {str(e)}")

    except FileNotFoundError:
        print("错误:输入文件不存在")
    except Exception as e:
        print(f"发生未预期的错误: {str(e)}")
# Script entry point: start the interactive download only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    batch_download_files()