|
|
import os |
|
|
import json |
|
|
from openai import OpenAI |
|
|
|
|
|
def batch_download_files():
    """Interactively download the output files of completed batch tasks.

    Prompts on stdin for two paths:

    * a JSONL file in which each non-blank line is a JSON object describing
      one batch task (the ``status`` and ``output_file_id`` keys are read);
    * a directory to save the downloaded files into (created if missing;
      an empty answer means the current directory).

    For every entry whose ``status`` is ``"completed"`` the file referenced
    by ``output_file_id`` is fetched through the OpenAI-compatible client and
    written to ``<output_dir>/<output_file_id>.jsonl``. Progress and per-line
    problems are printed; a failure on one line never aborts the remaining
    lines.

    Returns:
        None. All results are reported via ``print`` and files on disk.
    """
    client = OpenAI(
        api_key=os.getenv("DASHSCOPE_API_KEY"),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )

    try:
        # Strip surrounding whitespace: pasted paths often carry a stray
        # trailing space or newline.
        input_jsonl = input("请输入包含batch任务状态的jsonl文件路径:").strip()
        # An empty answer would make os.makedirs("") raise FileNotFoundError;
        # fall back to the current directory instead.
        output_dir = input("请输入保存文件的目录路径:").strip() or "."

        # Handle directory-creation errors on their own so that they are not
        # misreported as "input file does not exist" by the handler below.
        try:
            os.makedirs(output_dir, exist_ok=True)
        except OSError as e:
            print(f"错误:无法创建输出目录 - {e}")
            return

        # utf-8-sig transparently drops a leading BOM (common for files saved
        # on Windows) that would otherwise break JSON parsing of line 1, and
        # reads BOM-less UTF-8 unchanged.
        with open(input_jsonl, "r", encoding="utf-8-sig") as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) are not entries.
                    continue

                try:
                    entry = json.loads(line)

                    status = entry.get("status")
                    if status != "completed":
                        print(f"第 {line_num} 行: 状态未完成(当前状态:{status})")
                        continue

                    file_id = entry.get("output_file_id")
                    if not file_id:
                        print(f"第 {line_num} 行: 缺少output_file_id")
                        continue

                    content = client.files.content(file_id=file_id)

                    filename = f"{file_id}.jsonl"
                    save_path = os.path.join(output_dir, filename)

                    content.write_to_file(save_path)
                    print(f"成功保存: {filename} -> {save_path}")

                except json.JSONDecodeError:
                    print(f"第 {line_num} 行: JSON解析失败")
                except Exception as e:
                    # Best-effort per line: report and move on to the next
                    # entry (covers API errors, write errors, non-object JSON).
                    print(f"第 {line_num} 行: 发生错误 - {e}")

    except FileNotFoundError:
        # Only open() can reach this handler now.
        print("错误:输入文件不存在")
    except Exception as e:
        print(f"发生未预期的错误: {e}")
|
|
|
|
|
# Script entry point: start the interactive downloader only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    batch_download_files()