import argparse
import os

import alibabacloud_oss_v2 as oss


def _collect_jsonl_files(directory):
    """Return ``(full_path, filename)`` pairs for regular ``*.jsonl`` files in *directory*.

    Only the top level of *directory* is scanned (no recursion). The suffix
    match is case-sensitive. Entries are sorted by name because
    ``os.listdir`` returns them in arbitrary order and a stable upload order
    makes the output reproducible.
    """
    found = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.jsonl'):
            continue
        full_path = os.path.join(directory, filename)
        # Skip directories (or other non-regular entries) that merely carry
        # a ".jsonl" suffix.
        if os.path.isfile(full_path):
            found.append((full_path, filename))
    return found


def _build_object_key(prefix, filename):
    """Join the target folder *prefix* and *filename* into an OSS object key.

    Slashes are stripped from BOTH ends of *prefix*: OSS object names must
    not begin with "/", yet the ``--key`` help text itself suggests values
    such as "/data". An empty (or slash-only) prefix yields the bare
    filename, i.e. the object is placed at the bucket root.
    """
    normalized = prefix.strip('/')
    if normalized:
        return f"{normalized}/{filename}"
    return filename


def main():
    """Upload every ``*.jsonl`` file of a local directory to an OSS bucket.

    Command-line arguments:
        --region     (required) region of the target bucket.
        --bucket     (required) name of the target bucket.
        --key        (required) target folder path inside the bucket; leading
                     and trailing slashes are ignored.
        --file_path  (required) local directory containing the JSONL files.
        --endpoint   (optional) custom access endpoint.

    Credentials are obtained through the SDK's
    ``EnvironmentVariableCredentialsProvider``.

    Uploads are best-effort: a failure on one file is reported on stdout and
    the remaining files are still attempted.

    Raises:
        ValueError: if ``--file_path`` is not an existing directory.
    """
    parser = argparse.ArgumentParser(description="批量上传JSONL文件到OSS")

    # Required arguments
    parser.add_argument('--region', required=True, help='OSS存储空间所在区域')
    parser.add_argument('--bucket', required=True, help='目标存储空间名称')
    parser.add_argument('--key', required=True, help='OSS目标文件夹路径(如:/data)')
    parser.add_argument('--file_path', required=True, help='本地包含JSONL文件的文件夹路径')

    # Optional arguments
    parser.add_argument('--endpoint', help='自定义访问端点')

    args = parser.parse_args()

    # Validate the local directory before touching the network.
    if not os.path.isdir(args.file_path):
        raise ValueError(f"无效的目录路径: {args.file_path}")

    jsonl_files = _collect_jsonl_files(args.file_path)
    if not jsonl_files:
        print("未找到任何JSONL文件")
        return

    # Build the OSS configuration.
    credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()
    cfg = oss.config.load_default()
    cfg.credentials_provider = credentials_provider
    cfg.region = args.region
    if args.endpoint:
        cfg.endpoint = args.endpoint

    # Create the client and its uploader helper.
    client = oss.Client(cfg)
    uploader = client.uploader()

    # Upload each file in turn.
    for local_path, filename in jsonl_files:
        oss_key = _build_object_key(args.key, filename)
        try:
            result = uploader.upload_file(
                oss.PutObjectRequest(
                    bucket=args.bucket,
                    key=oss_key,
                ),
                filepath=local_path
            )

            # Report the result of the upload.
            print(f" 成功上传 {filename}")
            print(f" OSS路径: {oss_key}")
            print(f" 状态码: {result.status_code}")
            print(f" 请求ID: {result.request_id}")
            print(f" ETag: {result.etag}\n")
        except Exception as e:
            # Deliberately broad: this is the per-file boundary of a
            # best-effort batch, so one failed file must not abort the rest.
            print(f" 上传失败 {filename}")
            print(f" 错误信息: {str(e)}\n")


if __name__ == "__main__":
    main()