| | import argparse |
| | import os |
| | import alibabacloud_oss_v2 as oss |
| |
|
def main():
    """Upload every ``.jsonl`` file found in a local directory to an OSS bucket.

    Command-line arguments (parsed from ``sys.argv``):
        --region     required; assigned to the client configuration's region.
        --bucket     required; name of the target bucket.
        --key        required; destination folder prefix inside the bucket.
                     Leading and trailing slashes are ignored.
        --file_path  required; local directory that is scanned (not
                     recursively) for ``*.jsonl`` files.
        --endpoint   optional; overrides the configuration's endpoint.

    Credentials are supplied through
    ``oss.credentials.EnvironmentVariableCredentialsProvider``.

    Raises:
        ValueError: if ``--file_path`` is not an existing directory.
        SystemExit: with status 1, after every file has been attempted, when
            at least one upload failed.
    """
    parser = argparse.ArgumentParser(description="批量上传JSONL文件到OSS")

    # Required arguments.
    parser.add_argument('--region', required=True, help='OSS存储空间所在区域')
    parser.add_argument('--bucket', required=True, help='目标存储空间名称')
    parser.add_argument('--key', required=True, help='OSS目标文件夹路径(如:/data)')
    parser.add_argument('--file_path', required=True, help='本地包含JSONL文件的文件夹路径')

    # Optional arguments.
    parser.add_argument('--endpoint', help='自定义访问端点')

    args = parser.parse_args()

    if not os.path.isdir(args.file_path):
        raise ValueError(f"无效的目录路径: {args.file_path}")

    jsonl_files = _collect_jsonl_files(args.file_path)
    if not jsonl_files:
        print("未找到任何JSONL文件")
        return

    # The client is only built once there is something to upload.
    cfg = oss.config.load_default()
    cfg.credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()
    cfg.region = args.region
    if args.endpoint:
        cfg.endpoint = args.endpoint

    client = oss.Client(cfg)
    uploader = client.uploader()

    # Strip slashes on BOTH sides: an object key must not begin with "/",
    # yet the help text itself suggests a value such as "/data".
    base_key = args.key.strip('/')

    failed = 0
    for local_path, filename in jsonl_files:
        oss_key = _build_object_key(base_key, filename)

        try:
            result = uploader.upload_file(
                oss.PutObjectRequest(
                    bucket=args.bucket,
                    key=oss_key,
                ),
                filepath=local_path,
            )
        except Exception as e:
            # Deliberately broad: one bad file must not abort the rest of
            # the batch. The failure is reported and counted instead.
            failed += 1
            print(f" 上传失败 {filename}")
            print(f" 错误信息: {str(e)}\n")
        else:
            print(f" 成功上传 {filename}")
            print(f" OSS路径: {oss_key}")
            print(f" 状态码: {result.status_code}")
            print(f" 请求ID: {result.request_id}")
            print(f" ETag: {result.etag}\n")

    if failed:
        # Let shells and schedulers see that the batch was only partly uploaded.
        raise SystemExit(1)


def _collect_jsonl_files(directory):
    """Return sorted ``(full_path, filename)`` pairs for regular ``*.jsonl`` files in *directory*."""
    found = []
    # Sorted so the upload order (and therefore the log) is reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.jsonl'):
            continue
        full_path = os.path.join(directory, filename)
        if os.path.isfile(full_path):
            found.append((full_path, filename))
    return found


def _build_object_key(base_key, filename):
    """Join the already-normalised prefix and *filename* into an object key."""
    return f"{base_key}/{filename}" if base_key else filename
| |
|
# Run the uploader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()