# File size: 2,516 Bytes | 1c980b1
import argparse
import os
import alibabacloud_oss_v2 as oss
def _parse_args():
    """Parse the command-line options for the batch upload from ``sys.argv``."""
    parser = argparse.ArgumentParser(description="批量上传JSONL文件到OSS")
    # Required options
    parser.add_argument('--region', required=True, help='OSS存储空间所在区域')
    parser.add_argument('--bucket', required=True, help='目标存储空间名称')
    parser.add_argument('--key', required=True, help='OSS目标文件夹路径(如:/data)')
    parser.add_argument('--file_path', required=True, help='本地包含JSONL文件的文件夹路径')
    # Optional options
    parser.add_argument('--endpoint', help='自定义访问端点')
    return parser.parse_args()


def _collect_jsonl_files(directory):
    """Return ``(full_path, filename)`` pairs for ``*.jsonl`` files in *directory*.

    Only regular files directly inside *directory* are returned (no recursion;
    a sub-directory whose name ends in ``.jsonl`` is skipped).  Names are
    sorted so the upload order is reproducible; ``os.listdir`` alone returns
    entries in arbitrary order.
    """
    pairs = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.jsonl'):
            continue
        full_path = os.path.join(directory, filename)
        if os.path.isfile(full_path):
            pairs.append((full_path, filename))
    return pairs


def _object_key(prefix, filename):
    """Join an already-normalized folder *prefix* and *filename* into an object key."""
    return f"{prefix}/{filename}" if prefix else filename


def main():
    """Upload every ``.jsonl`` file of a local directory to an OSS bucket.

    Options are read from the command line: ``--region``, ``--bucket``,
    ``--key`` (destination folder inside the bucket) and ``--file_path``
    (local directory) are required; ``--endpoint`` is optional.

    Each file is uploaded to ``<key>/<filename>``.  A failure on one file is
    printed and the loop moves on to the next file, so a single bad upload
    does not abort the batch.

    Raises:
        ValueError: if ``--file_path`` is not an existing directory.
    """
    args = _parse_args()

    # Validate the local directory before doing anything else.
    if not os.path.isdir(args.file_path):
        raise ValueError(f"无效的目录路径: {args.file_path}")

    jsonl_files = _collect_jsonl_files(args.file_path)
    if not jsonl_files:
        print("未找到任何JSONL文件")
        return

    # Build the OSS configuration; credentials are supplied by the SDK's
    # environment-variable credentials provider.
    credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()
    cfg = oss.config.load_default()
    cfg.credentials_provider = credentials_provider
    cfg.region = args.region
    if args.endpoint:
        cfg.endpoint = args.endpoint

    client = oss.Client(cfg)
    uploader = client.uploader()

    # Normalize the destination folder.  Slashes are stripped on BOTH ends:
    # OSS object names must not begin with '/', yet the --key help text
    # suggests a value such as "/data".  An empty result means "bucket root".
    base_key = args.key.strip('/')

    for local_path, filename in jsonl_files:
        oss_key = _object_key(base_key, filename)
        try:
            result = uploader.upload_file(
                oss.PutObjectRequest(
                    bucket=args.bucket,
                    key=oss_key,
                ),
                filepath=local_path,
            )
            # Report the outcome of the upload.
            print(f" 成功上传 {filename}")
            print(f" OSS路径: {oss_key}")
            print(f" 状态码: {result.status_code}")
            print(f" 请求ID: {result.request_id}")
            print(f" ETag: {result.etag}\n")
        except Exception as e:
            # Deliberate best-effort boundary: report and continue with the
            # remaining files instead of aborting the whole batch.
            print(f" 上传失败 {filename}")
            print(f" 错误信息: {str(e)}\n")
if __name__ == "__main__":
    # Run the uploader only when this file is executed as a script,
    # not when it is imported as a module.
    main()