asdjghh committed on
Commit
734afcf
·
verified ·
1 Parent(s): 6436700

Upload image_edit.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. image_edit.py +521 -0
image_edit.py ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Copyright 2025 PKU-Alignment Team. All Rights Reserved.
2
+ # #
3
+ # # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # # you may not use this file except in compliance with the License.
5
+ # # You may obtain a copy of the License at
6
+ # #
7
+ # # http://www.apache.org/licenses/LICENSE-2.0
8
+ # #
9
+ # # Unless required by applicable law or agreed to in writing, software
10
+ # # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # # See the License for the specific language governing permissions and
13
+ # # limitations under the License.
14
+ # # ==============================================================================
15
+ # import argparse
16
+ # import json
17
+ # import os
18
+ # import uuid
19
+ #
20
+ # import requests
21
+ # import torch
22
+ # import torch.multiprocessing as mp
23
+ # from janus.models import MultiModalityCausalLM, VLChatProcessor, VLMImageProcessor
24
+ # from PIL import Image
25
+ # from tqdm import tqdm
26
+ #
27
+ # from align_anything.utils.device_utils import set_device, torch_gc
28
+ #
29
+ #
30
+ # ignore_index = -100
31
+ #
32
+ #
33
+ # def load_image(image_path: str):
34
+ # try:
35
+ # if image_path.startswith('http'):
36
+ # image = Image.open(requests.get(image_path, stream=True).raw).convert('RGB')
37
+ # else:
38
+ # image = Image.open(image_path).convert('RGB')
39
+ # return image
40
+ # except Exception as e:
41
+ # print(f'Error occurred when dealing with {image_path}: {e}')
42
+ # raise Exception
43
+ #
44
+ #
45
+ # def format_sample_janus(piece, vl_chat_processor):
46
+ # sample = {
47
+ # 'input_text': piece['prompt'],
48
+ # 'source_image': load_image(piece['source_image']),
49
+ # 'output_image': load_image(piece['image']),
50
+ # }
51
+ # return sample
52
+ #
53
+ #
54
+ # def tokenize_sample(vl_chat_processor, vl_gpt, vl_image_processor, formatted_sample):
55
+ # input_img_tokens = (vl_chat_processor.image_start_tag +
56
+ # vl_chat_processor.image_tag * vl_chat_processor.num_image_tokens
57
+ # + vl_chat_processor.image_end_tag +
58
+ # vl_chat_processor.image_start_tag +
59
+ # vl_chat_processor.pad_tag * vl_chat_processor.num_image_tokens +
60
+ # vl_chat_processor.image_end_tag)
61
+ # output_img_tokens = vl_chat_processor.image_start_tag
62
+ # prompts = input_img_tokens + formatted_sample['input_text']
63
+ #
64
+ # conversation = [
65
+ # {'role': 'User', 'content': prompts},
66
+ # {'role': 'Assistant', 'content': ''},
67
+ # ]
68
+ # sft_format = vl_chat_processor.apply_sft_template_for_multi_turn_prompts(
69
+ # conversations=conversation,
70
+ # sft_format=vl_chat_processor.sft_format,
71
+ # system_prompt='',
72
+ # )
73
+ # # sft_format = sft_format + output_img_tokens
74
+ #
75
+ # prompt = sft_format + vl_chat_processor.image_start_tag
76
+ # input_ids = vl_chat_processor.tokenizer.encode(prompt)
77
+ # input_ids = torch.LongTensor(input_ids).to(vl_gpt.device)
78
+ #
79
+ # pixel_values = (
80
+ # vl_image_processor([formatted_sample['output_image']], return_tensors='pt')['pixel_values']
81
+ # .to(vl_gpt.device)
82
+ # .to(torch.bfloat16)
83
+ # )
84
+ # (
85
+ # quant,
86
+ # (vq_loss, commit_loss, entropy_loss),
87
+ # (perplexity, min_encodings, min_encoding_indices),
88
+ # ) = vl_gpt.gen_vision_model.encode(pixel_values)
89
+ # full_input_ids = torch.cat([input_ids, min_encoding_indices])
90
+ # labels = full_input_ids.clone()
91
+ # labels[: len(input_ids)] = ignore_index
92
+ #
93
+ # return {
94
+ # 'input_ids': full_input_ids.to('cpu'),
95
+ # 'labels': labels.to('cpu'),
96
+ # 'task': 'generation',
97
+ # }
98
+ #
99
+ #
100
+ # def process_data(gpu, chunk, model_path, output_paths, cache_path):
101
+ # device = set_device(gpu)
102
+ # print(f'Initializing Model on {device}')
103
+ # vl_chat_processor = VLChatProcessor.from_pretrained(model_path, device=device)
104
+ # vl_gpt = MultiModalityCausalLM.from_pretrained(model_path, trust_remote_code=True).to(device)
105
+ # vl_gpt = vl_gpt.to(torch.bfloat16).eval()
106
+ # vl_image_processor = VLMImageProcessor.from_pretrained(model_path, device=device)
107
+ #
108
+ # print(f'Finished Initializing Model on {device}')
109
+ #
110
+ # local_output_paths = []
111
+ # for piece in tqdm(chunk, desc=f'Processing on GPU {gpu}'):
112
+ # print(piece)
113
+ # formatted_sample = format_sample_janus(piece, vl_chat_processor)
114
+ # sample = tokenize_sample(vl_chat_processor, vl_gpt, vl_image_processor, formatted_sample)
115
+ # file_name = str(uuid.uuid4()) + '.pt'
116
+ # file_path = os.path.join(cache_path, file_name)
117
+ # torch.save(sample, file_path)
118
+ # local_output_paths.append(file_path)
119
+ # del sample
120
+ # torch_gc()
121
+ #
122
+ # output_paths.extend(local_output_paths)
123
+ # print(f'Processed {len(local_output_paths)} samples on GPU {gpu}')
124
+ #
125
+ #
126
+ # def main():
127
+ # parser = argparse.ArgumentParser()
128
+ # parser.add_argument('--input_path', type=str, required=True)
129
+ # parser.add_argument('--output_path', type=str, required=True)
130
+ # parser.add_argument('--model_path', type=str, required=True)
131
+ # parser.add_argument('--cache_dir', type=str, default='.cache')
132
+ # parser.add_argument('--num_processes', type=int, default=1)
133
+ # parser.add_argument('--num_gpus', type=int, default=2)
134
+ #
135
+ # args = parser.parse_args()
136
+ #
137
+ # input_path = args.input_path
138
+ # output_path = args.output_path
139
+ # model_path = args.model_path
140
+ # cache_path = args.cache_dir
141
+ #
142
+ # # if cache dir does not exist, make one
143
+ # if not os.path.exists(cache_path):
144
+ # os.makedirs(cache_path)
145
+ #
146
+ # with open(input_path) as f:
147
+ # input_data = json.load(f)
148
+ #
149
+ # num_processes = args.num_processes
150
+ # num_gpus = args.num_gpus
151
+ # mp.set_start_method('spawn', force=True)
152
+ # output_paths = mp.Manager().list() # For collecting results from multiple processes
153
+ #
154
+ # target = input_data # add to_list() if you acquire the dataset from load_dataset
155
+ # print(f'Full Length: {len(target)}')
156
+ # chunks = [target[i::num_processes] for i in range(num_processes)]
157
+ #
158
+ # processes = []
159
+ # for id in range(num_processes):
160
+ # gpu = id % num_gpus # This maps process to GPU cyclically
161
+ # p = mp.Process(
162
+ # target=process_data, args=(gpu, chunks[id], model_path, output_paths, '.cache')
163
+ # )
164
+ # p.start()
165
+ # processes.append(p)
166
+ #
167
+ # for p in processes:
168
+ # p.join()
169
+ #
170
+ # output_paths = list(output_paths)
171
+ #
172
+ # all_data = []
173
+ # for path in output_paths:
174
+ # data = torch.load(path)
175
+ # all_data.append(data)
176
+ #
177
+ # torch.set_printoptions(threshold=torch.inf)
178
+ # print(f'Effective Length: {len(all_data)}')
179
+ #
180
+ # torch.save(all_data, output_path)
181
+ #
182
+ #
183
+ # if __name__ == '__main__':
184
+ # main()
185
+ # Copyright 2025 PKU-Alignment Team. All Rights Reserved.
186
+ #
187
+ # Licensed under the Apache License, Version 2.0 (the "License");
188
+ # you may not use this file except in compliance with the License.
189
+ # You may obtain a copy of the License at
190
+ #
191
+ # http://www.apache.org/licenses/LICENSE-2.0
192
+ #
193
+ # Unless required by applicable law or agreed to in writing, software
194
+ # distributed under the License is distributed on an "AS IS" BASIS,
195
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
196
+ # See the License for the specific language governing permissions and
197
+ # limitations under the License.
198
+ # ==============================================================================
199
+ import argparse
200
+ import json
201
+ import os
202
+ import uuid
203
+ from pathlib import Path
204
+
205
+ import requests
206
+ import torch
207
+ import torch.multiprocessing as mp
208
+ from janus.models import MultiModalityCausalLM, VLChatProcessor, VLMImageProcessor
209
+ from PIL import Image
210
+ from tqdm import tqdm
211
+
212
+ from align_anything.utils.device_utils import set_device, torch_gc
213
+
214
# Label value that tokenize_sample writes over the prompt positions of each
# sample. -100 is the default ``ignore_index`` of torch.nn.CrossEntropyLoss, so
# presumably the training loss skips those positions (TODO: confirm against the
# trainer that consumes the saved samples).
+ ignore_index = -100
215
+
216
+
217
def safe_torch_save(obj, file_path):
    """Save ``obj`` with ``torch.save``, creating missing parent directories.

    If creating the directory or writing the file fails for any reason, the
    error is printed and the object is written to ``~/torch_cache/<file name>``
    instead. An error raised by that second attempt propagates to the caller.

    Args:
        obj: Any object accepted by ``torch.save``.
        file_path: Destination path (``str`` or ``Path``).

    Returns:
        The path that was actually written, as a ``str``.
    """
    try:
        # Normalise to a Path so ``.parent`` / ``.name`` are available below.
        file_path = Path(file_path)
        file_path.parent.mkdir(parents=True, exist_ok=True)
        torch.save(obj, file_path)
    except Exception as e:
        print(f"❌ 保存失败: {e}")
        print(f"尝试保存到: {file_path}")

        # Best-effort fallback: keep the file name, relocate under the home dir.
        fallback_dir = Path.home().joinpath("torch_cache")
        fallback_dir.mkdir(parents=True, exist_ok=True)
        fallback_target = fallback_dir / file_path.name
        torch.save(obj, fallback_target)
        print(f"✅ 已保存到备用位置: {fallback_target}")
        return str(fallback_target)
    else:
        return str(file_path)
241
+
242
+
243
def load_image(image_path: str, timeout: float = 30.0):
    """Load an image from a local path or an HTTP(S) URL and convert it to RGB.

    Args:
        image_path: Filesystem path, or a URL. Anything starting with
            ``'http'`` is downloaded with ``requests``.
        timeout: Seconds to wait for the HTTP server before giving up. Only
            used for URLs. Without a timeout a stalled server would block the
            calling worker indefinitely.

    Returns:
        The image converted to mode ``'RGB'``.

    Raises:
        Exception: On any failure (missing file, HTTP error status, undecodable
            data, ...). The message names the offending path and the original
            error is attached as ``__cause__``.
    """
    try:
        if image_path.startswith('http'):
            # The context manager releases the connection once decoding is done.
            with requests.get(image_path, stream=True, timeout=timeout) as response:
                # Fail on 4xx/5xx instead of trying to decode an error page.
                response.raise_for_status()
                return Image.open(response.raw).convert('RGB')
        # ``convert`` returns an independent image, so the file handle opened by
        # ``Image.open`` can be closed as soon as the conversion finishes.
        with Image.open(image_path) as image:
            return image.convert('RGB')
    except Exception as e:
        print(f'Error occurred when dealing with {image_path}: {e}')
        # Same exception type as before, but with a message and the cause
        # chained so callers that log the error see what went wrong.
        raise Exception(f'Failed to load image {image_path}: {e}') from e
253
+
254
+
255
def format_sample_janus(piece, vl_chat_processor):
    """Build the intermediate sample dict for one raw dataset entry.

    Args:
        piece: Mapping with the keys ``'prompt'``, ``'source_image'`` and
            ``'image'``.
        vl_chat_processor: Unused here; kept for signature compatibility.

    Returns:
        A dict with ``'input_text'`` (the prompt), ``'source_image'`` (passed
        through unchanged, not loaded) and ``'output_image'`` (the result of
        ``load_image`` on ``piece['image']``).
    """
    prompt_text = piece['prompt']
    source_ref = piece['source_image']
    target_image = load_image(piece['image'])
    return {
        'input_text': prompt_text,
        'source_image': source_ref,
        'output_image': target_image,
    }
262
+
263
+
264
def tokenize_sample(vl_chat_processor, vl_gpt, vl_image_processor, formatted_sample):
    """Convert one formatted sample into token ids and labels for generation.

    The prompt consists of two image placeholder spans followed by the
    instruction text, wrapped in the processor's SFT chat template and ended
    with an image start tag. The output image is encoded by
    ``vl_gpt.gen_vision_model`` and the resulting code indices are appended to
    the prompt token ids.

    Args:
        vl_chat_processor: Chat processor providing the image tags, the
            tokenizer and the SFT template.
        vl_gpt: Model providing ``device`` and ``gen_vision_model.encode``.
        vl_image_processor: Callable turning a list of images into a dict with
            ``'pixel_values'``.
        formatted_sample: Dict with ``'input_text'``, ``'source_image'`` and
            ``'output_image'``.

    Returns:
        A dict with CPU tensors ``'input_ids'`` (prompt ids followed by image
        code indices) and ``'labels'`` (same, with every prompt position set to
        ``ignore_index``), the untouched ``'source_image'`` value, and
        ``'task': 'generation'``.
    """
    n_image_tokens = vl_chat_processor.num_image_tokens
    start_tag = vl_chat_processor.image_start_tag
    end_tag = vl_chat_processor.image_end_tag

    # Two placeholder spans of ``num_image_tokens`` each: the first filled with
    # image_tag, the second with pad_tag.
    # NOTE(review): presumably these are the slots later filled with features
    # of the source image -- confirm against the training data collator.
    image_span = start_tag + vl_chat_processor.image_tag * n_image_tokens + end_tag
    pad_span = start_tag + vl_chat_processor.pad_tag * n_image_tokens + end_tag
    prompts = image_span + pad_span + formatted_sample['input_text']

    conversation = [
        {'role': 'User', 'content': prompts},
        {'role': 'Assistant', 'content': ''},
    ]
    sft_format = vl_chat_processor.apply_sft_template_for_multi_turn_prompts(
        conversations=conversation,
        sft_format=vl_chat_processor.sft_format,
        system_prompt='',
    )

    # The assistant turn opens with an image start tag; the image codes follow.
    prompt = sft_format + start_tag
    input_ids = torch.LongTensor(vl_chat_processor.tokenizer.encode(prompt)).to(vl_gpt.device)

    pixel_values = (
        vl_image_processor([formatted_sample['output_image']], return_tensors='pt')['pixel_values']
        .to(vl_gpt.device)
        .to(torch.bfloat16)
    )

    # Only the discrete code indices are needed, so skip autograd bookkeeping
    # for the encoder forward pass.
    with torch.no_grad():
        _, _, (_, _, min_encoding_indices) = vl_gpt.gen_vision_model.encode(pixel_values)

    # Assumes ``min_encoding_indices`` is 1-D like ``input_ids``; torch.cat
    # along dim 0 would fail otherwise.
    full_input_ids = torch.cat([input_ids, min_encoding_indices])
    labels = full_input_ids.clone()
    # Mask the prompt so only the image codes remain as labels.
    labels[: len(input_ids)] = ignore_index

    return {
        'input_ids': full_input_ids.to('cpu'),
        'labels': labels.to('cpu'),
        'source_image': formatted_sample['source_image'],
        'task': 'generation',
    }
321
+
322
+
323
def process_data(gpu, chunk, model_path, output_paths, cache_path):
    """Worker entry point: tokenize one chunk of samples on one GPU.

    Loads the chat processor, model and image processor from ``model_path``,
    tokenizes every entry of ``chunk`` and stores each result as its own
    ``.pt`` file under ``cache_path``. A sample that fails is reported and
    skipped. A failure outside the per-sample loop (for example during model
    loading) is reported and re-raised so that the worker process exits with a
    non-zero exit code.

    Args:
        gpu: GPU index passed to ``set_device``.
        chunk: Sequence of raw dataset entries to process.
        model_path: Path or identifier passed to the ``from_pretrained`` calls.
        output_paths: Shared list; the paths of all files written by this
            worker are appended to it once the chunk is finished.
        cache_path: Directory for the per-sample files. Falls back to
            ``~/torch_cache`` when it cannot be created.
    """
    try:
        # Use an absolute path so the location does not depend on the worker's
        # current directory.
        cache_path = os.path.abspath(cache_path)
        print(f'GPU {gpu}: 使用缓存路径: {cache_path}')

        # The parent normally creates the directory; repeat it in the child in
        # case it is missing.
        if not os.path.exists(cache_path):
            try:
                os.makedirs(cache_path, exist_ok=True)
                print(f'GPU {gpu}: 创建缓存目录: {cache_path}')
            except OSError as e:
                print(f'GPU {gpu}: 创建缓存目录失败: {e}')
                # Fall back to a directory under the user's home.
                cache_path = os.path.join(os.path.expanduser("~"), "torch_cache")
                os.makedirs(cache_path, exist_ok=True)
                print(f'GPU {gpu}: 使用备用缓存目录: {cache_path}')

        device = set_device(gpu)
        print(f'Initializing Model on {device}')

        vl_chat_processor = VLChatProcessor.from_pretrained(model_path, device=device)
        vl_gpt = MultiModalityCausalLM.from_pretrained(model_path, trust_remote_code=True).to(device)
        vl_gpt = vl_gpt.to(torch.bfloat16).eval()
        vl_image_processor = VLMImageProcessor.from_pretrained(model_path, device=device)

        print(f'Finished Initializing Model on {device}')

        local_output_paths = []
        total = len(chunk)
        for i, piece in enumerate(tqdm(chunk, desc=f'Processing on GPU {gpu}')):
            try:
                print(f'GPU {gpu}: Processing sample {i + 1}/{total}')
                formatted_sample = format_sample_janus(piece, vl_chat_processor)
                sample = tokenize_sample(vl_chat_processor, vl_gpt, vl_image_processor, formatted_sample)

                # The GPU prefix plus a UUID keeps file names unique across workers.
                file_name = f"gpu_{gpu}_{uuid.uuid4()}.pt"
                file_path = os.path.join(cache_path, file_name)

                saved_path = safe_torch_save(sample, file_path)
                local_output_paths.append(saved_path)
                del sample
            except Exception as e:
                # Best effort: report the bad sample (1-based, matching the
                # progress message above) and move on to the next one.
                print(f'GPU {gpu}: 处理样本 {i + 1} 时出错: {e}')
            finally:
                # Release cached memory after failed samples as well.
                torch_gc()

        output_paths.extend(local_output_paths)
        print(f'GPU {gpu}: Processed {len(local_output_paths)} samples successfully')

    except Exception as e:
        print(f'GPU {gpu}: process_data 函数出错: {e}')
        # Re-raise so multiprocessing prints the traceback and the process ends
        # with a non-zero exit code that the parent can detect.
        raise
380
+
381
+
382
def main():
    """Tokenize an image-editing dataset in parallel and save it as one file.

    Reads a JSON list of samples from ``--input_path``, splits it round-robin
    into ``--num_processes`` chunks, runs ``process_data`` on each non-empty
    chunk in its own spawned process (processes are assigned to GPUs
    cyclically), then loads the per-sample files written by the workers,
    saves them as a single list to ``--output_path`` and removes the
    per-sample files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_path', type=str, required=True)
    parser.add_argument('--output_path', type=str, required=True)
    parser.add_argument('--model_path', type=str, required=True)
    parser.add_argument('--cache_dir', type=str, default='.cache')
    parser.add_argument('--num_processes', type=int, default=16)
    parser.add_argument('--num_gpus', type=int, default=8)

    args = parser.parse_args()

    # Reject values that would otherwise divide by zero (num_gpus) or silently
    # process nothing (num_processes).
    if args.num_processes < 1:
        parser.error('--num_processes must be >= 1')
    if args.num_gpus < 1:
        parser.error('--num_gpus must be >= 1')

    input_path = args.input_path
    output_path = args.output_path
    model_path = args.model_path
    cache_path = os.path.abspath(args.cache_dir)  # absolute, so workers agree on it
    num_processes = args.num_processes
    num_gpus = args.num_gpus

    print(f"输入路径: {input_path}")
    print(f"输出路径: {output_path}")
    print(f"模型路径: {model_path}")
    print(f"缓存路径: {cache_path}")
    print(f"进程数: {num_processes}")
    print(f"GPU数: {num_gpus}")

    # Make sure the cache directory exists, falling back to ~/torch_cache.
    try:
        if not os.path.exists(cache_path):
            os.makedirs(cache_path, exist_ok=True)
            print(f"✅ 创建缓存目录: {cache_path}")
        else:
            print(f"✅ 缓存目录已存在: {cache_path}")
    except OSError as e:
        print(f"❌ 创建缓存目录失败: {e}")
        cache_path = os.path.join(os.path.expanduser("~"), "torch_cache")
        os.makedirs(cache_path, exist_ok=True)
        print(f"✅ 使用备用缓存目录: {cache_path}")

    # Make sure the directory of the final output file exists.
    output_dir = os.path.dirname(os.path.abspath(output_path))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)
        print(f"✅ 创建输出目录: {output_dir}")

    if not os.path.exists(input_path):
        raise FileNotFoundError(f"输入文件不存在: {input_path}")

    # JSON is UTF-8 by specification; do not depend on the locale's encoding.
    with open(input_path, encoding='utf-8') as f:
        input_data = json.load(f)

    # With force=True this replaces any previously selected start method.
    mp.set_start_method('spawn', force=True)

    target = input_data  # add to_list() if you acquire the dataset from load_dataset
    print(f'Full Length: {len(target)}')

    if not target:
        print("❌ 输入数据为空")
        return

    # Round-robin split: chunk i receives samples i, i + n, i + 2n, ...
    chunks = [target[i::num_processes] for i in range(num_processes)]
    print(f"数据分块: {[len(chunk) for chunk in chunks]}")

    # The manager process backing the shared list is shut down on leaving the
    # ``with`` block, so the list is copied before that happens.
    with mp.Manager() as manager:
        shared_paths = manager.list()  # collects result paths from all workers

        processes = []
        for worker_id, chunk in enumerate(chunks):
            if not chunk:
                # Nothing to do; avoid loading a full model for an empty chunk.
                continue
            gpu = worker_id % num_gpus  # maps processes to GPUs cyclically
            print(f"启动进程 {worker_id}, 使用GPU {gpu}, 处理 {len(chunk)} 个样本")

            p = mp.Process(
                target=process_data,
                args=(gpu, chunk, model_path, shared_paths, cache_path),
            )
            p.start()
            processes.append((worker_id, p))

        for worker_id, p in processes:
            print(f"等待进程 {worker_id} 完成...")
            p.join()
            if p.exitcode != 0:
                print(f"⚠️ 进程 {worker_id} 退出码: {p.exitcode}")

        output_paths = list(shared_paths)

    print(f"收集到 {len(output_paths)} 个输出文件")

    if not output_paths:
        print("❌ 没有成功处理的样本")
        return

    all_data = []
    failed_loads = 0
    for path in tqdm(output_paths, desc="加载处理后的数据"):
        try:
            # The files were written by this script's own workers, so full
            # unpickling (weights_only=False) is used to restore the dicts.
            all_data.append(torch.load(path, weights_only=False))
        except Exception as e:
            print(f"❌ 加载文件失败 {path}: {e}")
            failed_loads += 1

    if failed_loads > 0:
        print(f"⚠️ {failed_loads} 个文件加载失败")

    torch.set_printoptions(threshold=torch.inf)
    print(f'Effective Length: {len(all_data)}')

    if not all_data:
        print("❌ 没有有效数据可保存")
        return

    try:
        torch.save(all_data, output_path)
        print(f"✅ 成功保存到: {output_path}")
    except Exception as e:
        print(f"❌ 保存最终结果失败: {e}")
        # Retry under a different file name next to the requested output.
        backup_path = os.path.join(os.path.dirname(output_path), f"backup_{os.path.basename(output_path)}")
        torch.save(all_data, backup_path)
        print(f"✅ 已保存到备用位置: {backup_path}")

    # Remove the per-sample files now that the merged file has been written.
    print("清理临时文件...")
    for path in output_paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass  # already gone; nothing to clean up
        except OSError as e:
            print(f"清理文件失败 {path}: {e}")


if __name__ == '__main__':
    main()