Upload 137 files
Browse files- hugging/td_fuse/heal.py +31 -0
hugging/td_fuse/heal.py
CHANGED
|
@@ -347,6 +347,37 @@ def apply_qlora_standard(
|
|
| 347 |
|
| 348 |
print(f"\n[heal] Merging LoRA adapters...")
|
| 349 |
merged_model = model.merge_and_unload()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
merged_model.save_pretrained(str(healed_dir))
|
| 351 |
tokenizer.save_pretrained(str(healed_dir))
|
| 352 |
|
|
|
|
| 347 |
|
| 348 |
print(f"\n[heal] Merging LoRA adapters...")
|
| 349 |
merged_model = model.merge_and_unload()
|
| 350 |
+
|
| 351 |
+
# Free disk space before saving — remove duplicate model copies
|
| 352 |
+
import shutil, gc
|
| 353 |
+
print("[heal] Freeing disk space before save...")
|
| 354 |
+
|
| 355 |
+
# Fixed list of directories to delete if present; they are assumed to be redundant copies
|
| 356 |
+
# The healed model in memory IS the final product — we don't need old copies
|
| 357 |
+
cleanup_targets = [
|
| 358 |
+
"td_fuse_outputs/final", # duplicate of after_deepseek
|
| 359 |
+
"td_fuse_outputs/healed", # old healed dir if exists
|
| 360 |
+
]
|
| 361 |
+
for target in cleanup_targets:
|
| 362 |
+
target_path = Path(target)
|
| 363 |
+
if target_path.exists() and target_path.is_dir():
|
| 364 |
+
shutil.rmtree(str(target_path))
|
| 365 |
+
print(f"[heal] Freed space: removed {target_path}")
|
| 366 |
+
|
| 367 |
+
# Remove every checkpoint-* directory found recursively under the current directory, td_lang_outputs and cfg.output_dir (the merged model is already in memory)
|
| 368 |
+
for parent in [Path("."), Path("td_lang_outputs"), Path(cfg.output_dir)]:
|
| 369 |
+
if parent.exists():
|
| 370 |
+
for ckpt in parent.rglob("checkpoint-*"):
|
| 371 |
+
if ckpt.is_dir():
|
| 372 |
+
shutil.rmtree(str(ckpt))
|
| 373 |
+
print(f"[heal] Freed space: removed {ckpt}")
|
| 374 |
+
|
| 375 |
+
gc.collect()
|
| 376 |
+
|
| 377 |
+
# Report free space on the root filesystem ("/")
|
| 378 |
+
stat = shutil.disk_usage("/")
|
| 379 |
+
print(f"[heal] Disk space: {stat.free / 1e9:.1f} GB free / {stat.total / 1e9:.1f} GB total")
|
| 380 |
+
|
| 381 |
merged_model.save_pretrained(str(healed_dir))
|
| 382 |
tokenizer.save_pretrained(str(healed_dir))
|
| 383 |
|