Add checkpoint uploader script
Browse files- hf_upload.py +63 -0
hf_upload.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
HuggingFace Checkpoint Uploader for AGILLM-3
|
| 4 |
+
Usage: python hf_upload.py <checkpoint.pt> [--message "commit msg"]
|
| 5 |
+
Requires: HF_TOKEN env var, or a token saved locally via `huggingface-cli login`
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import argparse
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
|
| 13 |
+
try:
    from huggingface_hub import HfApi, upload_file
except ImportError:
    # Bootstrap the dependency on first run.  pip is invoked through the
    # interpreter that is running this script so the package is installed
    # into the same environment (a bare "pip" on PATH may belong to another
    # Python), and as an argument list so no shell is involved.
    import importlib
    import subprocess

    print("Installing huggingface_hub...")
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "huggingface_hub"],
        check=False,
    )
    # Make the import system notice the freshly installed package.
    importlib.invalidate_caches()
    # If the install failed, this retry raises ImportError, as before.
    from huggingface_hub import HfApi, upload_file
|
| 19 |
+
|
| 20 |
+
# Hub repository id passed as ``repo_id`` when uploading checkpoints.
REPO_ID = "OpenTransformer/AGILLM-3-large"
|
| 21 |
+
|
| 22 |
+
def _extract_step(stem: str) -> str:
    """Return the digit run that follows the last "step" marker in *stem*.

    An optional ``_`` or ``-`` between the marker and the digits is skipped,
    so ``"ckpt_step1200"``, ``"step_1200"`` and ``"step1200_best"`` all give
    ``"1200"``.  Returns ``"unknown"`` when no such digits are present.
    """
    import re  # local import keeps this block independent of the file header

    found = re.findall(r"step[_-]?(\d+)", stem)
    return found[-1] if found else "unknown"


def upload_checkpoint(ckpt_path: str, message: str = None) -> bool:
    """Upload one checkpoint file to ``checkpoints/`` in the ``REPO_ID`` repo.

    Args:
        ckpt_path: Path to the local checkpoint file.
        message: Commit message.  When empty or ``None`` a default of the
            form ``"Checkpoint step <step> - <YYYY-MM-DD HH:MM>"`` is built
            from the file name and the current local time.

    Returns:
        ``True`` if the upload call completed, ``False`` if *ckpt_path* is
        missing, is not a regular file, or the upload raised.
    """
    path = Path(ckpt_path)
    if not path.exists():
        print(f"Error: {ckpt_path} not found")
        return False
    if not path.is_file():
        # upload_file() is given a single file path; reject directories
        # up front with a message that says what is actually wrong.
        print(f"Error: {ckpt_path} is not a file")
        return False

    # An empty HF_TOKEN is treated the same as an unset one.  Passing None
    # leaves credential lookup to huggingface_hub.
    token = os.environ.get("HF_TOKEN") or None

    step = _extract_step(path.stem)
    ts = datetime.now().strftime("%Y-%m-%d %H:%M")
    commit_msg = message or f"Checkpoint step {step} - {ts}"

    print(f"Uploading {path.name} to {REPO_ID}...")
    print(f"Size: {path.stat().st_size / 1e9:.2f} GB")

    try:
        upload_file(
            path_or_fileobj=str(path),
            path_in_repo=f"checkpoints/{path.name}",
            repo_id=REPO_ID,
            commit_message=commit_msg,
            token=token,
        )
    except Exception as e:
        # CLI boundary: report any upload failure and signal it through the
        # return value instead of a traceback.
        print(f"Upload failed: {e}")
        return False

    print(f"SUCCESS: https://huggingface.co/{REPO_ID}")
    return True
|
| 55 |
+
|
| 56 |
+
if __name__ == "__main__":
    # Command-line entry point: one positional checkpoint path and an
    # optional commit message.  The process exit status mirrors the upload
    # result (0 on success, 1 on failure).
    cli = argparse.ArgumentParser()
    cli.add_argument("checkpoint", help="Path to checkpoint .pt file")
    cli.add_argument("--message", "-m", help="Commit message")
    opts = cli.parse_args()

    if upload_checkpoint(opts.checkpoint, opts.message):
        sys.exit(0)
    sys.exit(1)
|