File size: 2,360 Bytes
f6e42f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f04365
 
 
35d9db6
3af7f4c
 
 
 
 
 
 
 
 
729546e
 
 
 
f6e42f8
 
3f04365
eb5278f
3af7f4c
729546e
f6e42f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python3
"""Push the distill code/configs to the HF backup repo.

Usage:
    .venv/bin/python scripts/backup_to_hf.py "<commit message>"
"""
import os
import sys
from pathlib import Path

from huggingface_hub import HfApi, CommitOperationAdd, create_commit

# Hugging Face Hub repository that receives the backup commits.
REPO_ID = "Delta-Vector/distill-m-6a3lnzvb-code"
REPO_TYPE = "model"

# Experiment configs that sit directly under configs/.
_EXPERIMENT_CONFIGS = [
    "configs/base.toml",
    "configs/zero_14_17.toml",
    "configs/replicate_zero4.toml",
    "configs/grow40_winning.toml",
    "configs/grow40_simple.toml",
    "configs/grow40_winning_v2.toml",
]

# Sweep configs under configs/sweep/, lettered A through M.
_SWEEP_CONFIGS = [
    "configs/sweep/A_resume_lr1e7_cos.toml",
    "configs/sweep/B_resume_lr5e8_cos.toml",
    "configs/sweep/C_resume_lr2e8_cos.toml",
    "configs/sweep/D_resume_lr1e7_const.toml",
    "configs/sweep/E_resume_lr5e8_b95.toml",
    "configs/sweep/F_cold_lr1e7_grow40.toml",
    "configs/sweep/G_cold_lr2e7_grow40.toml",
    "configs/sweep/H_cold_lr1e7_32L.toml",
    "configs/sweep/I_cold_paramgroups_grow40.toml",
    "configs/sweep/J_phase2_lr5e9_const.toml",
    "configs/sweep/K_phase2_lr2e8_const.toml",
    "configs/sweep/L_phase2_lr1e8_warmup500.toml",
    "configs/sweep/M_phase2_lr2e8_largebatch.toml",
]

# Scripts under scripts/, including this backup script itself.
_SCRIPTS = [
    "scripts/backup_to_hf.py",
    "scripts/run_sweep.sh",
    "scripts/run_sweep_rerun.sh",
    "scripts/run_hparam_sweep.sh",
    "scripts/run_phase2_sweep.sh",
]

# Files/directories to mirror to the repo, as paths relative to the project
# root; the same relative path is used as the destination path in the repo.
INCLUDE = [
    "distill.py",
    *_EXPERIMENT_CONFIGS,
    *_SWEEP_CONFIGS,
    "configs/accelerate.yaml",
    *_SCRIPTS,
    "pyproject.toml",
    "requirements.lock.txt",
]


def _iter_uploads(root, rel):
    """Yield (path_in_repo, local_path) pairs for one existing INCLUDE entry.

    A non-directory entry yields itself once, keeping *rel* as the repo path.
    A directory entry is walked recursively and yields every regular file
    below it, with the repo path being the file's POSIX-style path relative
    to *root*.
    """
    local = root / rel
    if not local.is_dir():
        yield rel, local
        return
    # Sorted so the upload order (and the printed log) is deterministic.
    for child in sorted(local.rglob("*")):
        if child.is_file():
            yield child.relative_to(root).as_posix(), child


def main():
    """Upload the paths listed in INCLUDE to the Hub repo as a single commit.

    The commit message is the first command-line argument; it falls back to
    "update" when the argument is absent or blank.  The access token is read
    from the HF_TOKEN environment variable; if it is unset or empty, an error
    is printed to stderr and the process exits with status 1.

    INCLUDE entries are resolved against the project root (the parent of the
    directory holding this script).  Missing entries are reported and
    skipped, files are uploaded under their INCLUDE path, and directories
    are expanded to the files they contain.  If nothing is left to upload,
    no commit is made.
    """
    msg = sys.argv[1] if len(sys.argv) > 1 else "update"
    if not msg.strip():
        # A blank argument is treated like a missing one.
        msg = "update"

    token = os.environ.get("HF_TOKEN")
    if not token:
        print("HF_TOKEN env var required", file=sys.stderr)
        sys.exit(1)

    root = Path(__file__).resolve().parent.parent
    ops = []
    for rel in INCLUDE:
        if not (root / rel).exists():
            print(f"  skip (missing): {rel}")
            continue
        found = False
        # CommitOperationAdd takes one file per operation, so a directory
        # entry is expanded into one operation per contained file.
        for repo_path, local in _iter_uploads(root, rel):
            found = True
            ops.append(
                CommitOperationAdd(
                    path_in_repo=repo_path, path_or_fileobj=str(local)
                )
            )
            print(f"  add: {repo_path}")
        if not found:
            print(f"  skip (empty dir): {rel}")

    if not ops:
        print("nothing to upload")
        return

    api = HfApi(token=token)
    api.create_commit(
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        operations=ops,
        commit_message=msg,
    )
    print(f"pushed {len(ops)} files to {REPO_ID}: {msg}")


# Run the backup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()