ASTERIZER committed on
Commit
828e3ce
·
verified ·
1 Parent(s): 2b33315

Upload push_code_to_hf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. push_code_to_hf.py +95 -0
push_code_to_hf.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Push LUNA training code + config to a Hugging Face model repo.
3
+ Uploads everything needed to run LoRA SFT on a GPU instance.
4
+
5
+ Usage:
6
+ HF_TOKEN=hf_xxx python push_code_to_hf.py
7
+ """
8
+
9
+ import os
10
+ from huggingface_hub import HfApi, create_repo
11
+
12
# Destination model repo on the Hugging Face Hub.
HF_REPO = "ASTERIZER/LUNA-Training"
# Access token taken from the environment; None when HF_TOKEN is not set.
TOKEN = os.getenv("HF_TOKEN")

# Relative paths to upload; each is pushed to the same path inside the repo.
FILES_TO_PUSH = [
    # --- training / inference entry points ---
    "sft_train.py",
    "lora_sft_train.py",
    "train.py",
    "chat.py",
    "generate.py",

    # --- YAML configuration ---
    "rag_mcp_lora_config.yaml",
    "sft_config.yaml",
    "train_config.yaml",

    # --- Python dependencies ---
    "requirements.txt",

    # --- validation and benchmarking helpers ---
    "validate_sft.py",
    "check_sft_alignment.py",
    "validate_and_quantize.py",

    # --- dataset builder and its accompanying docs/metadata ---
    "Base/Datasets/rag_mcp_sft/build_rag_mcp_sft_dataset.py",
    "Base/Datasets/rag_mcp_sft/push_to_hf.py",
    "Base/Datasets/rag_mcp_sft/BUILD_REPORT.md",
    "Base/Datasets/rag_mcp_sft/FINETUNE_COMMANDS.md",
    "Base/Datasets/rag_mcp_sft/README.md",
    "Base/Datasets/rag_mcp_sft/source_manifest.json",
    "Base/Datasets/rag_mcp_sft/sample_preview.json",

    # --- model/tokenizer config (small files only, no weights) ---
    "Base/checkpoints/EleutherAI/pythia-160m/config.json",
    "Base/checkpoints/EleutherAI/pythia-160m/tokenizer_config.json",
    "Base/checkpoints/EleutherAI/pythia-160m/tokenizer.json",

    # --- setup shell scripts ---
    "setup_and_sft.sh",
    "setup_and_train.sh",

    # --- GPU run script ---
    "gpu_train.sh",

    # --- top-level README ---
    "README.md",
]
60
+
61
+
62
def main():
    """Create the target Hub repo if needed and upload each listed file.

    Every entry of ``FILES_TO_PUSH`` is looked up relative to the current
    working directory and uploaded to the same relative path in ``HF_REPO``.
    Entries that are not regular files are reported and skipped, so a
    partial checkout still pushes whatever is present.

    Raises:
        RuntimeError: If ``HF_TOKEN`` was unset or empty when the module
            was loaded.
    """
    if not TOKEN:
        raise RuntimeError("Set HF_TOKEN environment variable")

    api = HfApi(token=TOKEN)

    # exist_ok=True makes re-running the script against an existing repo safe.
    create_repo(
        repo_id=HF_REPO,
        token=TOKEN,
        repo_type="model",
        exist_ok=True,
        private=False,
    )
    print(f"Repo ready: https://huggingface.co/{HF_REPO}")

    pushed = 0
    for fpath in FILES_TO_PUSH:
        # isfile() rather than exists(): a directory with a matching name
        # would pass an existence check, but upload_file expects a single
        # file and the whole run would abort on it.
        if not os.path.isfile(fpath):
            print(f" SKIP (not found): {fpath}")
            continue
        api.upload_file(
            path_or_fileobj=fpath,
            path_in_repo=fpath,
            repo_id=HF_REPO,
            token=TOKEN,
        )
        print(f" OK: {fpath}")
        pushed += 1

    print(f"\nPushed {pushed}/{len(FILES_TO_PUSH)} files to https://huggingface.co/{HF_REPO}")


if __name__ == "__main__":
    main()