ASTERIZER
/

LUNA-Training

Model card Files and versions

xet

Community

ASTERIZER committed on Apr 2

Commit

828e3ce

verified ·

1 Parent(s): 2b33315

Upload push_code_to_hf.py with huggingface_hub

Browse files

Files changed (1) hide show

push_code_to_hf.py +95 -0

push_code_to_hf.py ADDED Viewed

	@@ -0,0 +1,95 @@

+"""
+Push LUNA training code + config to a Hugging Face model repo.
+Uploads everything needed to run LoRA SFT on a GPU instance.
+Usage:
+    HF_TOKEN=hf_xxx python push_code_to_hf.py
+"""
+import os
+from huggingface_hub import HfApi, create_repo
+# Destination model repository on the Hugging Face Hub.
+HF_REPO = "ASTERIZER/LUNA-Training"
+# Access token taken from the environment; None when HF_TOKEN is unset.
+TOKEN = os.getenv("HF_TOKEN")
+# Repo-relative paths, grouped by purpose; the groups are concatenated below
+# in upload order.
+_TRAINING_SCRIPTS = [
+    "sft_train.py",
+    "lora_sft_train.py",
+    "train.py",
+    "chat.py",
+    "generate.py",
+]
+_CONFIGS = [
+    "rag_mcp_lora_config.yaml",
+    "sft_config.yaml",
+    "train_config.yaml",
+]
+_REQUIREMENTS = ["requirements.txt"]
+_VALIDATION = [
+    "validate_sft.py",
+    "check_sft_alignment.py",
+    "validate_and_quantize.py",
+]
+_DATASET_BUILDER = [
+    "Base/Datasets/rag_mcp_sft/build_rag_mcp_sft_dataset.py",
+    "Base/Datasets/rag_mcp_sft/push_to_hf.py",
+    "Base/Datasets/rag_mcp_sft/BUILD_REPORT.md",
+    "Base/Datasets/rag_mcp_sft/FINETUNE_COMMANDS.md",
+    "Base/Datasets/rag_mcp_sft/README.md",
+    "Base/Datasets/rag_mcp_sft/source_manifest.json",
+    "Base/Datasets/rag_mcp_sft/sample_preview.json",
+]
+# Small config/tokenizer files only.
+_TOKENIZER_FILES = [
+    "Base/checkpoints/EleutherAI/pythia-160m/config.json",
+    "Base/checkpoints/EleutherAI/pythia-160m/tokenizer_config.json",
+    "Base/checkpoints/EleutherAI/pythia-160m/tokenizer.json",
+]
+_SHELL_SCRIPTS = [
+    "setup_and_sft.sh",
+    "setup_and_train.sh",
+]
+_GPU_RUN_SCRIPT = ["gpu_train.sh"]
+_README = ["README.md"]
+FILES_TO_PUSH = [
+    *_TRAINING_SCRIPTS,
+    *_CONFIGS,
+    *_REQUIREMENTS,
+    *_VALIDATION,
+    *_DATASET_BUILDER,
+    *_TOKENIZER_FILES,
+    *_SHELL_SCRIPTS,
+    *_GPU_RUN_SCRIPT,
+    *_README,
+]
+def main():
+    """Ensure the Hub repo exists, then upload each listed file found locally.
+
+    Every path in FILES_TO_PUSH is checked relative to the current working
+    directory and reused unchanged as its path inside the repo. Entries that
+    are not regular files are reported and skipped; each remaining file is
+    sent with its own upload_file call. A summary count is printed at the end.
+
+    Raises:
+        RuntimeError: if the HF_TOKEN environment variable is not set.
+    """
+    if not TOKEN:
+        raise RuntimeError("Set HF_TOKEN environment variable")
+    api = HfApi(token=TOKEN)
+    # exist_ok=True makes re-runs against an already-created repo a no-op here.
+    create_repo(
+        repo_id=HF_REPO,
+        token=TOKEN,
+        repo_type="model",
+        exist_ok=True,
+        private=False,
+    )
+    print(f"Repo ready: https://huggingface.co/{HF_REPO}")
+    pushed = 0
+    for fpath in FILES_TO_PUSH:
+        # isfile rather than exists: upload_file accepts a single file, so a
+        # directory sharing a listed name is skipped instead of aborting the run.
+        if not os.path.isfile(fpath):
+            print(f"  SKIP (not found): {fpath}")
+            continue
+        api.upload_file(
+            path_or_fileobj=fpath,
+            path_in_repo=fpath,
+            repo_id=HF_REPO,
+            repo_type="model",  # same repo type as the create_repo call above
+            token=TOKEN,
+        )
+        print(f"  OK: {fpath}")
+        pushed += 1
+    print(f"\nPushed {pushed}/{len(FILES_TO_PUSH)} files to https://huggingface.co/{HF_REPO}")
+if __name__ == "__main__":  # run the upload only when executed as a script, not on import
+    main()