LUNA-Training / push_code_to_hf.py
ASTERIZER's picture
Upload push_code_to_hf.py with huggingface_hub
b6dbdde verified
"""
Push LUNA training code + config to a Hugging Face model repo.
Uploads everything needed to run LoRA SFT on a GPU instance.
Usage:
HF_TOKEN=hf_xxx python push_code_to_hf.py
"""
import os
from huggingface_hub import HfApi, create_repo
# Target model repository on the Hugging Face Hub.
HF_REPO = "ASTERIZER/LUNA-Training"

# Access token taken from the environment; None when HF_TOKEN is unset.
TOKEN = os.getenv("HF_TOKEN")

# The upload manifest is assembled from per-category groups. Each entry is a
# relative path that serves both as the local file to read and as the
# destination path inside the repo.

# Core training scripts
_TRAINING_SCRIPTS = [
    "sft_train.py",
    "lora_sft_train.py",
    "upload_lora_to_hf.py",
    "upload_full_sft_to_hf.py",
    "chat_full_sft.py",
    "train.py",
    "chat.py",
    "generate.py",
]

# Configs
_CONFIG_FILES = [
    "rag_mcp_lora_config.yaml",
    "rag_mcp_full_sft_config.yaml",
    "sft_config.yaml",
    "train_config.yaml",
]

# Requirements
_REQUIREMENTS = ["requirements.txt"]

# Validation / benchmarking
_VALIDATION_SCRIPTS = [
    "validate_sft.py",
    "check_sft_alignment.py",
    "validate_and_quantize.py",
]

# Dataset builder
_DATASET_BUILDER_FILES = [
    "Base/Datasets/rag_mcp_sft/build_rag_mcp_sft_dataset.py",
    "Base/Datasets/rag_mcp_sft/push_to_hf.py",
    "Base/Datasets/rag_mcp_sft/BUILD_REPORT.md",
    "Base/Datasets/rag_mcp_sft/FINETUNE_COMMANDS.md",
    "Base/Datasets/rag_mcp_sft/README.md",
    "Base/Datasets/rag_mcp_sft/source_manifest.json",
    "Base/Datasets/rag_mcp_sft/sample_preview.json",
]

# Tokenizer config (small files only)
_TOKENIZER_FILES = [
    "Base/checkpoints/EleutherAI/pythia-160m/config.json",
    "Base/checkpoints/EleutherAI/pythia-160m/tokenizer_config.json",
    "Base/checkpoints/EleutherAI/pythia-160m/tokenizer.json",
]

# Shell scripts
_SETUP_SCRIPTS = [
    "setup_and_sft.sh",
    "setup_and_train.sh",
]

# GPU run script
_GPU_SCRIPTS = [
    "gpu_train.sh",
    "gpu_full_sft.sh",
]

# README
_README = ["README.md"]

# Full manifest, in the same order the groups are declared above.
FILES_TO_PUSH = (
    _TRAINING_SCRIPTS
    + _CONFIG_FILES
    + _REQUIREMENTS
    + _VALIDATION_SCRIPTS
    + _DATASET_BUILDER_FILES
    + _TOKENIZER_FILES
    + _SETUP_SCRIPTS
    + _GPU_SCRIPTS
    + _README
)
def main():
    """Ensure the Hub repo exists, then upload each listed file found locally.

    Every path in FILES_TO_PUSH is checked on the local filesystem. Paths
    that do not exist are reported and skipped; the rest are uploaded one at
    a time to HF_REPO under the same relative path. A summary line with the
    number of uploaded files is printed at the end.

    Raises:
        RuntimeError: If TOKEN is unset or empty.
    """
    if not TOKEN:
        raise RuntimeError("Set HF_TOKEN environment variable")

    repo_url = f"https://huggingface.co/{HF_REPO}"

    api = HfApi(token=TOKEN)
    # exist_ok=True makes this a no-op when the repo is already there.
    repo_settings = {
        "repo_id": HF_REPO,
        "token": TOKEN,
        "repo_type": "model",
        "exist_ok": True,
        "private": False,
    }
    create_repo(**repo_settings)
    print(f"Repo ready: {repo_url}")

    uploaded = 0
    for local_path in FILES_TO_PUSH:
        if os.path.exists(local_path):
            # The local relative path doubles as the destination in the repo.
            api.upload_file(
                path_or_fileobj=local_path,
                path_in_repo=local_path,
                repo_id=HF_REPO,
                token=TOKEN,
            )
            print(f" OK: {local_path}")
            uploaded += 1
        else:
            print(f" SKIP (not found): {local_path}")

    total = len(FILES_TO_PUSH)
    print(f"\nPushed {uploaded}/{total} files to {repo_url}")


if __name__ == "__main__":
    main()