| | |
| | """Upload FRANKENSTALLM: model, eval reports, source code, and data scripts to Hugging Face. |
| | |
| | Usage: |
| | huggingface-cli login |
| | |
| | # 모델 + README + 평가 결과 + 보고서 |
| | python scripts/upload_to_huggingface.py --repo-id pathcosmos/frankenstallm --create-pr |
| | |
| | # 위 + 소스 코드 + 데이터 스크립트 (모델/데이터/소스 전부) |
| | python scripts/upload_to_huggingface.py --repo-id pathcosmos/frankenstallm --with-source --with-data --create-pr |
| | |
| | # 평가·보고서만 |
| | python scripts/upload_to_huggingface.py --repo-id pathcosmos/frankenstallm --readme-only --create-pr |
| | """ |
| |
|
| | import argparse |
| | from pathlib import Path |
| |
|
| | PROJECT_ROOT = Path(__file__).resolve().parent.parent |
| | HF_CHECKPOINT = PROJECT_ROOT / "outputs" / "hf_checkpoint-best-fixed" |
| | REPORTS_DIR = PROJECT_ROOT / "reports" |
| | EVAL_RESULTS_DIR = PROJECT_ROOT / "eval" / "results" / "frankenstallm-3b-v2" |
| | DATA_DIR = PROJECT_ROOT / "data" |
| | SOURCE_DIRS = ["train", "model", "configs", "scripts", "tokenizer", "eval"] |
| |
|
| |
|
| | def main(): |
| | parser = argparse.ArgumentParser(description="Upload model, eval reports, source, and data scripts to Hugging Face") |
| | parser.add_argument("--repo-id", type=str, required=True, help="e.g. pathcosmos/frankenstallm") |
| | parser.add_argument("--readme-only", action="store_true", help="Only push README + eval results + reports (no model)") |
| | parser.add_argument("--create-pr", action="store_true", help="Create a Pull Request instead of pushing to main") |
| | parser.add_argument("--with-source", action="store_true", help="Upload full source code (train, eval, model, configs, scripts, tokenizer)") |
| | parser.add_argument("--with-data", action="store_true", help="Upload data scripts and DATA_README (no .bin files)") |
| | args = parser.parse_args() |
| | create_pr = getattr(args, "create_pr", False) |
| |
|
| | try: |
| | from huggingface_hub import HfApi, create_repo |
| | except ImportError: |
| | print("Install: pip install huggingface_hub") |
| | raise SystemExit(1) |
| |
|
| | api = HfApi() |
| |
|
| | |
| | |
| | try: |
| | create_repo(args.repo_id, repo_type="model", exist_ok=True) |
| | except Exception as e: |
| | print(f"Note: create_repo skipped (use Hugging Face website to create repo if needed): {e}") |
| |
|
| | if not args.readme_only: |
| | if not HF_CHECKPOINT.exists(): |
| | print(f"Checkpoint not found: {HF_CHECKPOINT}") |
| | raise SystemExit(1) |
| | print(f"Uploading model from {HF_CHECKPOINT} ...") |
| | api.upload_folder( |
| | folder_path=str(HF_CHECKPOINT), |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | create_pr=create_pr, |
| | ) |
| | print("Model upload done.") |
| |
|
| | |
| | readme_src = HF_CHECKPOINT / "README.md" |
| | if readme_src.exists(): |
| | print("Pushing README (model card) ...") |
| | api.upload_file( |
| | path_or_fileobj=str(readme_src), |
| | path_in_repo="README.md", |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | create_pr=create_pr, |
| | ) |
| | print("README upload done.") |
| | else: |
| | print("No README.md in checkpoint dir; skipping README push.") |
| |
|
| | |
| | results_json = EVAL_RESULTS_DIR / "ollama_benchmark_results.json" |
| | if results_json.exists(): |
| | print("Pushing ollama_benchmark_results.json ...") |
| | api.upload_file( |
| | path_or_fileobj=str(results_json), |
| | path_in_repo="eval/ollama_benchmark_results.json", |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | create_pr=create_pr, |
| | ) |
| | print("Eval results upload done.") |
| |
|
| | |
| | for name, src in [ |
| | ("2026-03-09_GGUF_DEPLOYMENT_AND_EVAL_REPORT.md", REPORTS_DIR / "2026-03-09_GGUF_DEPLOYMENT_AND_EVAL_REPORT.md"), |
| | ("2026-03-09_ORPO_EVALUATION_REPORT.md", REPORTS_DIR / "2026-03-09_ORPO_EVALUATION_REPORT.md"), |
| | ]: |
| | if src.exists(): |
| | print(f"Pushing {name} ...") |
| | api.upload_file( |
| | path_or_fileobj=str(src), |
| | path_in_repo=f"eval_reports/{name}", |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | create_pr=create_pr, |
| | ) |
| | print("Reports upload done.") |
| |
|
| | |
| | if getattr(args, "with_source", False): |
| | print("Uploading source code ...") |
| | ignore_common = ["**/__pycache__/**", "**/*.pyc", "**/.DS_Store"] |
| | for dirname in ["train", "model", "configs", "scripts", "tokenizer"]: |
| | src_dir = PROJECT_ROOT / dirname |
| | if src_dir.exists(): |
| | api.upload_folder( |
| | folder_path=str(src_dir), |
| | path_in_repo=f"source/{dirname}", |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | ignore_patterns=ignore_common, |
| | create_pr=create_pr, |
| | ) |
| | print(f" source/{dirname}/ done.") |
| | |
| | eval_dir = PROJECT_ROOT / "eval" |
| | if eval_dir.exists(): |
| | api.upload_folder( |
| | folder_path=str(eval_dir), |
| | path_in_repo="source/eval", |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | ignore_patterns=ignore_common + ["**/outputs/**", "**/results/**", "**/.compile_cache/**"], |
| | create_pr=create_pr, |
| | ) |
| | print(" source/eval/ done.") |
| | |
| | for name in ["README.md", "CLAUDE.md", "requirements.txt", "PROGRESS.md"]: |
| | src_file = PROJECT_ROOT / name |
| | if src_file.exists(): |
| | api.upload_file( |
| | path_or_fileobj=str(src_file), |
| | path_in_repo=f"source/{name}", |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | create_pr=create_pr, |
| | ) |
| | for p in PROJECT_ROOT.glob("PLAN_*.md"): |
| | api.upload_file( |
| | path_or_fileobj=str(p), |
| | path_in_repo=f"source/{p.name}", |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | create_pr=create_pr, |
| | ) |
| | print("Source upload done.") |
| |
|
| | |
| | if getattr(args, "with_data", False) and DATA_DIR.exists(): |
| | print("Uploading data scripts (no .bin) ...") |
| | api.upload_folder( |
| | folder_path=str(DATA_DIR), |
| | path_in_repo="data", |
| | repo_id=args.repo_id, |
| | repo_type="model", |
| | ignore_patterns=[ |
| | "**/*.bin", |
| | "**/*.chunk*", |
| | "**/__pycache__/**", |
| | "**/code/**", |
| | "**/*.pyc", |
| | ], |
| | create_pr=create_pr, |
| | ) |
| | print("Data scripts upload done.") |
| |
|
| | print(f"Done. https://huggingface.co/{args.repo_id}") |
| |
|
| |
|
| | if __name__ == "__main__": |
| | main() |
| |
|