| |
"""
In the development project (hf_custom_proj/), training outputs are kept under artifacts/,
and the repo root used for distribution (Hub / local from_pretrained) is assembled at
dist/my-mnist-hf/.

Goals (satisfying the "HF style" that was requested)
- Development: keep the src/ layout (pip install -e . / python -m examples.*)
- Distribution: dist/ works with both the Hub and local from_pretrained
- No stopgap fixes: generate the dist structure so that it conforms to the HF dynamic
  module loading rules

Key points (the core of the current situation)
- The auto_map in preprocessor_config.json is sometimes saved "without a package name", e.g.
      "AutoImageProcessor": "image_processing_my_mnist.MyMNISTImageProcessor"
- In that case transformers looks for image_processing_my_mnist.py in the dist root.
- Therefore the configuration/modeling/image_processing *.py files are placed flat in the
  dist root.
"""
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import shutil |
| import sys |
| from pathlib import Path |
|
|
|
|
| |
| |
| |
def project_root_from_this_file() -> Path:
    """Return the directory two levels above this file, as an absolute path.

    The file path is resolved first (symlinks followed, made absolute), then
    ``parents[1]`` selects the parent of the directory containing this file.

    NOTE(review): this presumes the script lives exactly one directory below
    the project root (e.g. ``<root>/scripts/``) -- confirm against the layout.
    """
    return Path(__file__).resolve().parents[1]
|
|
|
|
def rmtree_if_exists(p: Path) -> None:
    """Remove whatever is at ``p``; do nothing when nothing is there.

    Args:
        p: Path to clear. A directory is removed recursively; a regular file or
            a symlink (including a dangling one) is unlinked.

    ``shutil.rmtree`` refuses symlinks and fails on regular files, and
    ``Path.exists()`` is False for a dangling symlink, so those cases are
    handled explicitly before falling back to ``rmtree``.
    """
    if p.is_symlink() or p.is_file():
        # Unlinking a symlink removes the link only, never its target.
        p.unlink()
    elif p.exists():
        shutil.rmtree(p)
|
|
|
|
def copytree(src: Path, dst: Path) -> None:
    """Replace ``dst`` with a fresh recursive copy of the directory ``src``.

    Args:
        src: Directory to copy from.
        dst: Destination path. Anything already there is removed first, so the
            result never contains stale entries from a previous copy.

    Raises:
        FileNotFoundError: If ``src`` does not exist.
    """
    if not src.exists():
        raise FileNotFoundError(f"Missing source path: {src}")

    # shutil.rmtree cannot remove a regular file or a symlink, so unlink those.
    if dst.is_symlink() or dst.is_file():
        dst.unlink()
    elif dst.exists():
        shutil.rmtree(dst)

    shutil.copytree(src, dst)
|
|
|
|
def copy2(src: Path, dst: Path) -> None:
    """Copy the required file ``src`` to ``dst`` via ``shutil.copy2``.

    Args:
        src: File to copy.
        dst: Destination file path; missing parent directories are created.

    Raises:
        FileNotFoundError: If ``src`` does not exist.
    """
    if not src.exists():
        raise FileNotFoundError(f"Missing file: {src}")
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst)
|
|
|
|
def copy_if_exists(src: Path, dst: Path) -> None:
    """Copy the optional file ``src`` to ``dst``; silently skip when it is absent.

    Args:
        src: File to copy if present.
        dst: Destination file path; missing parent directories are created
            only when a copy actually happens.
    """
    if not src.exists():
        return
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst)
|
|
|
|
def write_text(dst: Path, text: str) -> None:
    """Write ``text`` to ``dst`` as UTF-8, creating parent directories as needed.

    Args:
        dst: File to create or overwrite.
        text: Content to write.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_text(text, encoding="utf-8")
|
|
|
|
| |
| |
| |
def find_model_file(artifact_dir: Path) -> Path:
    """Locate the model weights file inside ``artifact_dir``.

    ``model.safetensors`` is preferred; ``pytorch_model.bin`` is used only
    when the safetensors file is absent.

    Args:
        artifact_dir: Directory to search (not recursive).

    Returns:
        Path to the weights file that was found.

    Raises:
        FileNotFoundError: If neither file exists.
    """
    st = artifact_dir / "model.safetensors"
    pt = artifact_dir / "pytorch_model.bin"
    # Order matters: the first existing candidate wins.
    for candidate in (st, pt):
        if candidate.exists():
            return candidate
    raise FileNotFoundError(
        f"Missing model file in artifacts: expected '{st.name}' or '{pt.name}'"
    )
|
|
|
|
def validate_artifacts(artifact_dir: Path) -> tuple[Path, Path, Path]:
    """Check that ``artifact_dir`` contains everything the dist repo needs.

    Args:
        artifact_dir: Directory the training outputs were saved into.

    Returns:
        ``(config, model, preproc)``: paths to ``config.json``, the weights
        file selected by ``find_model_file``, and ``preprocessor_config.json``.

    Raises:
        FileNotFoundError: If the directory does not exist, or if any of the
            three files is missing. All missing files are reported together,
            followed by a hint on how to save them.
    """
    # Shown with both errors below so the user sees how to produce the files.
    save_hint = (
        "먼저 학습 후 아래처럼 저장하세요:\n"
        " save_dir = 'artifacts/my_mnist'\n"
        " trainer.save_model(save_dir)\n"
        " processor.save_pretrained(save_dir)\n"
    )

    if not artifact_dir.exists():
        raise FileNotFoundError(
            f"Artifacts directory does not exist: {artifact_dir}\n" + save_hint
        )

    config = artifact_dir / "config.json"
    preproc = artifact_dir / "preprocessor_config.json"

    missing = [p.name for p in (config, preproc) if not p.exists()]

    # Fold a missing weights file into the same report instead of letting it
    # abort early with a message that omits the other gaps and the save hint.
    model = None
    try:
        model = find_model_file(artifact_dir)
    except FileNotFoundError:
        missing.append("model.safetensors (or pytorch_model.bin)")

    if missing or model is None:
        missing_lines = "\n".join(f" - {m}" for m in missing)
        raise FileNotFoundError(
            "Artifacts 폴더에 학습 산출물이 없습니다(또는 불완전합니다).\n"
            f" artifacts: {artifact_dir}\n"
            " missing:\n"
            + missing_lines
            + "\n\n"
            + save_hint
        )

    return config, model, preproc
|
|
|
|
| |
| |
| |
def prepare_dist_repo_root(
    *,
    root: Path,
    dist_repo_dir: Path,
    artifact_dir: Path,
    package_name: str,
    copy_scripts_into_dist: bool,
    requirements_text: str | None,
) -> None:
    """Rebuild ``dist_repo_dir`` from scratch as a flat, loadable repo root.

    What ends up in the dist root:

    - The module files needed for distribution, copied flat from
      ``src/<package_name>/`` (this is the layout that most reliably satisfies
      an auto_map saved as
      ``image_processing_my_mnist.MyMNISTImageProcessor``), plus
      ``__init__.py`` when the package has one.
    - ``examples/`` (included for testing convenience; it can be deleted from
      the dist afterwards if unwanted) and, optionally, ``scripts/``.
    - ``README.md``, ``LICENSE``, ``.gitignore`` and ``pyproject.toml`` when
      they exist in ``root``.
    - ``requirements.txt``: ``requirements_text`` if given, else the project's
      own file, else a built-in default list.
    - ``config.json``, ``preprocessor_config.json`` and the weights file from
      ``artifact_dir``.
    - ``DIST_NOTE.txt`` explaining the flat layout.

    Args:
        root: Project root containing ``src/``, ``examples/`` and ``scripts/``.
        dist_repo_dir: Output directory. Its previous contents are deleted.
        artifact_dir: Directory holding the training outputs.
        package_name: Name of the package directory under ``src/``.
        copy_scripts_into_dist: Whether to copy ``scripts/`` into the dist.
        requirements_text: Literal ``requirements.txt`` content, or ``None``.

    Raises:
        ValueError: If ``dist_repo_dir`` is, or contains, the project root or
            one of the input directories (wiping it would destroy inputs).
        FileNotFoundError: If the artifacts, the source package, a required
            module file, ``examples/`` or (when requested) ``scripts/`` is
            missing.
    """
    src_pkg = root / "src" / package_name

    # Safety: the dist dir is wiped below, so it must never be (or contain)
    # the project itself or anything this function reads from.
    dist_resolved = dist_repo_dir.resolve()
    for protected in (root, src_pkg, root / "examples", root / "scripts", artifact_dir):
        protected_resolved = protected.resolve()
        if dist_resolved == protected_resolved or dist_resolved in protected_resolved.parents:
            raise ValueError(
                f"Refusing to use '{dist_repo_dir}' as the dist directory: "
                f"rebuilding it would delete '{protected}'."
            )

    # Fail fast: check inputs before destroying a previously built dist.
    config, model, preproc = validate_artifacts(artifact_dir)
    if not src_pkg.exists():
        raise FileNotFoundError(f"Missing src package dir: {src_pkg}")

    rmtree_if_exists(dist_repo_dir)
    dist_repo_dir.mkdir(parents=True, exist_ok=True)

    # Modules referenced by auto_map must sit flat at the repo root.
    required_py = (
        "configuration_my_mnist.py",
        "modeling_my_mnist.py",
        "image_processing_my_mnist.py",
    )
    for fname in required_py:
        copy2(src_pkg / fname, dist_repo_dir / fname)

    # __init__.py is optional in the source package.
    copy_if_exists(src_pkg / "__init__.py", dist_repo_dir / "__init__.py")

    copytree(root / "examples", dist_repo_dir / "examples")
    if copy_scripts_into_dist:
        copytree(root / "scripts", dist_repo_dir / "scripts")

    # Optional project metadata files.
    for fname in ("README.md", "LICENSE", ".gitignore", "pyproject.toml"):
        copy_if_exists(root / fname, dist_repo_dir / fname)

    # requirements.txt: explicit text > project file > built-in default.
    req_dst = dist_repo_dir / "requirements.txt"
    req_src = root / "requirements.txt"
    if requirements_text is not None:
        write_text(req_dst, requirements_text)
    elif req_src.exists():
        copy2(req_src, req_dst)
    else:
        write_text(
            req_dst,
            "\n".join(["torch", "torchvision", "transformers", "evaluate", "numpy", "Pillow", ""]),
        )

    # Training outputs go to the dist root, next to the module files.
    copy2(config, dist_repo_dir / "config.json")
    copy2(preproc, dist_repo_dir / "preprocessor_config.json")
    copy2(model, dist_repo_dir / model.name)

    write_text(
        dist_repo_dir / "DIST_NOTE.txt",
        "\n".join(
            [
                "This folder is a Hub/local from_pretrained() compatible repo root.",
                "Dynamic module loading expects flat *.py modules at repo root (per auto_map).",
                "",
            ]
        ),
    )
|
|
|
|
| |
| |
| |
| def upload_to_hub( |
| *, |
| dist_repo_dir: Path, |
| repo_id: str, |
| private: bool, |
| repo_type: str, |
| commit_message: str, |
| ) -> None: |
| try: |
| from huggingface_hub import HfApi |
| from huggingface_hub.utils import HfHubHTTPError |
| from huggingface_hub.hf_api import HfFolder |
| except Exception as e: |
| raise RuntimeError( |
| "huggingface_hub๊ฐ ํ์ํฉ๋๋ค. ์ค์น ํ ๋ค์ ์คํํ์ธ์:\n" |
| " pip install -U huggingface_hub\n" |
| ) from e |
|
|
| token = HfFolder.get_token() |
| if not token: |
| raise RuntimeError( |
| "Hugging Face ์ธ์ฆ ํ ํฐ์ด ์์ต๋๋ค.\n" |
| "๋ค์ ์ค ํ๋๋ฅผ ์ํํ์ธ์:\n" |
| " 1) ํฐ๋ฏธ๋์์: huggingface-cli login\n" |
| " 2) ํ๊ฒฝ๋ณ์๋ก: export HF_TOKEN=... (Colab/CI ํฌํจ)\n" |
| " 3) ํ์ด์ฌ์์: from huggingface_hub import login; login('HF_TOKEN')\n" |
| ) |
|
|
| api = HfApi() |
|
|
| try: |
| api.create_repo(repo_id=repo_id, repo_type=repo_type, exist_ok=True, private=private) |
| except HfHubHTTPError as e: |
| raise RuntimeError( |
| f"Repo ์์ฑ์ ์คํจํ์ต๋๋ค: {repo_id} (repo_type={repo_type})\n" |
| "๊ถํ(organization repo ์ฌ๋ถ), repo_id ์คํ, ํ ํฐ ๊ถํ์ ํ์ธํ์ธ์." |
| ) from e |
|
|
| try: |
| api.upload_folder( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| folder_path=str(dist_repo_dir), |
| commit_message=commit_message, |
| ) |
| except HfHubHTTPError as e: |
| raise RuntimeError( |
| f"์
๋ก๋์ ์คํจํ์ต๋๋ค: {repo_id} (repo_type={repo_type})\n" |
| "ํ ํฐ ๊ถํ, ๋์ฉ๋ ํ์ผ, ๋คํธ์ํฌ ์ํ๋ฅผ ํ์ธํ์ธ์." |
| ) from e |
|
|
|
|
| |
| |
| |
| def build_argparser() -> argparse.ArgumentParser: |
| p = argparse.ArgumentParser( |
| description="Build dist repo root (Hub-ready) from src/<pkg> + artifacts, and optionally push to Hub." |
| ) |
|
|
| p.add_argument("--push", action="store_true", help="์ง์ ์ Hub์ ์
๋ก๋๊น์ง ์ํํฉ๋๋ค.") |
| p.add_argument("--repo-id", default=None, help='Hub repo id. ์: "YOUR_ID/my-mnist-hf" ( --push์ผ ๋ ํ์ )') |
| p.add_argument("--private", action="store_true", help="Hub repo๋ฅผ private๋ก ์์ฑ( --push์ผ ๋๋ง ์๋ฏธ )") |
| p.add_argument("--repo-type", default="model", help='Hub repo type. ๊ธฐ๋ณธ: "model"') |
| p.add_argument("--commit-message", default="Release custom MNIST model", help="Hub ์ปค๋ฐ ๋ฉ์์ง") |
|
|
| p.add_argument("--artifact-dir", default="artifacts/my_mnist", help="ํ์ต ์ฐ์ถ๋ฌผ ํด๋. ๊ธฐ๋ณธ: artifacts/my_mnist") |
| p.add_argument("--dist-dir", default="dist/my-mnist-hf", help="๋ฐฐํฌ์ฉ dist ํด๋. ๊ธฐ๋ณธ: dist/my-mnist-hf") |
|
|
| p.add_argument( |
| "--package-name", |
| default="my_mnist_hf", |
| help="src/ ์๋์ ํจํค์ง ํด๋๋ช
. ๊ธฐ๋ณธ: my_mnist_hf", |
| ) |
|
|
| p.add_argument("--no-copy-scripts", action="store_true", help="dist์ scripts/ ๋ณต์ฌ๋ฅผ ์๋ต") |
| p.add_argument( |
| "--requirements", |
| default=None, |
| help="requirements.txt ๋ด์ฉ์ ๋ฌธ์์ด๋ก ์ง์ (ํ์ผ ์์ฑ). ๋ฏธ์ง์ ์ด๋ฉด ๋ฃจํธ requirements.txt ๋ณต์ฌ ๋๋ ๊ธฐ๋ณธ๊ฐ ์์ฑ.", |
| ) |
| return p |
|
|
|
|
| def main() -> None: |
| args = build_argparser().parse_args() |
|
|
| root = project_root_from_this_file() |
| artifact_dir = root / args.artifact_dir |
| dist_repo_dir = root / args.dist_dir |
|
|
| prepare_dist_repo_root( |
| root=root, |
| dist_repo_dir=dist_repo_dir, |
| artifact_dir=artifact_dir, |
| package_name=str(args.package_name), |
| copy_scripts_into_dist=not bool(args.no_copy_scripts), |
| requirements_text=args.requirements, |
| ) |
|
|
| print("\n[OK] dist ํด๋ ์์ฑ ์๋ฃ") |
| print(f" - artifacts : {artifact_dir}") |
| print(f" - dist repo : {dist_repo_dir}") |
| print(f" - package : {args.package_name}") |
|
|
| if not args.push: |
| print("\n[Info] --push๋ฅผ ์ง์ ํ์ง ์์ Hub ์
๋ก๋๋ ์๋ตํ์ต๋๋ค(local-only).") |
| print("\n๋ก์ปฌ ๋ก๋ ์์:") |
| print(" from transformers import AutoModelForImageClassification, AutoImageProcessor") |
| print(f" p = '{args.dist_dir}'") |
| print(" processor = AutoImageProcessor.from_pretrained(p, trust_remote_code=True)") |
| print(" model = AutoModelForImageClassification.from_pretrained(p, trust_remote_code=True)") |
| return |
|
|
| if not args.repo_id: |
| raise ValueError("--push๋ฅผ ์ฌ์ฉํ๋ ค๋ฉด --repo-id๋ฅผ ๋ฐ๋์ ์ง์ ํด์ผ ํฉ๋๋ค.") |
|
|
| upload_to_hub( |
| dist_repo_dir=dist_repo_dir, |
| repo_id=str(args.repo_id), |
| private=bool(args.private), |
| repo_type=str(args.repo_type), |
| commit_message=str(args.commit_message), |
| ) |
|
|
| print("\n[OK] Hub ์
๋ก๋ ์๋ฃ") |
| print(f" - hub repo : {args.repo_id}") |
| print("\nHub ๋ก๋ ์์:") |
| print(" from transformers import AutoModelForImageClassification, AutoImageProcessor") |
| print(f" repo_id = '{args.repo_id}'") |
| print(" processor = AutoImageProcessor.from_pretrained(repo_id, trust_remote_code=True)") |
| print(" model = AutoModelForImageClassification.from_pretrained(repo_id, trust_remote_code=True)") |
|
|
|
|
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Print a short summary on stderr first, then re-raise so the full
        # traceback and the non-zero exit status are preserved.
        print(f"\n오류: {e}\n", file=sys.stderr)
        raise
|
|