| |
"""
Hugging Face Hub upload script.

- Uploads the merged EEVE model to the Hugging Face Hub
- Includes the model card (README.md)
"""
|
|
| import os |
| import argparse |
| from pathlib import Path |
| from huggingface_hub import HfApi, create_repo, upload_folder |
|
|
|
|
def upload_model_to_hub(
    model_dir: str,
    repo_id: str,
    token: str = None,
    private: bool = False,
    commit_message: str = "Upload EEVE Korean Custom model"
) -> None:
    """Create the target repository (if needed) and upload a model folder.

    Progress and results are printed to stdout; the messages are in Korean.

    Args:
        model_dir: Path of the local directory whose contents are uploaded.
        repo_id: Hugging Face repository ID (``username/model-name``).
        token: Hugging Face API token. ``None`` is forwarded unchanged to
            ``create_repo`` / ``upload_folder``; per the original notes the
            library is then expected to fall back to the environment
            variable or the cached CLI login.
        private: Whether the repository is created as private.
        commit_message: Commit message used for the upload commit.

    Raises:
        Exception: Any error raised by ``upload_folder`` is re-raised after
            the failure message has been printed. Errors raised by
            ``create_repo`` are only reported and the upload is still
            attempted.
    """
    # NOTE(review): the Korean text and emoji in the messages below were
    # reconstructed from mis-encoded source text; the wording is inferred from
    # context and the emoji are best guesses - verify against the original.
    banner = "=" * 80

    print("\n" + banner)
    print(" Hugging Face Hub 업로드")
    print(banner)
    print(f"📁 모델 디렉토리: {model_dir}")
    print(f"🎯 리포지토리: {repo_id}")
    print(f"🔒 공개 여부: {'Private' if private else 'Public'}")
    print(banner + "\n")

    # Step 1: make sure the repository exists. exist_ok=True means an already
    # existing repository is not an error, so anything caught here is some
    # other failure; it is reported and the upload is attempted regardless.
    print("1️⃣ 리포지토리 생성 중...")
    try:
        repo_url = create_repo(
            repo_id=repo_id,
            token=token,
            private=private,
            exist_ok=True,
            repo_type="model"
        )
        print(f"✓ 리포지토리: {repo_url}\n")
    except Exception as e:
        print(f"⚠️ 리포지토리가 이미 존재하거나 오류 발생: {e}\n")

    # Step 2: upload the whole folder as a single commit.
    print("2️⃣ 모델 파일 업로드 중...")
    print(" ⏱️ 이 작업은 시간이 걸립니다 (모델 크기: ~20GB)...\n")

    try:
        upload_folder(
            repo_id=repo_id,
            folder_path=model_dir,
            token=token,
            commit_message=commit_message,
            repo_type="model"
        )
        print("✓ 업로드 완료!\n")
    except Exception as e:
        # Report the failure here, then let the caller decide how to exit.
        print(f"❌ 업로드 실패: {e}")
        raise

    # Final summary with a short usage snippet for the uploaded model.
    print(banner)
    print("✅ 업로드 성공!")
    print(banner)
    print(f"\n🔗 모델 페이지: https://huggingface.co/{repo_id}")
    print("📝 사용 방법:")
    print(f"""
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model = AutoModelForCausalLM.from_pretrained("{repo_id}")
    tokenizer = AutoTokenizer.from_pretrained("{repo_id}")
    """)
    print(banner + "\n")
|
|
|
|
| def main(): |
| parser = argparse.ArgumentParser( |
| description="EEVE ๋ชจ๋ธ์ Hugging Face Hub์ ์
๋ก๋", |
| formatter_class=argparse.RawDescriptionHelpFormatter, |
| epilog=""" |
| ์ฌ์ฉ ์์: |
| |
| # ๊ธฐ๋ณธ ์ฌ์ฉ (public) |
| python upload_to_hf.py --repo-id username/model-name |
| |
| # Private ๋ฆฌํฌ์งํ ๋ฆฌ๋ก ์
๋ก๋ |
| python upload_to_hf.py --repo-id username/model-name --private |
| |
| # ํ ํฐ ์ง์ ์ง์ |
| python upload_to_hf.py --repo-id username/model-name --token hf_xxxxx |
| |
| # ๋ค๋ฅธ ๋๋ ํ ๋ฆฌ์์ ์
๋ก๋ |
| python upload_to_hf.py --repo-id username/model-name --model-dir /path/to/model |
| |
| ์ ์ฒด ์ต์
: |
| python upload_to_hf.py \\ |
| --repo-id MyeongHo0621/EEVE-Korean-Custom-10.8B \\ |
| --model-dir /home/work/tesseract/eeve_hf_upload \\ |
| --private \\ |
| --commit-message "Initial upload: checkpoint-500 merged" |
| |
| ์ฃผ์์ฌํญ: |
| 1. ๋จผ์ Hugging Face์ ๋ก๊ทธ์ธํด์ผ ํฉ๋๋ค: |
| huggingface-cli login |
| ๋๋ |
| hf auth login |
| |
| 2. ํ ํฐ์ Write ๊ถํ์ด ์์ด์ผ ํฉ๋๋ค: |
| https://huggingface.co/settings/tokens |
| |
| 3. ์
๋ก๋ ์๊ฐ: ~20-30๋ถ (๋คํธ์ํฌ ์๋์ ๋ฐ๋ผ) |
| """ |
| ) |
| |
| parser.add_argument( |
| "--repo-id", |
| type=str, |
| required=True, |
| help="Hugging Face ๋ฆฌํฌ์งํ ๋ฆฌ ID (username/model-name)" |
| ) |
| |
| parser.add_argument( |
| "--model-dir", |
| type=str, |
| default="/home/work/tesseract/eeve_hf_upload", |
| help="์
๋ก๋ํ ๋ชจ๋ธ ๋๋ ํ ๋ฆฌ (๊ธฐ๋ณธ: eeve_hf_upload)" |
| ) |
| |
| parser.add_argument( |
| "--token", |
| type=str, |
| default=None, |
| help="Hugging Face API ํ ํฐ (์ ํ, ์์ผ๋ฉด CLI ๋ก๊ทธ์ธ ์ฌ์ฉ)" |
| ) |
| |
| parser.add_argument( |
| "--private", |
| action="store_true", |
| help="Private ๋ฆฌํฌ์งํ ๋ฆฌ๋ก ์์ฑ" |
| ) |
| |
| parser.add_argument( |
| "--commit-message", |
| type=str, |
| default="Upload EEVE Korean Custom model (checkpoint-500 merged)", |
| help="์ปค๋ฐ ๋ฉ์์ง" |
| ) |
| |
| args = parser.parse_args() |
| |
| |
| model_dir = Path(args.model_dir) |
| if not model_dir.exists(): |
| print(f"โ ์ค๋ฅ: ๋ชจ๋ธ ๋๋ ํ ๋ฆฌ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค: {model_dir}") |
| return 1 |
| |
| |
| required_files = ["config.json", "tokenizer_config.json"] |
| missing_files = [f for f in required_files if not (model_dir / f).exists()] |
| if missing_files: |
| print(f"โ ์ค๋ฅ: ํ์ ํ์ผ์ด ์์ต๋๋ค: {missing_files}") |
| return 1 |
| |
| |
| try: |
| upload_model_to_hub( |
| model_dir=str(model_dir), |
| repo_id=args.repo_id, |
| token=args.token, |
| private=args.private, |
| commit_message=args.commit_message |
| ) |
| print("โ
๋ชจ๋ ์์
์ด ์๋ฃ๋์์ต๋๋ค!") |
| return 0 |
| |
| except Exception as e: |
| print(f"\nโ ์ค๋ฅ ๋ฐ์: {e}") |
| import traceback |
| traceback.print_exc() |
| return 1 |
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status. SystemExit is
    # raised directly because the `exit` helper is installed by the `site`
    # module for interactive use and is not guaranteed to exist
    # (e.g. under `python -S`).
    raise SystemExit(main())
|
|
|
|