| |
| """ |
| Prepare Codsworth for HuggingFace Upload |
| Creates proper HF-compatible format |
| """ |
|
|
| import json |
| import os |
| import shutil |
| import torch |
| import sys |
|
|
| sys.path.insert(0, '.') |
|
|
| from codsworth.config import CodsworthConfig |
| from codsworth.model import CodsworthTransformer |
|
|
|
|
def _write_json(path, payload):
    """Serialize *payload* to *path* as UTF-8 JSON with two-space indentation."""
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)


def prepare_for_huggingface(
    model_path: str = "codsworth_model.pt",
    tokenizer_path: str = "tokenizer.json",
    output_dir: str = "hf_upload",
) -> None:
    """Assemble a HuggingFace-style upload folder for Codsworth.

    Copies the model weights and tokenizer into ``output_dir`` and writes
    ``config.json``, ``tokenizer_config.json`` and ``README.md`` next to them,
    then prints the resulting file list and upload instructions.

    Args:
        model_path: Weights file; copied to ``pytorch_model.bin``.
        tokenizer_path: Tokenizer file; copied to ``tokenizer.json``.
        output_dir: Destination folder, created if it does not exist.

    Raises:
        FileNotFoundError: If ``model_path`` or ``tokenizer_path`` does not
            exist. Checked before anything is created so a failed run leaves
            no half-populated output folder behind.
    """
    for required in (model_path, tokenizer_path):
        if not os.path.exists(required):
            raise FileNotFoundError(f"Input file not found: {required}")

    print(f"Preparing files in {output_dir}/")
    os.makedirs(output_dir, exist_ok=True)

    print("Copying model weights...")
    shutil.copy(model_path, os.path.join(output_dir, "pytorch_model.bin"))

    print("Copying tokenizer...")
    shutil.copy(tokenizer_path, os.path.join(output_dir, "tokenizer.json"))

    # Single source of truth for the special tokens: a token's id is its
    # position in this tuple. Both config.json and tokenizer_config.json are
    # derived from it so the two files cannot disagree about bos/eos ids.
    # NOTE(review): the order follows the added_tokens_decoder table; confirm
    # it matches the ids actually stored in tokenizer.json.
    special_tokens = ("<pad>", "<unk>", "<bos>", "<eos>")
    token_ids = {token: index for index, token in enumerate(special_tokens)}
    max_length = 128  # shared by max_position_embeddings and model_max_length

    print("Creating config.json...")
    hf_config = {
        "model_type": "codsworth",
        "architectures": ["CodsworthTransformer"],
        "vocab_size": 5004,
        "hidden_size": 256,
        "num_hidden_layers": 2,
        "num_attention_heads": 4,
        "head_dim": 64,
        "intermediate_size": 512,
        "max_position_embeddings": max_length,
        "rope_theta": 10000.0,
        "use_rope": True,
        "hidden_dropout": 0.1,
        "attention_dropout": 0.0,
        "pad_token_id": token_ids["<pad>"],
        "bos_token_id": token_ids["<bos>"],
        "eos_token_id": token_ids["<eos>"],
        "torch_dtype": "float32",
        "transformers_version": "4.0.0",
    }
    _write_json(os.path.join(output_dir, "config.json"), hf_config)

    print("Creating tokenizer_config.json...")
    tokenizer_config = {
        "added_tokens_decoder": {
            str(index): {
                "content": token,
                "single_word": False,
                "lstrip": False,
                "rstrip": False,
                "normalized": False,
                "special": True,
            }
            for token, index in token_ids.items()
        },
        "bos_token": "<bos>",
        "eos_token": "<eos>",
        "pad_token": "<pad>",
        "unk_token": "<unk>",
        "model_max_length": max_length,
        "tokenizer_class": "PreTrainedTokenizer",
    }
    _write_json(os.path.join(output_dir, "tokenizer_config.json"), tokenizer_config)

    print("Creating README.md...")
    # The usage snippet must match the flat config.json written above: there
    # is no nested "model" section to index into.
    readme = """---
license: mit
tags:
- transformer
- language-model
- pytorch
- decoder-only
---

# Codsworth

A small transformer language model built from scratch in PyTorch.

## Model Details

- **Parameters**: ~3.9M
- **Architecture**: GPT-style decoder-only transformer
- **Position Encoding**: RoPE (Rotary Position Embedding)
- **Activation**: SwiGLU

## Usage

```python
import inspect
import json
import torch
from huggingface_hub import hf_hub_download

# Download files
model_path = hf_hub_download(repo_id="your-username/codsworth", filename="pytorch_model.bin")
tokenizer_path = hf_hub_download(repo_id="your-username/codsworth", filename="tokenizer.json")
config_path = hf_hub_download(repo_id="your-username/codsworth", filename="config.json")

# Load using codsworth library
import sys
sys.path.insert(0, 'path/to/codsworth')
from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer

with open(config_path) as f:
    cfg = json.load(f)

# config.json is flat and also carries HuggingFace metadata keys, so pass only
# the arguments CodsworthConfig accepts (assumes its parameter names match the
# config.json keys).
accepted = inspect.signature(CodsworthConfig).parameters
model_cfg = {key: value for key, value in cfg.items() if key in accepted}
config = CodsworthConfig(**model_cfg)
model = CodsworthTransformer(config)
model.load_state_dict(torch.load(model_path))

# Generate
input_ids = torch.tensor([[1, 2, 3]])
output = model.generate(input_ids, max_new_tokens=20)
print(output)
```
"""

    with open(os.path.join(output_dir, "README.md"), "w", encoding="utf-8") as handle:
        handle.write(readme)

    print(f"\n✓ Files prepared in {output_dir}/")
    print("\nFiles:")
    # Sorted so the report is deterministic; os.listdir order is arbitrary.
    for entry in sorted(os.listdir(output_dir)):
        size = os.path.getsize(os.path.join(output_dir, entry))
        print(f"  - {entry} ({size/1024:.1f} KB)")

    print("\n" + "=" * 50)
    print("To upload to HuggingFace:")
    print("=" * 50)
    # The CLI subcommand is `upload <repo_id> <local_path> <path_in_repo>`.
    # The python -c payload is kept flush-left so it can be pasted into a
    # shell without triggering an IndentationError.
    print(f"""
Option 1 - Using huggingface-cli:
   huggingface-cli upload your-username/codsworth {output_dir} .

Option 2 - Using Python:
   pip install huggingface_hub
   python -c "
from huggingface_hub import HfApi
api = HfApi()
api.upload_folder(
    folder_path='{output_dir}',
    repo_id='your-username/codsworth',
    repo_type='model'
)
"

Option 3 - Manual:
   1. Go to https://huggingface.co/new
   2. Create repo 'codsworth'
   3. Upload files from {output_dir}/
""")
|
|
|
|
# Script entry point: build the upload folder using the default paths
# (codsworth_model.pt, tokenizer.json -> hf_upload/).
if __name__ == "__main__":
    prepare_for_huggingface()