File size: 4,947 Bytes

b84d85a

#!/usr/bin/env python3
"""
Prepare Codsworth for HuggingFace Upload
Creates proper HF-compatible format
"""

import json
import os
import shutil
import torch
import sys

sys.path.insert(0, '.')

from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer


# Special-token ids shared by config.json and tokenizer_config.json so the two
# files cannot disagree (previously config.json declared bos/eos as 1/2 while
# tokenizer_config.json mapped <bos>/<eos> to 2/3).
# NOTE(review): ids follow the tokenizer_config layout (<pad>, <unk>, <bos>,
# <eos> = 0..3); confirm against the vocabulary stored in tokenizer.json.
_SPECIAL_TOKEN_IDS = {"<pad>": 0, "<unk>": 1, "<bos>": 2, "<eos>": 3}

# Context length written to both config.json and tokenizer_config.json.
_MAX_SEQUENCE_LENGTH = 128

# Model card written to README.md.
# NOTE(review): the usage snippet keeps only the config.json keys that appear
# in CodsworthConfig's signature; verify that the HuggingFace-style key names
# written by _build_model_config() match CodsworthConfig's field names.
_MODEL_CARD = """---
license: mit
tags:
- transformer
- language-model
- pytorch
- decoder-only
---

# Codsworth

A small transformer language model built from scratch in PyTorch.

## Model Details

- **Parameters**: ~3.9M
- **Architecture**: GPT-style decoder-only transformer
- **Position Encoding**: RoPE (Rotary Position Embedding)
- **Activation**: SwiGLU

## Usage

```python
import inspect
import json
import sys

import torch
from huggingface_hub import hf_hub_download

# Download files
model_path = hf_hub_download(repo_id="your-username/codsworth", filename="pytorch_model.bin")
tokenizer_path = hf_hub_download(repo_id="your-username/codsworth", filename="tokenizer.json")
config_path = hf_hub_download(repo_id="your-username/codsworth", filename="config.json")

# Load using codsworth library
sys.path.insert(0, 'path/to/codsworth')
from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer

with open(config_path) as f:
    cfg = json.load(f)

# config.json is a flat dict that also carries HuggingFace metadata
# (model_type, architectures, ...); pass only what CodsworthConfig accepts.
accepted = inspect.signature(CodsworthConfig).parameters
config = CodsworthConfig(**{k: v for k, v in cfg.items() if k in accepted})
model = CodsworthTransformer(config)
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

# Generate
input_ids = torch.tensor([[1, 2, 3]])
output = model.generate(input_ids, max_new_tokens=20)
print(output)
```
"""


def _build_model_config() -> dict:
    """Return the contents of config.json (HuggingFace-style key names)."""
    return {
        "model_type": "codsworth",
        "architectures": ["CodsworthTransformer"],
        "vocab_size": 5004,
        "hidden_size": 256,
        "num_hidden_layers": 2,
        "num_attention_heads": 4,
        "head_dim": 64,
        "intermediate_size": 512,
        "max_position_embeddings": _MAX_SEQUENCE_LENGTH,
        "rope_theta": 10000.0,
        "use_rope": True,
        "hidden_dropout": 0.1,
        "attention_dropout": 0.0,
        "pad_token_id": _SPECIAL_TOKEN_IDS["<pad>"],
        "bos_token_id": _SPECIAL_TOKEN_IDS["<bos>"],
        "eos_token_id": _SPECIAL_TOKEN_IDS["<eos>"],
        "torch_dtype": "float32",
        "transformers_version": "4.0.0",
    }


def _build_tokenizer_config() -> dict:
    """Return the contents of tokenizer_config.json."""
    added_tokens = {
        str(token_id): {
            "content": token,
            "single_word": False,
            "lstrip": False,
            "rstrip": False,
            "normalized": False,
            "special": True,
        }
        for token, token_id in _SPECIAL_TOKEN_IDS.items()
    }
    return {
        "added_tokens_decoder": added_tokens,
        "bos_token": "<bos>",
        "eos_token": "<eos>",
        "pad_token": "<pad>",
        "unk_token": "<unk>",
        "model_max_length": _MAX_SEQUENCE_LENGTH,
        "tokenizer_class": "PreTrainedTokenizer",
    }


def _write_json(path: str, payload: dict) -> None:
    """Serialize *payload* to *path* as indented, UTF-8 encoded JSON."""
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)


def _print_upload_instructions(output_dir: str) -> None:
    """Print copy-paste-ready ways of uploading *output_dir* to the Hub."""
    print("\n" + "=" * 50)
    print("To upload to HuggingFace:")
    print("=" * 50)
    # The Python variant is a one-liner on purpose: an indented multi-line
    # `python -c "..."` snippet fails with IndentationError when pasted.
    print(f"""
Option 1 - Using huggingface-cli:
  huggingface-cli upload your-username/codsworth {output_dir} .

Option 2 - Using Python:
  pip install huggingface_hub
  python -c "from huggingface_hub import HfApi; HfApi().upload_folder(folder_path='{output_dir}', repo_id='your-username/codsworth', repo_type='model')"

Option 3 - Manual:
  1. Go to https://huggingface.co/new
  2. Create repo 'codsworth'
  3. Upload files from {output_dir}/
""")


def prepare_for_huggingface(
    model_path: str = "codsworth_model.pt",
    tokenizer_path: str = "tokenizer.json",
    output_dir: str = "hf_upload",
) -> None:
    """Assemble a HuggingFace-style upload folder for Codsworth.

    Copies the weights and tokenizer into ``output_dir`` and generates
    ``config.json``, ``tokenizer_config.json`` and ``README.md`` next to
    them, then prints a file listing and upload instructions.

    Args:
        model_path: Weights file; copied to ``pytorch_model.bin``.
        tokenizer_path: Tokenizer file; copied to ``tokenizer.json``.
        output_dir: Destination folder; created if it does not exist.

    Raises:
        FileNotFoundError: If ``model_path`` or ``tokenizer_path`` is not an
            existing file. Raised before anything is written.
    """
    # Validate inputs up front so a typo does not leave a half-built folder.
    for label, path in (("Model weights", model_path), ("Tokenizer", tokenizer_path)):
        if not os.path.isfile(path):
            raise FileNotFoundError(f"{label} file not found: {path}")

    print(f"Preparing files in {output_dir}/")
    os.makedirs(output_dir, exist_ok=True)

    # 1. Copy model weights
    print("Copying model weights...")
    shutil.copy(model_path, os.path.join(output_dir, "pytorch_model.bin"))

    # 2. Copy tokenizer
    print("Copying tokenizer...")
    shutil.copy(tokenizer_path, os.path.join(output_dir, "tokenizer.json"))

    # 3. Create HuggingFace config.json
    print("Creating config.json...")
    _write_json(os.path.join(output_dir, "config.json"), _build_model_config())

    # 4. Create tokenizer_config.json
    print("Creating tokenizer_config.json...")
    _write_json(
        os.path.join(output_dir, "tokenizer_config.json"),
        _build_tokenizer_config(),
    )

    # 5. Create README.md (model card)
    print("Creating README.md...")
    with open(os.path.join(output_dir, "README.md"), "w", encoding="utf-8") as handle:
        handle.write(_MODEL_CARD)

    print(f"\n✓ Files prepared in {output_dir}/")
    print("\nFiles:")
    # Sorted so the listing is stable across platforms and runs.
    for name in sorted(os.listdir(output_dir)):
        size = os.path.getsize(os.path.join(output_dir, name))
        print(f"  - {name} ({size / 1024:.1f} KB)")

    _print_upload_instructions(output_dir)


# Script entry point: running this file directly builds the upload folder
# using the default model, tokenizer and output paths.
if __name__ == "__main__":
    prepare_for_huggingface()