codsworth-3.8m / codsworth /scripts /prepare_hf.py
Jaqshanahan's picture
Initial upload of Codsworth model
b84d85a verified
#!/usr/bin/env python3
"""
Prepare Codsworth for HuggingFace Upload
Creates proper HF-compatible format
"""
import json
import os
import shutil
import torch
import sys
sys.path.insert(0, '.')
from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer
# Special tokens in vocabulary order: a token's position in this tuple is its id.
# Both config.json and tokenizer_config.json are derived from this one table so
# the two files can never disagree about bos/eos/pad ids.
# NOTE(review): the order follows the added_tokens_decoder table this script has
# always written (0=<pad>, 1=<unk>, 2=<bos>, 3=<eos>) -- confirm against the
# shipped tokenizer.json.
_SPECIAL_TOKENS = ("<pad>", "<unk>", "<bos>", "<eos>")

_README = """---
license: mit
tags:
- transformer
- language-model
- pytorch
- decoder-only
---

# Codsworth

A small transformer language model built from scratch in PyTorch.

## Model Details

- **Parameters**: ~3.9M
- **Architecture**: GPT-style decoder-only transformer
- **Position Encoding**: RoPE (Rotary Position Embedding)
- **Activation**: SwiGLU

## Usage

```python
import torch
import json
from huggingface_hub import hf_hub_download

# Download files
model_path = hf_hub_download(repo_id="your-username/codsworth", filename="pytorch_model.bin")
tokenizer_path = hf_hub_download(repo_id="your-username/codsworth", filename="tokenizer.json")
config_path = hf_hub_download(repo_id="your-username/codsworth", filename="config.json")

# Load using codsworth library
import sys
sys.path.insert(0, 'path/to/codsworth')
from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer

with open(config_path) as f:
    cfg = json.load(f)

# config.json is a flat dict; a nested "model" section is tolerated as well.
# NOTE: map the keys to CodsworthConfig's field names if they differ.
model_cfg = cfg.get("model", cfg)
config = CodsworthConfig(**model_cfg)
model = CodsworthTransformer(config)
model.load_state_dict(torch.load(model_path, map_location="cpu"))

# Generate
input_ids = torch.tensor([[1, 2, 3]])
output = model.generate(input_ids, max_new_tokens=20)
print(output)
```
"""


def _special_token_id(token: str) -> int:
    """Return the vocabulary id of a special token from ``_SPECIAL_TOKENS``."""
    return _SPECIAL_TOKENS.index(token)


def _build_hf_config() -> dict:
    """Return the contents of ``config.json`` (model hyperparameters)."""
    return {
        "model_type": "codsworth",
        "architectures": ["CodsworthTransformer"],
        "vocab_size": 5004,
        "hidden_size": 256,
        "num_hidden_layers": 2,
        "num_attention_heads": 4,
        "head_dim": 64,
        "intermediate_size": 512,
        "max_position_embeddings": 128,
        "rope_theta": 10000.0,
        "use_rope": True,
        "hidden_dropout": 0.1,
        "attention_dropout": 0.0,
        # Derived from the shared table instead of hard-coded so they always
        # match the ids written to tokenizer_config.json.
        "pad_token_id": _special_token_id("<pad>"),
        "bos_token_id": _special_token_id("<bos>"),
        "eos_token_id": _special_token_id("<eos>"),
        "torch_dtype": "float32",
        "transformers_version": "4.0.0",
    }


def _build_tokenizer_config() -> dict:
    """Return the contents of ``tokenizer_config.json``."""
    added_tokens_decoder = {
        str(token_id): {
            "content": token,
            "single_word": False,
            "lstrip": False,
            "rstrip": False,
            "normalized": False,
            "special": True,
        }
        for token_id, token in enumerate(_SPECIAL_TOKENS)
    }
    return {
        "added_tokens_decoder": added_tokens_decoder,
        "bos_token": "<bos>",
        "eos_token": "<eos>",
        "pad_token": "<pad>",
        "unk_token": "<unk>",
        "model_max_length": 128,
        "tokenizer_class": "PreTrainedTokenizer",
    }


def _write_json(path: str, payload: dict) -> None:
    """Serialize ``payload`` to ``path`` as indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)


def _upload_instructions(output_dir: str) -> str:
    """Return the help text describing how to upload ``output_dir``."""
    return f"""
Option 1 - Using huggingface-cli:
  huggingface-cli upload your-username/codsworth {output_dir}

Option 2 - Using Python:
  pip install huggingface_hub
  python -c "
from huggingface_hub import HfApi
api = HfApi()
api.upload_folder(
    folder_path='{output_dir}',
    repo_id='your-username/codsworth',
    repo_type='model'
)
"

Option 3 - Manual:
  1. Go to https://huggingface.co/new
  2. Create repo 'codsworth'
  3. Upload files from {output_dir}/
"""


def prepare_for_huggingface(
    model_path: str = "codsworth_model.pt",
    tokenizer_path: str = "tokenizer.json",
    output_dir: str = "hf_upload",
) -> None:
    """Assemble a HuggingFace-style upload folder for the Codsworth model.

    Copies the weights and tokenizer into ``output_dir`` and writes
    ``config.json``, ``tokenizer_config.json`` and a ``README.md`` model card
    next to them, then prints the resulting file list and upload instructions.

    Args:
        model_path: Weights file; copied to ``pytorch_model.bin``.
        tokenizer_path: Tokenizer file; copied to ``tokenizer.json``.
        output_dir: Destination folder, created if it does not exist.

    Raises:
        FileNotFoundError: If ``model_path`` or ``tokenizer_path`` is missing
            (raised by ``shutil.copy``).
    """
    print(f"Preparing files in {output_dir}/")

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # 1. Copy model weights
    print("Copying model weights...")
    shutil.copy(model_path, os.path.join(output_dir, "pytorch_model.bin"))

    # 2. Copy tokenizer
    print("Copying tokenizer...")
    shutil.copy(tokenizer_path, os.path.join(output_dir, "tokenizer.json"))

    # 3. Create HuggingFace config.json
    print("Creating config.json...")
    _write_json(os.path.join(output_dir, "config.json"), _build_hf_config())

    # 4. Create tokenizer_config.json
    print("Creating tokenizer_config.json...")
    _write_json(
        os.path.join(output_dir, "tokenizer_config.json"),
        _build_tokenizer_config(),
    )

    # 5. Create README.md (model card)
    print("Creating README.md...")
    # Explicit encoding keeps the model card identical across platforms.
    with open(os.path.join(output_dir, "README.md"), "w", encoding="utf-8") as handle:
        handle.write(_README)

    print(f"\n✓ Files prepared in {output_dir}/")
    print("\nFiles:")
    # Sorted so the listing is stable regardless of filesystem order.
    for entry in sorted(os.listdir(output_dir)):
        size = os.path.getsize(os.path.join(output_dir, entry))
        print(f" - {entry} ({size/1024:.1f} KB)")

    print("\n" + "=" * 50)
    print("To upload to HuggingFace:")
    print("=" * 50)
    print(_upload_instructions(output_dir))
# Script entry point: build the upload folder using the default paths
# (codsworth_model.pt, tokenizer.json -> hf_upload/).
if __name__ == "__main__":
    prepare_for_huggingface()