#!/usr/bin/env python3
"""
Codsworth Hugging Face Upload Script
This script uploads the Codsworth model to Hugging Face Hub.
Requires: pip install huggingface_hub
"""
import os
import json
import argparse
from pathlib import Path
# huggingface_hub is the only third-party dependency; fail early with an
# installation hint instead of a raw ImportError traceback.
try:
    from huggingface_hub import HfApi, create_repo, upload_folder
except ImportError:
    print("Error: huggingface_hub not installed.")
    print("Install with: pip install huggingface_hub")
    # The bare exit() helper is injected by the `site` module and is not
    # guaranteed to exist (e.g. `python -S`); SystemExit always is.
    raise SystemExit(1)
def create_model_card(repo_id: str = "your-username/codsworth") -> str:
    """Create a Model Card for Hugging Face.

    Args:
        repo_id: Repository ID inserted into the "Usage" code snippet. The
            default keeps the generic placeholder, so calling the function
            without arguments returns the same text as before.

    Returns:
        The model card as Markdown text, starting with its YAML front matter.
    """
    # A plain marker plus str.replace is used instead of an f-string or
    # str.format because the BibTeX entry contains literal curly braces.
    template = """---
license: mit
tags:
- transformer
- language-model
- pytorch
- decoder-only
- rope
datasets:
- wikitext
---
# Codsworth
A transformer language model built from scratch in PyTorch.
## Model Details
- **Architecture**: GPT-style decoder-only transformer with RoPE (Rotary Position Embedding)
- **Parameters**: ~3.9M (Tiny config)
- **Framework**: PyTorch
### Architecture Details
- 2 transformer layers
- 256 embedding dimension
- 4 attention heads
- 512 FFN hidden dimension
- SwiGLU activation
- RoPE positional encoding
## Usage
```python
from transformers import AutoModel, AutoTokenizer
import torch
model = AutoModel.from_pretrained("__REPO_ID__")
tokenizer = AutoTokenizer.from_pretrained("__REPO_ID__")
inputs = tokenizer("Hello world", return_tensors="pt")
outputs = model(**inputs)
```
## Training
- Trained on WikiText subset
- Optimizer: AdamW (lr=1e-4)
- Loss: Cross-entropy
## Limitations
- Small vocabulary (~5000 words)
- Limited training data
- Not suitable for production use
## Citation
```bibtex
@misc{codsworth2026,
title={Codsworth: A Transformer Language Model Implementation},
author={Shanahan, Jaq R.},
year={2026},
institution={Hermes Research Center}
}
```
"""
    return template.replace("__REPO_ID__", repo_id)
def upload_to_huggingface(
    model_path: str = "codsworth_model.pt",
    tokenizer_path: str = "tokenizer.json",
    config_path: str = "config.json",
    repo_id: str = None,
    token: str = None,
    private: bool = False,
):
    """
    Upload Codsworth to Hugging Face Hub.

    Only the model, tokenizer and config files that actually exist are
    uploaded, together with a generated ``README.md`` model card. Everything
    is pushed in a single commit. The model card is built in memory, so no
    ``README.md`` is written to (or overwritten in) the working directory,
    and no other file from the working directory is uploaded.

    Errors are reported on stdout and the function returns early; it does
    not raise for missing credentials, a missing repo ID, missing files, or
    Hub failures.

    Args:
        model_path: Path to model weights (.pt file)
        tokenizer_path: Path to tokenizer JSON
        config_path: Path to config JSON
        repo_id: Hugging Face repo ID (e.g., "username/codsworth")
        token: Hugging Face API token (or set HF_TOKEN env var)
        private: Whether to create a private repo

    Returns:
        None.
    """
    # Get token from environment if not provided. An empty string is treated
    # the same as a missing token.
    if not token:
        token = os.environ.get("HF_TOKEN")
    if not token:
        print("Error: No token provided.")
        print("Set HF_TOKEN environment variable or pass --token")
        print("Get token from: https://huggingface.co/settings/tokens")
        return
    if repo_id is None:
        print("Error: Please specify --repo-id (e.g., 'username/codsworth')")
        return

    # Check files exist (directories are rejected: only regular files can be
    # added to the commit).
    files_to_upload = []
    for f in [model_path, tokenizer_path, config_path]:
        if os.path.isfile(f):
            files_to_upload.append(f)
        else:
            print(f"Warning: {f} not found, skipping")
    if not files_to_upload:
        print("Error: No files found to upload!")
        return

    print(f"\nUploading to https://huggingface.co/{repo_id}")
    print(f"Files: {files_to_upload}")

    # Imported here because it is only needed once the pre-flight checks pass.
    from huggingface_hub import CommitOperationAdd

    # Initialize API
    api = HfApi(token=token)

    # Create repo if needed
    try:
        create_repo(repo_id, token=token, private=private, exist_ok=True)
        print(f"Repository created/verified: {repo_id}")
    except Exception as e:
        print(f"Error creating repo: {e}")
        return

    # Upload the model card plus exactly the files that were found, rather
    # than the whole working directory.
    model_card_path = "README.md"
    try:
        operations = [
            CommitOperationAdd(
                path_in_repo=model_card_path,
                path_or_fileobj=create_model_card().encode("utf-8"),
            )
        ]
        print(f"Created model card: {model_card_path}")
        operations.extend(
            CommitOperationAdd(
                # Files land at the repo root regardless of their local folder.
                path_in_repo=os.path.basename(path),
                path_or_fileobj=path,
            )
            for path in files_to_upload
        )
        api.create_commit(
            repo_id=repo_id,
            operations=operations,
            commit_message="Initial upload of Codsworth model",
            repo_type="model",
        )
        print(f"\n✓ Successfully uploaded to https://huggingface.co/{repo_id}")
    except Exception as e:
        print(f"Error uploading: {e}")
        print("\nAlternative: Manually upload via web interface")
        print(f"1. Go to https://huggingface.co/new?repo={repo_id}")
        print(f"2. Drag and drop these files: {files_to_upload}")
def main(argv=None):
    """Parse command-line options and run the upload.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            the arguments from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Upload Codsworth to Hugging Face")
    parser.add_argument("--repo-id", type=str, default="Jaqshanahan/codsworth-3.8m",
                        help="Repo ID (e.g., 'username/codsworth')")
    parser.add_argument("--model", type=str, default="codsworth_model.pt",
                        help="Model file path")
    # The tokenizer and config paths default to the same values the upload
    # function uses, so existing invocations behave as before.
    parser.add_argument("--tokenizer", type=str, default="tokenizer.json",
                        help="Tokenizer file path")
    parser.add_argument("--config", type=str, default="config.json",
                        help="Config file path")
    parser.add_argument("--token", type=str, default=None,
                        help="Hugging Face token (or set HF_TOKEN)")
    parser.add_argument("--private", action="store_true",
                        help="Create private repository")
    args = parser.parse_args(argv)

    print("=" * 50)
    print("Codsworth Hugging Face Uploader")
    print("=" * 50)

    upload_to_huggingface(
        model_path=args.model,
        tokenizer_path=args.tokenizer,
        config_path=args.config,
        repo_id=args.repo_id,
        token=args.token,
        private=args.private,
    )
# Run the uploader only when this file is executed as a script.
if __name__ == "__main__":
    main()