#!/usr/bin/env python3
"""
Example: Loading Codsworth from Hugging Face Hub

After uploading to Hugging Face, users can load the model using this script.
"""

# ========================
# QUICK START (After Upload)
# ========================

"""
# Option 1: Using Hugging Face Transformers (if converted)
from transformers import AutoModel, AutoTokenizer

model_name = "your-username/codsworth"  # Change to your username
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Generate
inputs = tokenizer("Hello world", return_tensors="pt")
outputs = model(**inputs)

# Option 2: Using Codsworth directly (recommended for this implementation)
import torch
import json

# Download from HF
from huggingface_hub import hf_hub_download

config_path = hf_hub_download(repo_id="your-username/codsworth", filename="config.json")
model_path = hf_hub_download(repo_id="your-username/codsworth", filename="codsworth_model.pt")
tokenizer_path = hf_hub_download(repo_id="your-username/codsworth", filename="tokenizer.json")

# Load locally
import sys
sys.path.insert(0, '/path/to/codsworth')

from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer

with open(config_path) as f:
    cfg = json.load(f)["model"]

config = CodsworthConfig(**cfg)
model = CodsworthTransformer(config)
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

with open(tokenizer_path) as f:
    vocab = json.load(f)
id_to_word = {v: k for k, v in vocab.items()}

# Generate
def generate(prompt):
    words = prompt.lower().split()
    # NOTE: assumes tokenizer.json defines an "<unk>" entry for unknown
    # words -- confirm against your vocabulary and adjust the key if needed.
    ids = [vocab.get(w, vocab["<unk>"]) for w in words]
    for _ in range(50):
        inp = ids[-128:] + [0] * max(0, 128-len(ids))
        with torch.no_grad():
            logits = model(torch.tensor([inp]))["logits"]
        next_tok = torch.multinomial(torch.softmax(logits[0,-1], -1), 1).item()
        ids.append(next_tok)
    return " ".join(id_to_word.get(t, "") for t in ids)

print(generate("hello"))
"""

# ========================
# FULL EXAMPLE SCRIPT
# ========================

import argparse
import importlib
import json
import os
import subprocess
import sys

import torch

# Check for transformers
try:
    from transformers import AutoTokenizer
    HAS_TRANSFORMERS = True
except ImportError:
    HAS_TRANSFORMERS = False

try:
    from huggingface_hub import hf_hub_download
    HAS_HF_HUB = True
except ImportError:
    HAS_HF_HUB = False


# Keys of the "model" section of config.json that are forwarded to
# CodsworthConfig.
_MODEL_CONFIG_KEYS = (
    "vocab_size",
    "context_length",
    "embedding_dim",
    "num_layers",
    "num_heads",
    "ffn_hidden_dim",
    "use_rope",
    "rope_theta",
)

# Vocabulary keys tried, in order, for each special token.  The empty string
# comes first because it is the key this script has always looked up, so any
# vocabulary that worked before resolves to the same ids.  The named variants
# are assumptions about how the tokenizer spells its special tokens --
# TODO confirm against the uploaded tokenizer.json.
_UNK_TOKEN_KEYS = ("", "<unk>", "<UNK>", "[UNK]")
_PAD_TOKEN_KEYS = ("", "<pad>", "<PAD>", "[PAD]")
_EOS_TOKEN_KEYS = ("", "<eos>", "<EOS>", "</s>", "[EOS]")

# Fallback ids used when the vocabulary has none of the keys above.  2 is the
# end-of-sequence default this script has always used; 0 for padding is an
# assumption -- TODO confirm.
_DEFAULT_PAD_ID = 0
_DEFAULT_EOS_ID = 2


def _special_token_id(vocab, candidates, default):
    """Return the id of the first key in *candidates* found in *vocab*, else *default*."""
    for key in candidates:
        if key in vocab:
            return vocab[key]
    return default


def _get_hf_hub_download():
    """Return ``huggingface_hub.hf_hub_download``, installing the package if missing.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` step fails.
    """
    if not HAS_HF_HUB:
        print("Installing huggingface_hub...")
        # Install into the interpreter that is running this script; a bare
        # "pip" on PATH may belong to a different Python.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "huggingface_hub"]
        )
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
    # Imported under a different name on purpose: binding "hf_hub_download"
    # conditionally inside a function makes it a local and hides the
    # module-level import whenever the branch is skipped.
    from huggingface_hub import hf_hub_download as download
    return download


def load_from_huggingface(
    repo_id: str = "your-username/codsworth",
    device: str = "cpu",
):
    """
    Load Codsworth model from Hugging Face Hub.

    Downloads ``config.json``, ``codsworth_model.pt`` and ``tokenizer.json``
    from the repo, builds the model from the "model" section of the config,
    loads the weights and switches the model to eval mode.

    Args:
        repo_id: Your HF repo ID (e.g., "jaqrshanahan/codsworth")
        device: "cpu" or "cuda"

    Returns:
        model, vocab, id_to_word, config

    Raises:
        subprocess.CalledProcessError: If huggingface_hub is missing and
            cannot be installed.
        KeyError: If config.json lacks the "model" section or one of the
            expected model settings.
    """
    download = _get_hf_hub_download()

    print(f"Downloading from https://huggingface.co/{repo_id}")

    # Download files
    config_path = download(repo_id=repo_id, filename="config.json")
    model_path = download(repo_id=repo_id, filename="codsworth_model.pt")
    tokenizer_path = download(repo_id=repo_id, filename="tokenizer.json")

    print("Files downloaded!")

    # Load config
    with open(config_path, encoding="utf-8") as f:
        config_data = json.load(f)

    # Add to path (adjust path to your local codsworth)
    sys.path.insert(0, ".")
    from codsworth.config import CodsworthConfig
    from codsworth.model import CodsworthTransformer

    # Create config from the known model settings only
    model_cfg = config_data["model"]
    config = CodsworthConfig(**{key: model_cfg[key] for key in _MODEL_CONFIG_KEYS})

    # Load model
    model = CodsworthTransformer(config)
    # NOTE(security): torch.load unpickles the downloaded file; only load
    # checkpoints from repos you trust (or pass weights_only=True on PyTorch
    # versions that support it).
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()

    # Load tokenizer: a word -> id mapping, inverted for decoding
    with open(tokenizer_path, encoding="utf-8") as f:
        vocab = json.load(f)
    id_to_word = {v: k for k, v in vocab.items()}

    print(f"Loaded! Parameters: {model.get_num_params():,}")

    return model, vocab, id_to_word, config


def generate_text(
    model,
    vocab,
    id_to_word,
    prompt: str,
    max_tokens: int = 50,
    temperature: float = 1.0,
    device: str = "cpu",
) -> str:
    """Generate text from prompt.

    The prompt is lower-cased and split on whitespace; each word is mapped
    to its id through *vocab*.  Tokens are then produced one at a time from
    the logits of the last position until *max_tokens* have been generated
    or the end-of-sequence id is produced.

    Args:
        model: Callable returning a mapping with a "logits" entry; must
            expose ``config.context_length``.
        vocab: Mapping from word to token id.
        id_to_word: Mapping from token id to word.
        prompt: Text to continue.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature; values <= 0 select the most
            likely token instead of sampling.
        device: Device the input tensor is moved to.

    Returns:
        The prompt tokens followed by the generated tokens, joined by
        spaces.  Ids missing from *id_to_word* decode to an empty string.

    Raises:
        KeyError: If a prompt word is not in *vocab* and the vocabulary has
            no unknown-token entry.
    """
    unk_id = _special_token_id(vocab, _UNK_TOKEN_KEYS, None)
    pad_id = _special_token_id(vocab, _PAD_TOKEN_KEYS, _DEFAULT_PAD_ID)
    eos_id = _special_token_id(vocab, _EOS_TOKEN_KEYS, _DEFAULT_EOS_ID)
    context_length = model.config.context_length

    ids = []
    for word in prompt.lower().split():
        token_id = vocab.get(word, unk_id)
        if token_id is None:
            raise KeyError(
                f"{word!r} is not in the vocabulary and no unknown-token "
                "entry was found"
            )
        ids.append(token_id)

    for _ in range(max_tokens):
        # Keep the most recent tokens and left-pad to the full context
        # length so the newest token is always at the last position.
        window = ids[-context_length:]
        window = [pad_id] * (context_length - len(window)) + window

        with torch.no_grad():
            inp = torch.tensor([window], dtype=torch.long).to(device)
            next_logits = model(inp)["logits"][0, -1, :]
            if temperature <= 0:
                # Dividing by a non-positive temperature is meaningless;
                # treat it as greedy decoding.
                next_token = int(torch.argmax(next_logits).item())
            else:
                probs = torch.softmax(next_logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, 1).item()

        ids.append(next_token)

        if next_token == eos_id:
            break

    return " ".join(id_to_word.get(t, "") for t in ids)


def main():
    """Parse command-line options, load the model and print one generation."""
    parser = argparse.ArgumentParser(description="Load Codsworth from Hugging Face")
    parser.add_argument("--repo", type=str, default="your-username/codsworth",
                        help="Hugging Face repo ID")
    parser.add_argument("--prompt", type=str, default="hello world",
                        help="Prompt for generation")
    parser.add_argument("--tokens", type=int, default=50,
                        help="Max tokens to generate")
    parser.add_argument("--temp", type=float, default=1.0,
                        help="Temperature")
    parser.add_argument("--cuda", action="store_true",
                        help="Use GPU")
    args = parser.parse_args()

    # Fall back to CPU when --cuda is requested but no GPU is available.
    device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"

    print("=" * 50)
    print("Loading Codsworth from Hugging Face Hub")
    print("=" * 50)

    model, vocab, id_to_word, config = load_from_huggingface(args.repo, device)

    print(f"\nGenerating from: '{args.prompt}'")
    # Keyword arguments keep --temp and the device from being swapped.
    result = generate_text(
        model,
        vocab,
        id_to_word,
        args.prompt,
        max_tokens=args.tokens,
        temperature=args.temp,
        device=device,
    )
    print(f"\nResult:\n{result}")


if __name__ == "__main__":
    main()