"""
Load Codsworth from HuggingFace Hub - Direct Download Method

Works without needing custom HuggingFace model class
"""

import json
import os
import sys

import torch

# Add codsworth to path: the parent of this file's directory is put first on
# sys.path so the `codsworth` package imports below resolve.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer


def load_from_huggingface(
    repo_id: str = "Jaqshanahan/codsworth-3.8m",
    device: str = "cpu",
):
    """
    Load Codsworth directly from HuggingFace Hub using hf_hub_download.

    No transformers library needed for the model itself.

    Args:
        repo_id: Hub repository to download ``config.json``,
            ``pytorch_model.bin`` and ``tokenizer.json`` from.
        device: Device string used both as ``map_location`` when reading the
            weights and as the target of ``model.to``.

    Returns:
        A ``(model, vocab, id_to_word, config)`` tuple, where ``vocab`` is the
        parsed ``tokenizer.json`` mapping and ``id_to_word`` is its inverse.
        If ``huggingface_hub`` is not installed, a hint is printed and
        ``(None, None, None, None)`` is returned instead.

    Raises:
        KeyError: If ``config.json`` has no ``"model"`` section or that
            section lacks one of the expected fields.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("Install: pip install huggingface_hub")
        return None, None, None, None

    print(f"Downloading from https://huggingface.co/{repo_id}")

    # Download files
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
    model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
    print("Files downloaded!")

    # Load config. JSON is read as UTF-8 explicitly so the result does not
    # depend on the platform's default encoding.
    with open(config_path, encoding="utf-8") as f:
        model_cfg = json.load(f)["model"]

    # Create Codsworth config. Flash attention and gradient checkpointing are
    # always switched off here, regardless of what the file says.
    config = CodsworthConfig(
        vocab_size=model_cfg["vocab_size"],
        context_length=model_cfg["context_length"],
        embedding_dim=model_cfg["embedding_dim"],
        num_layers=model_cfg["num_layers"],
        num_heads=model_cfg["num_heads"],
        head_dim=model_cfg["head_dim"],
        ffn_hidden_dim=model_cfg["ffn_hidden_dim"],
        use_rope=model_cfg["use_rope"],
        rope_theta=model_cfg["rope_theta"],
        use_flash_attention=False,
        use_gradient_checkpointing=False,
        dropout=model_cfg["dropout"],
    )

    # Create model
    model = CodsworthTransformer(config)

    # Load weights.
    # SECURITY: the checkpoint is a pickle file fetched from a remote
    # repository. weights_only=True limits unpickling to tensors and plain
    # containers, which is all load_state_dict needs.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    # Load tokenizer: a JSON object mapping word -> id, inverted for decoding.
    with open(tokenizer_path, encoding="utf-8") as f:
        vocab = json.load(f)
    id_to_word = {v: k for k, v in vocab.items()}

    print(f"Model loaded! Parameters: {model.get_num_params():,}")
    return model, vocab, id_to_word, config


def generate(
    model,
    vocab,
    id_to_word,
    prompt: str,
    max_tokens: int = 50,
    temperature: float = 1.0,
    device: str = "cpu",
) -> str:
    """Generate text from prompt

    The prompt is lower-cased and split on whitespace, each word is mapped
    through ``vocab``, and up to ``max_tokens`` ids are sampled one at a time
    from the temperature-scaled softmax of the model's last-position logits.

    Args:
        model: Callable returning a mapping with a ``"logits"`` entry; must
            expose ``config.context_length`` and ``eval()``.
        vocab: Word -> id mapping.
        id_to_word: Id -> word mapping used to decode the result.
        prompt: Seed text.
        max_tokens: Maximum number of ids to sample.
        temperature: Divisor applied to the logits before softmax; must be
            greater than zero.
        device: Device the input tensor is moved to.

    Returns:
        The prompt ids followed by the sampled ids, decoded and joined with
        single spaces.

    Raises:
        ValueError: If ``temperature`` is not greater than zero.
        KeyError: If the window needs padding and ``vocab`` has no ``""`` key.
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be > 0, got {temperature!r}")

    model.eval()
    context_length = model.config.context_length

    # NOTE(review): every special-token lookup in this function uses the empty
    # string as its key. These look like distinct marker names (unknown word,
    # padding, end of sequence) whose text was lost before this file was
    # received - confirm the real names against tokenizer.json.
    unknown_id = vocab.get("", 1)
    stop_id = vocab.get("", 3)

    token_ids = [vocab.get(word, unknown_id) for word in prompt.lower().split()]

    with torch.no_grad():
        for _ in range(max_tokens):
            # The model only ever sees the most recent context_length ids,
            # left-padded up to exactly that length.
            window = token_ids[-context_length:]
            missing = context_length - len(window)
            if missing > 0:
                window = [vocab[""]] * missing + window

            inp = torch.tensor([window], dtype=torch.long).to(device)
            logits = model(inp)["logits"]
            next_logits = logits[0, -1, :] / temperature
            probs = torch.softmax(next_logits, dim=-1)
            next_token = torch.multinomial(probs, 1).item()

            # The sampled id is kept in the output even when it ends the loop.
            token_ids.append(next_token)
            if next_token == stop_id:
                break

    return " ".join(id_to_word.get(t, "") for t in token_ids)


def _main() -> None:
    """Parse command-line options, load the model and print one sample."""
    import argparse

    parser = argparse.ArgumentParser(description="Load Codsworth from HF")
    parser.add_argument("--repo", default="Jaqshanahan/codsworth-3.8m")
    parser.add_argument("--prompt", default="hello world")
    parser.add_argument("--tokens", type=int, default=30)
    parser.add_argument("--cuda", action="store_true")
    args = parser.parse_args()

    # Fall back to CPU when --cuda is requested but CUDA is unavailable.
    device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"

    model, vocab, id_to_word, _ = load_from_huggingface(args.repo, device)
    if model is None:
        return

    print(f"\nGenerating from: '{args.prompt}'")
    # Keyword arguments matter here: the parameter after max_tokens is
    # temperature, so a positional device would land in the wrong slot.
    result = generate(
        model,
        vocab,
        id_to_word,
        args.prompt,
        max_tokens=args.tokens,
        device=device,
    )
    print(f"\n{result}")


if __name__ == "__main__":
    _main()