File size: 4,216 Bytes

b84d85a

"""
Load Codsworth from HuggingFace Hub - Direct Download Method
Works without needing custom HuggingFace model class
"""

import torch
import json
import sys
import os

# Add codsworth to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer


def load_from_huggingface(
    repo_id: str = "Jaqshanahan/codsworth-3.8m",
    device: str = "cpu",
):
    """
    Load Codsworth directly from HuggingFace Hub using hf_hub_download.
    No transformers library needed for the model itself.

    Downloads ``config.json``, ``pytorch_model.bin`` and ``tokenizer.json``
    from ``repo_id``, builds a ``CodsworthTransformer`` from the ``"model"``
    section of the config, loads the weights and puts the model in eval mode.

    Args:
        repo_id: HuggingFace Hub repository to download the files from.
        device: Device the weights are mapped to and the model is moved to.

    Returns:
        A ``(model, vocab, id_to_word, config)`` tuple. ``vocab`` is the
        mapping read from ``tokenizer.json`` and ``id_to_word`` is its
        inverse. If ``huggingface_hub`` is not installed, an install hint is
        printed and ``(None, None, None, None)`` is returned instead.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("Install: pip install huggingface_hub")
        return None, None, None, None

    print(f"Downloading from https://huggingface.co/{repo_id}")

    # Download files
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
    model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")

    print("Files downloaded!")

    # JSON is UTF-8 by specification; do not depend on the platform's
    # default locale encoding when decoding it.
    with open(config_path, encoding="utf-8") as f:
        model_cfg = json.load(f)["model"]

    # Hyper-parameters copied verbatim from the published config.
    copied_keys = (
        "vocab_size",
        "context_length",
        "embedding_dim",
        "num_layers",
        "num_heads",
        "head_dim",
        "ffn_hidden_dim",
        "use_rope",
        "rope_theta",
        "dropout",
    )
    config = CodsworthConfig(
        # Flash attention and gradient checkpointing are always switched off
        # here, regardless of what the config file says.
        use_flash_attention=False,
        use_gradient_checkpointing=False,
        **{key: model_cfg[key] for key in copied_keys},
    )

    # Create model
    model = CodsworthTransformer(config)

    # SECURITY: pytorch_model.bin is a pickle file fetched from the network.
    # On PyTorch versions where torch.load does not default to
    # weights_only=True, unpickling can execute arbitrary code, so only load
    # repositories you trust (or pass weights_only=True where supported).
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    # Load tokenizer: a JSON object whose inverse gives id -> word.
    with open(tokenizer_path, encoding="utf-8") as f:
        vocab = json.load(f)
    id_to_word = {v: k for k, v in vocab.items()}

    print(f"Model loaded! Parameters: {model.get_num_params():,}")

    return model, vocab, id_to_word, config


def generate(
    model,
    vocab,
    id_to_word,
    prompt: str,
    max_tokens: int = 50,
    temperature: float = 1.0,
    device: str = "cpu",
) -> str:
    """
    Generate text from prompt.

    The prompt is lower-cased and split on whitespace; each word is mapped to
    its id in ``vocab`` (unknown words map to ``<unk>``, or id 1 if ``<unk>``
    is absent). Tokens are then produced one at a time from the logits at the
    last position until ``max_tokens`` tokens were added or ``<eos>`` (id 3
    if absent from ``vocab``) is produced.

    Args:
        model: Callable returning a mapping with a ``"logits"`` entry; must
            expose ``eval()`` and ``config.context_length``.
        vocab: Mapping from word to token id. Must contain ``"<pad>"``
            whenever the sequence is shorter than the context length.
        id_to_word: Mapping from token id back to word.
        prompt: Text to continue.
        max_tokens: Maximum number of tokens to append.
        temperature: Divisor applied to the logits before sampling. A value
            of zero or below selects the highest-scoring token (greedy
            decoding) instead of dividing by it.
        device: Device on which the input tensor is created.

    Returns:
        The prompt tokens followed by the generated tokens, joined by single
        spaces. Ids missing from ``id_to_word`` are rendered as ``<unk>``.

    Raises:
        KeyError: If padding is needed and ``vocab`` has no ``"<pad>"`` entry.
    """
    model.eval()

    unk_id = vocab.get("<unk>", 1)
    eos_id = vocab.get("<eos>", 3)
    context_length = model.config.context_length

    token_ids = [vocab.get(word, unk_id) for word in prompt.lower().split()]

    with torch.no_grad():
        for _ in range(max_tokens):
            # Feed only the most recent context window, left-padded to the
            # full context length.
            window = token_ids[-context_length:]
            missing = context_length - len(window)
            if missing > 0:
                window = [vocab["<pad>"]] * missing + window

            inp = torch.tensor([window], dtype=torch.long, device=device)
            next_logits = model(inp)["logits"][0, -1, :]

            if temperature <= 0:
                # Dividing by a non-positive temperature yields NaN or
                # inverted probabilities; fall back to greedy decoding.
                next_token = int(torch.argmax(next_logits).item())
            else:
                probs = torch.softmax(next_logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, 1).item()

            token_ids.append(next_token)

            if next_token == eos_id:
                break

    return " ".join(id_to_word.get(t, "<unk>") for t in token_ids)


if __name__ == "__main__":
    # Command-line entry point: download the model, then print one sample.
    import argparse

    parser = argparse.ArgumentParser(description="Load Codsworth from HF")
    parser.add_argument("--repo", default="Jaqshanahan/codsworth-3.8m")
    parser.add_argument("--prompt", default="hello world")
    parser.add_argument("--tokens", type=int, default=30)
    parser.add_argument("--cuda", action="store_true")
    args = parser.parse_args()

    # Fall back to CPU when --cuda is requested but no GPU is available.
    device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"

    model, vocab, id_to_word, _ = load_from_huggingface(args.repo, device)

    # load_from_huggingface returns None placeholders when huggingface_hub
    # is not installed; there is nothing to generate from in that case.
    if model is not None:
        print(f"\nGenerating from: '{args.prompt}'")
        # Pass max_tokens and device by keyword: the sixth positional
        # parameter of generate() is temperature, not device.
        result = generate(
            model,
            vocab,
            id_to_word,
            args.prompt,
            max_tokens=args.tokens,
            device=device,
        )
        print(f"\n{result}")