File size: 4,216 Bytes
b84d85a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
Load Codsworth from HuggingFace Hub - Direct Download Method
Works without needing custom HuggingFace model class
"""

import torch
import json
import sys
import os

# Put the parent of this script's directory at the front of sys.path so the
# `codsworth` imports below resolve when the file is run directly as a script
# (presumably that parent directory contains the `codsworth` package - confirm).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer


def load_from_huggingface(
    repo_id: str = "Jaqshanahan/codsworth-3.8m",
    device: str = "cpu",
):
    """
    Load Codsworth directly from HuggingFace Hub using hf_hub_download.
    No transformers library needed for the model itself.

    Three files are fetched from the repo: ``config.json`` (must contain a
    ``"model"`` section with the architecture hyper-parameters),
    ``pytorch_model.bin`` (a state dict for ``CodsworthTransformer``) and
    ``tokenizer.json`` (a flat JSON object mapping token string -> integer id).

    Args:
        repo_id: HuggingFace Hub repository to download from.
        device: Torch device string the weights are mapped to and the model
            is moved to.

    Returns:
        A ``(model, vocab, id_to_word, config)`` tuple, where ``model`` is in
        eval mode, ``vocab`` maps token -> id and ``id_to_word`` is its
        inverse. If ``huggingface_hub`` is not installed, an install hint is
        printed and ``(None, None, None, None)`` is returned instead.

    Raises:
        KeyError: if ``config.json`` lacks the ``"model"`` section or one of
            the expected hyper-parameter keys.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("Install: pip install huggingface_hub")
        return None, None, None, None

    print(f"Downloading from https://huggingface.co/{repo_id}")

    # Download files
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
    model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")

    print("Files downloaded!")

    # JSON is UTF-8 by specification; do not depend on the platform's default
    # locale encoding (which is not UTF-8 on e.g. many Windows setups).
    with open(config_path, encoding="utf-8") as f:
        cfg = json.load(f)

    model_cfg = cfg["model"]

    # Create Codsworth config. Flash attention and gradient checkpointing are
    # forced off regardless of what the stored config says.
    config = CodsworthConfig(
        vocab_size=model_cfg["vocab_size"],
        context_length=model_cfg["context_length"],
        embedding_dim=model_cfg["embedding_dim"],
        num_layers=model_cfg["num_layers"],
        num_heads=model_cfg["num_heads"],
        head_dim=model_cfg["head_dim"],
        ffn_hidden_dim=model_cfg["ffn_hidden_dim"],
        use_rope=model_cfg["use_rope"],
        rope_theta=model_cfg["rope_theta"],
        use_flash_attention=False,
        use_gradient_checkpointing=False,
        dropout=model_cfg["dropout"],
    )

    # Create model
    model = CodsworthTransformer(config)

    # SECURITY: the checkpoint is a pickle file downloaded from the network.
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered file cannot execute arbitrary code while being loaded.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    # Load tokenizer (token -> id) and build the reverse lookup (id -> token).
    with open(tokenizer_path, encoding="utf-8") as f:
        vocab = json.load(f)
    id_to_word = {token_id: token for token, token_id in vocab.items()}

    print(f"Model loaded! Parameters: {model.get_num_params():,}")

    return model, vocab, id_to_word, config


def generate(
    model,
    vocab,
    id_to_word,
    prompt: str,
    max_tokens: int = 50,
    temperature: float = 1.0,
    device: str = "cpu",
) -> str:
    """Generate text from prompt.

    The prompt is lower-cased and split on whitespace; each word is looked up
    in ``vocab`` (unknown words map to the ``<unk>`` id). Tokens are then
    produced one at a time: the last ``model.config.context_length`` ids are
    left-padded with the ``<pad>`` id to exactly that length, fed to the
    model, and the next token is chosen from the logits at the final position.

    Args:
        model: Callable taking a ``(1, context_length)`` long tensor and
            returning a mapping whose ``"logits"`` entry is indexed as
            ``[batch, position, vocab]``. Must expose ``eval()`` and
            ``config.context_length``.
        vocab: Mapping token string -> integer id. Special tokens fall back
            to ids ``<pad>``=0, ``<unk>``=1, ``<eos>``=3 when absent
            (NOTE(review): the pad fallback of 0 is an assumption chosen to
            sit alongside the existing unk/eos fallbacks - confirm).
        id_to_word: Mapping integer id -> token string.
        prompt: Text to continue.
        max_tokens: Maximum number of tokens to generate.
        temperature: Softmax temperature. ``0`` selects the most likely token
            (greedy decoding) instead of dividing the logits by zero.
        device: Torch device the input tensor is created on.

    Returns:
        The prompt tokens followed by the generated tokens, joined by single
        spaces. A generated ``<eos>`` token ends generation and is included
        in the output. Ids missing from ``id_to_word`` render as ``<unk>``.

    Raises:
        ValueError: if ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be >= 0, got {temperature}")

    model.eval()

    # Loop invariants: resolve the context size and special-token ids once.
    context_length = model.config.context_length
    unk_id = vocab.get("<unk>", 1)
    pad_id = vocab.get("<pad>", 0)
    eos_id = vocab.get("<eos>", 3)

    token_ids = [vocab.get(word, unk_id) for word in prompt.lower().split()]

    with torch.no_grad():
        for _ in range(max_tokens):
            # Keep only the most recent tokens and left-pad to a fixed length
            # so the newest token always sits at the last position.
            window = token_ids[-context_length:]
            window = [pad_id] * (context_length - len(window)) + window

            inp = torch.tensor([window], dtype=torch.long, device=device)
            next_logits = model(inp)["logits"][0, -1, :]

            if temperature == 0:
                # Greedy decoding; dividing by zero would yield NaN/inf
                # probabilities and make torch.multinomial raise.
                next_token = int(torch.argmax(next_logits).item())
            else:
                probs = torch.softmax(next_logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, 1).item()

            token_ids.append(next_token)

            if next_token == eos_id:
                break

    return " ".join(id_to_word.get(t, "<unk>") for t in token_ids)


if __name__ == "__main__":
    # Command-line entry point: download the model, then print one sampled
    # continuation of the given prompt.
    import argparse

    parser = argparse.ArgumentParser(description="Load Codsworth from HF")
    parser.add_argument("--repo", default="Jaqshanahan/codsworth-3.8m")
    parser.add_argument("--prompt", default="hello world")
    parser.add_argument("--tokens", type=int, default=30)
    parser.add_argument("--cuda", action="store_true")
    args = parser.parse_args()

    # Fall back to CPU when --cuda is requested but no CUDA device is available.
    device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"

    model, vocab, id_to_word, _ = load_from_huggingface(args.repo, device)

    # load_from_huggingface returns None for every element when
    # huggingface_hub is not installed (it has already printed a hint).
    if model is not None:
        print(f"\nGenerating from: '{args.prompt}'")
        # Pass by keyword: the sixth positional parameter of generate() is
        # `temperature`, so a positional `device` would land in the wrong slot.
        result = generate(
            model,
            vocab,
            id_to_word,
            args.prompt,
            max_tokens=args.tokens,
            device=device,
        )
        print(f"\n{result}")