# File size: 4,216 Bytes (b84d85a)
"""
Load Codsworth from HuggingFace Hub - Direct Download Method.

Works without needing a custom HuggingFace model class.
"""
import torch
import json
import sys
import os
# Add codsworth to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer
def load_from_huggingface(
    repo_id: str = "Jaqshanahan/codsworth-3.8m",
    device: str = "cpu",
):
    """
    Load Codsworth directly from HuggingFace Hub using hf_hub_download.

    No transformers library needed for the model itself.

    Args:
        repo_id: Hub repository that holds ``config.json``,
            ``pytorch_model.bin`` and ``tokenizer.json``.
        device: Device string used both as ``map_location`` when reading the
            checkpoint and as the target of ``model.to``.

    Returns:
        A ``(model, vocab, id_to_word, config)`` tuple, where ``vocab`` is the
        parsed ``tokenizer.json`` mapping and ``id_to_word`` is its inverse.
        Returns ``(None, None, None, None)`` when ``huggingface_hub`` is not
        installed.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("Install: pip install huggingface_hub")
        return None, None, None, None

    print(f"Downloading from https://huggingface.co/{repo_id}")

    # Download files
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
    model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
    print("Files downloaded!")

    # Load config. JSON is UTF-8 by specification, so do not rely on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(config_path, encoding="utf-8") as f:
        cfg = json.load(f)
    model_cfg = cfg["model"]

    # Architecture fields copied verbatim from the published config; the two
    # runtime-only switches are forced off for plain inference.
    copied_keys = (
        "vocab_size",
        "context_length",
        "embedding_dim",
        "num_layers",
        "num_heads",
        "head_dim",
        "ffn_hidden_dim",
        "use_rope",
        "rope_theta",
        "dropout",
    )
    config = CodsworthConfig(
        **{key: model_cfg[key] for key in copied_keys},
        use_flash_attention=False,
        use_gradient_checkpointing=False,
    )

    # Create model
    model = CodsworthTransformer(config)

    # Load weights.
    # SECURITY NOTE: torch.load unpickles the downloaded file, which can
    # execute arbitrary code. Only use repositories you trust; on PyTorch
    # versions that support it, consider passing weights_only=True.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    # Load tokenizer: the file is treated as a flat token -> id mapping and
    # inverted for decoding. Explicit UTF-8 keeps non-ASCII tokens intact.
    with open(tokenizer_path, encoding="utf-8") as f:
        vocab = json.load(f)
    id_to_word = {v: k for k, v in vocab.items()}

    print(f"Model loaded! Parameters: {model.get_num_params():,}")
    return model, vocab, id_to_word, config
def generate(
    model,
    vocab,
    id_to_word,
    prompt: str,
    max_tokens: int = 50,
    temperature: float = 1.0,
    device: str = "cpu",
) -> str:
    """Generate text from prompt.

    The prompt is lower-cased and split on whitespace; each word is mapped to
    an id through ``vocab`` (unknown words fall back to ``<unk>``). Tokens are
    then produced one at a time until ``max_tokens`` have been generated or
    the ``<eos>`` id is produced.

    Args:
        model: Callable returning a mapping with a ``"logits"`` tensor indexed
            as ``[batch, position, vocab]``; must expose
            ``config.context_length`` and ``eval()``.
        vocab: Mapping from token string to integer id.
        id_to_word: Inverse mapping from integer id to token string.
        prompt: Text to continue.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Softmax temperature. Values ``<= 0`` select the most
            likely token deterministically (greedy decoding) instead of
            dividing by zero.
        device: Device on which the input tensor is placed.

    Returns:
        The prompt tokens followed by the generated tokens (including
        ``<eos>`` if produced), joined by single spaces.
    """
    model.eval()

    # Resolve special ids once. The numeric fallbacks follow the defaults
    # already used for <unk>/<eos>; 0 for <pad> is an assumed convention.
    unk_id = vocab.get("<unk>", 1)
    eos_id = vocab.get("<eos>", 3)
    pad_id = vocab.get("<pad>", 0)
    context_length = model.config.context_length

    token_ids = [vocab.get(word, unk_id) for word in prompt.lower().split()]

    with torch.no_grad():
        for _ in range(max_tokens):
            # Keep only the most recent tokens and left-pad to a fixed
            # length so the last position always holds the newest token.
            window = token_ids[-context_length:]
            padding = [pad_id] * (context_length - len(window))
            inp = torch.tensor([padding + window], dtype=torch.long).to(device)

            next_logits = model(inp)["logits"][0, -1, :]
            if temperature <= 0:
                # Greedy decoding: avoids inf/nan logits from division by 0.
                next_token = int(torch.argmax(next_logits).item())
            else:
                probs = torch.softmax(next_logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, 1).item()

            token_ids.append(next_token)
            if next_token == eos_id:
                break

    return " ".join(id_to_word.get(t, "<unk>") for t in token_ids)
if __name__ == "__main__":
    # Command-line entry point: download the model, then continue a prompt.
    import argparse

    parser = argparse.ArgumentParser(description="Load Codsworth from HF")
    parser.add_argument("--repo", default="Jaqshanahan/codsworth-3.8m")
    parser.add_argument("--prompt", default="hello world")
    parser.add_argument("--tokens", type=int, default=30)
    parser.add_argument("--cuda", action="store_true")
    args = parser.parse_args()

    # Fall back to CPU when CUDA was requested but is not available.
    device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"

    model, vocab, id_to_word, _ = load_from_huggingface(args.repo, device)

    # The loader returns None placeholders when huggingface_hub is missing;
    # compare against None explicitly rather than relying on truthiness.
    if model is not None:
        print(f"\nGenerating from: '{args.prompt}'")
        # Pass by keyword: generate()'s sixth positional parameter is
        # `temperature`, so a positional `device` would land in the wrong slot.
        result = generate(
            model,
            vocab,
            id_to_word,
            args.prompt,
            max_tokens=args.tokens,
            device=device,
        )
        print(f"\n{result}")