# Source: codsworth-3.8m / codsworth/scripts/load_hf_direct.py
# Uploaded by Jaqshanahan: "Initial upload of Codsworth model" (commit b84d85a, verified)
"""
Load Codsworth from HuggingFace Hub - Direct Download Method
Works without needing custom HuggingFace model class
"""
import torch
import json
import sys
import os
# Add codsworth to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from codsworth.config import CodsworthConfig
from codsworth.model import CodsworthTransformer
def load_from_huggingface(
    repo_id: str = "Jaqshanahan/codsworth-3.8m",
    device: str = "cpu",
):
    """
    Load Codsworth directly from HuggingFace Hub using hf_hub_download.
    No transformers library needed for the model itself.

    Args:
        repo_id: Hub repository that holds ``config.json``,
            ``pytorch_model.bin`` and ``tokenizer.json``.
        device: Torch device string; the weights are mapped to it while
            loading and the model is moved to it afterwards.

    Returns:
        A ``(model, vocab, id_to_word, config)`` tuple, or
        ``(None, None, None, None)`` when ``huggingface_hub`` is not
        installed.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("Install: pip install huggingface_hub")
        return None, None, None, None

    print(f"Downloading from https://huggingface.co/{repo_id}")
    # Download files
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
    model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
    print("Files downloaded!")

    # Load config. JSON is read as UTF-8 explicitly so the result does not
    # depend on the platform's default text encoding.
    with open(config_path, encoding="utf-8") as f:
        cfg = json.load(f)
    model_cfg = cfg["model"]

    # Create Codsworth config. Flash attention and gradient checkpointing are
    # forced off here instead of being read from the saved config.
    config = CodsworthConfig(
        vocab_size=model_cfg["vocab_size"],
        context_length=model_cfg["context_length"],
        embedding_dim=model_cfg["embedding_dim"],
        num_layers=model_cfg["num_layers"],
        num_heads=model_cfg["num_heads"],
        head_dim=model_cfg["head_dim"],
        ffn_hidden_dim=model_cfg["ffn_hidden_dim"],
        use_rope=model_cfg["use_rope"],
        rope_theta=model_cfg["rope_theta"],
        use_flash_attention=False,
        use_gradient_checkpointing=False,
        dropout=model_cfg["dropout"],
    )

    # Create model
    model = CodsworthTransformer(config)

    # Load weights.
    # SECURITY: the checkpoint is a pickle fetched from the network, so the
    # unpickler is restricted to tensors/primitive containers. This assumes
    # the file is a plain state dict (it is handed straight to
    # load_state_dict) and requires a torch version that supports
    # ``weights_only``.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    # Load tokenizer: the file is treated as a flat token -> id mapping,
    # which is inverted for decoding.
    with open(tokenizer_path, encoding="utf-8") as f:
        vocab = json.load(f)
    id_to_word = {v: k for k, v in vocab.items()}

    print(f"Model loaded! Parameters: {model.get_num_params():,}")
    return model, vocab, id_to_word, config
def generate(
    model,
    vocab,
    id_to_word,
    prompt: str,
    max_tokens: int = 50,
    temperature: float = 1.0,
    device: str = "cpu",
) -> str:
    """Generate text from prompt.

    The prompt is lower-cased and split on whitespace; words missing from
    ``vocab`` map to the ``<unk>`` id. Each step feeds the last
    ``model.config.context_length`` ids (left-padded with ``<pad>``) to the
    model and samples the next id from the logits at the final position.

    Args:
        model: Callable returning a mapping with a ``"logits"`` entry that is
            indexed as ``[batch, position, vocab]``; must expose
            ``config.context_length`` and ``eval()``.
        vocab: Token -> id mapping.
        id_to_word: Id -> token mapping used for decoding.
        prompt: Text to continue.
        max_tokens: Maximum number of new tokens to sample.
        temperature: Divisor applied to the logits before softmax. ``0``
            selects the highest-scoring token (greedy decoding) instead of
            dividing by zero.
        device: Torch device on which the input tensor is created.

    Returns:
        The prompt tokens followed by the generated tokens (including the
        ``<eos>`` token if one was produced), joined by single spaces.
    """
    model.eval()

    # Loop-invariant lookups are resolved once up front.
    unk_id = vocab.get("<unk>", 1)
    eos_id = vocab.get("<eos>", 3)
    # Fallback id 0 is a guess by analogy with the <unk>/<eos> fallbacks
    # above -- TODO confirm against the tokenizer's actual layout.
    pad_id = vocab.get("<pad>", 0)
    context_length = model.config.context_length

    token_ids = [vocab.get(word, unk_id) for word in prompt.lower().split()]

    with torch.no_grad():
        for _ in range(max_tokens):
            # Keep only the most recent tokens and left-pad to a fixed
            # length so the newest token always sits at the last position.
            window = token_ids[-context_length:]
            window = [pad_id] * (context_length - len(window)) + window
            inp = torch.tensor([window], dtype=torch.long, device=device)
            next_logits = model(inp)["logits"][0, -1, :]

            if temperature == 0:
                # Dividing by zero would yield NaN probabilities; greedy
                # selection is the limiting behaviour as temperature -> 0.
                next_token = int(torch.argmax(next_logits).item())
            else:
                probs = torch.softmax(next_logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, 1).item()

            token_ids.append(next_token)
            if next_token == eos_id:
                break

    return " ".join(id_to_word.get(t, "<unk>") for t in token_ids)
if __name__ == "__main__":
    import argparse

    # Command-line entry point: download the model, then print one sampled
    # continuation of the prompt.
    parser = argparse.ArgumentParser(description="Load Codsworth from HF")
    parser.add_argument("--repo", default="Jaqshanahan/codsworth-3.8m")
    parser.add_argument("--prompt", default="hello world")
    parser.add_argument("--tokens", type=int, default=30)
    parser.add_argument("--cuda", action="store_true")
    args = parser.parse_args()

    # Use CUDA only when it was requested and is actually available.
    device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"

    model, vocab, id_to_word, _ = load_from_huggingface(args.repo, device)
    # The loader returns None placeholders when huggingface_hub is missing.
    if model is not None:
        print(f"\nGenerating from: '{args.prompt}'")
        # max_tokens and device are passed by keyword: the sixth positional
        # parameter of generate() is temperature, so a positional device
        # string would be used as the temperature.
        result = generate(
            model,
            vocab,
            id_to_word,
            args.prompt,
            max_tokens=args.tokens,
            device=device,
        )
        print(f"\n{result}")