| """ |
| Load Codsworth from HuggingFace Hub - Direct Download Method |
| Works without needing custom HuggingFace model class |
| """ |
|
|
| import torch |
| import json |
| import sys |
| import os |
|
|
| |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
| from codsworth.config import CodsworthConfig |
| from codsworth.model import CodsworthTransformer |
|
|
|
|
def load_from_huggingface(
    repo_id: str = "Jaqshanahan/codsworth-3.8m",
    device: str = "cpu",
):
    """Download a Codsworth checkpoint from the HuggingFace Hub and build the model.

    Fetches ``config.json``, ``pytorch_model.bin`` and ``tokenizer.json`` from
    *repo_id* with ``hf_hub_download``, builds a ``CodsworthTransformer`` from
    the ``"model"`` section of the config, loads the weights into it and
    switches it to eval mode on *device*.

    Args:
        repo_id: Hub repository to download the three files from.
        device: Torch device string, used both as ``map_location`` when
            loading the weights and as the target of ``model.to``.

    Returns:
        A ``(model, vocab, id_to_word, config)`` tuple. ``vocab`` is the
        mapping stored in ``tokenizer.json`` (presumably token -> id; confirm
        against the uploaded file) and ``id_to_word`` is its inverse. When
        ``huggingface_hub`` is not installed, an install hint is printed and
        ``(None, None, None, None)`` is returned instead.

    Raises:
        KeyError: If ``config.json`` lacks the ``"model"`` section or one of
            the hyperparameters read from it.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("Install: pip install huggingface_hub")
        return None, None, None, None

    print(f"Downloading from https://huggingface.co/{repo_id}")

    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
    model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")

    print("Files downloaded!")

    # JSON is UTF-8 by specification; do not depend on the platform's
    # default encoding (e.g. cp1252 on Windows) when reading it.
    with open(config_path, encoding="utf-8") as f:
        model_cfg = json.load(f)["model"]

    # Hyperparameters copied verbatim from the checkpoint's config.
    copied_keys = (
        "vocab_size",
        "context_length",
        "embedding_dim",
        "num_layers",
        "num_heads",
        "head_dim",
        "ffn_hidden_dim",
        "use_rope",
        "rope_theta",
        "dropout",
    )
    config = CodsworthConfig(
        **{key: model_cfg[key] for key in copied_keys},
        # Flash attention and gradient checkpointing are always disabled
        # here, whatever the stored config says.
        use_flash_attention=False,
        use_gradient_checkpointing=False,
    )

    model = CodsworthTransformer(config)

    # NOTE(security): torch.load unpickles the downloaded file, which can
    # execute arbitrary code on PyTorch versions where weights_only is not
    # the default. Only load repositories you trust, or pass
    # weights_only=True if the installed PyTorch supports it.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    with open(tokenizer_path, encoding="utf-8") as f:
        vocab = json.load(f)
    id_to_word = {v: k for k, v in vocab.items()}

    print(f"Model loaded! Parameters: {model.get_num_params():,}")

    return model, vocab, id_to_word, config
|
|
|
|
def generate(
    model,
    vocab,
    id_to_word,
    prompt: str,
    max_tokens: int = 50,
    temperature: float = 1.0,
    device: str = "cpu",
) -> str:
    """Generate a continuation of *prompt*, one sampled token at a time.

    The prompt is lower-cased and split on whitespace; each word is looked up
    in *vocab*, falling back to the ``<unk>`` id. At every step the most
    recent ``model.config.context_length`` ids are left-padded with the
    ``<pad>`` id to exactly that length, fed to the model, and the next token
    is drawn from the logits at the last position.

    Args:
        model: Callable returning a mapping with a ``"logits"`` entry that is
            indexed as ``[0, -1, :]``; must expose ``eval()`` and
            ``config.context_length``.
        vocab: Mapping from word to token id. Missing special tokens fall
            back to ids 0 (``<pad>``), 1 (``<unk>``) and 3 (``<eos>``).
        id_to_word: Mapping from token id back to word.
        prompt: Text to continue.
        max_tokens: Maximum number of tokens to append.
        temperature: Divisor applied to the logits before softmax sampling.
            A value ``<= 0`` selects greedy decoding (argmax) instead of
            dividing by zero or inverting the distribution.
        device: Torch device on which the input tensor is created.

    Returns:
        The prompt words followed by the generated words, joined by single
        spaces. Ids absent from *id_to_word* are rendered as ``<unk>``.
        Generation stops early once the ``<eos>`` id is produced; that token
        is included in the output.
    """
    model.eval()

    # Loop invariants, resolved once.
    context_length = model.config.context_length
    unk_id = vocab.get("<unk>", 1)
    pad_id = vocab.get("<pad>", 0)
    eos_id = vocab.get("<eos>", 3)
    greedy = temperature <= 0

    token_ids = [vocab.get(word, unk_id) for word in prompt.lower().split()]

    with torch.no_grad():
        for _ in range(max_tokens):
            # Keep only the most recent tokens and left-pad to a fixed
            # window so the newest token always sits at position -1.
            window = token_ids[-context_length:]
            window = [pad_id] * (context_length - len(window)) + window

            inp = torch.tensor([window], dtype=torch.long, device=device)
            next_logits = model(inp)["logits"][0, -1, :]

            if greedy:
                next_token = int(torch.argmax(next_logits).item())
            else:
                probs = torch.softmax(next_logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, 1).item()

            token_ids.append(next_token)
            if next_token == eos_id:
                break

    return " ".join(id_to_word.get(t, "<unk>") for t in token_ids)
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: download the model, then print one sampled
    # continuation of --prompt.
    import argparse

    parser = argparse.ArgumentParser(description="Load Codsworth from HF")
    parser.add_argument("--repo", default="Jaqshanahan/codsworth-3.8m")
    parser.add_argument("--prompt", default="hello world")
    parser.add_argument("--tokens", type=int, default=30)
    parser.add_argument("--cuda", action="store_true")
    args = parser.parse_args()

    # Use CUDA only when it was requested and is actually available.
    device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"

    model, vocab, id_to_word, _ = load_from_huggingface(args.repo, device)

    # load_from_huggingface returns None for the model when huggingface_hub
    # is not installed; test identity rather than module truthiness.
    if model is not None:
        print(f"\nGenerating from: '{args.prompt}'")
        # Pass by keyword: the sixth positional parameter of generate() is
        # `temperature`, so a positional `device` would land in that slot.
        result = generate(
            model,
            vocab,
            id_to_word,
            args.prompt,
            max_tokens=args.tokens,
            device=device,
        )
        print(f"\n{result}")