# hanuman_loader.py — part of JonusNattapong/GPT4All-Model
# Hub upload metadata: "Upload trained model", commit 7f29093 (verified)
"""
Custom Hanuman Model Loader for Hugging Face
This script allows you to load and use the custom Hanuman model from Hugging Face.
Make sure to include 'modeling.py' in your repository for this to work.
Usage:
from hanuman_loader import HanumanModel
model = HanumanModel.from_pretrained("your-username/your-model-repo")
tokenizer = AutoTokenizer.from_pretrained("your-username/your-model-repo")
prompt = "สวัสดี"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(inputs["input_ids"], max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
"""
import torch
from transformers import AutoTokenizer
from modeling import Hanuman
import json
from huggingface_hub import hf_hub_download
class HanumanModel:
    """Wrapper that loads the custom Hanuman architecture from the Hugging Face Hub.

    The standard ``AutoModel`` machinery cannot instantiate ``Hanuman`` because it
    is a custom architecture defined in ``modeling.py``, so this class downloads
    the config and weights manually and exposes a minimal ``generate``/forward
    interface around the loaded network.
    """

    def __init__(self, model, tokenizer):
        # model: instantiated Hanuman network (weights already loaded)
        # tokenizer: matching Hugging Face tokenizer for the same repo
        self.model = model
        self.tokenizer = tokenizer

    @classmethod
    def from_pretrained(cls, repo_id, map_location="cpu"):
        """Load a Hanuman model (weights + tokenizer) from a Hub repository.

        Args:
            repo_id (str): Hugging Face repository ID (e.g., "username/model-name")
            map_location (str): Device the weights are loaded onto and the model
                is placed on (e.g., "cpu", "cuda")

        Returns:
            HanumanModel: Loaded model instance
        """
        tokenizer = AutoTokenizer.from_pretrained(repo_id)

        # Hyperparameters live in the repo's config.json.
        config_path = hf_hub_download(repo_id, "config.json")
        with open(config_path, "r", encoding="utf-8") as f:
            cfg = json.load(f)

        model = Hanuman(
            vocab_size=cfg["vocab_size"],
            n_positions=cfg["n_positions"],
            n_embd=cfg["n_embd"],
            n_layer=cfg["n_layer"],
            n_head=cfg["n_head"],
            # Read from config when present instead of hard-coding; defaults to
            # True for backward compatibility with repos that omit the key.
            use_think_head=cfg.get("use_think_head", True),
        )

        # NOTE(security): pytorch_model.bin is a pickle archive — only load
        # repositories you trust (or pass weights_only=True on torch >= 1.13).
        model_path = hf_hub_download(repo_id, "pytorch_model.bin")
        state = torch.load(model_path, map_location=map_location)
        model.load_state_dict(state)
        # Bug fix: load_state_dict copies tensors into the model's existing
        # (CPU-allocated) parameters, so the model must still be moved to the
        # requested device; also switch to eval mode so dropout/batch-norm
        # layers behave correctly at inference.
        model.to(map_location)
        model.eval()
        return cls(model, tokenizer)

    def generate(self, input_ids, max_new_tokens=50, temperature=1.0, top_k=50, top_p=0.95):
        """Generate text with the wrapped model's own sampling loop."""
        # Pure inference wrapper — skip autograd bookkeeping to save memory.
        with torch.no_grad():
            return self.model.generate(
                input_ids,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
            )

    def __call__(self, input_ids, **kwargs):
        """Forward pass through the underlying model."""
        return self.model(input_ids, **kwargs)