|
|
"""hanuman_pkg
|
|
|
|
|
|
Helper to load the custom Hanuman model directly from a Hugging Face repo.
|
|
|
|
|
|
Usage:
|
|
|
from hanuman_pkg import from_pretrained
|
|
|
model, tokenizer = from_pretrained("ZombitX64/GPT4All-Model")
|
|
|
|
|
|
This will download `modeling.py`, `config.json` and `pytorch_model.bin` (if present)
|
|
|
from the repo and dynamically import the Hanuman class.
|
|
|
"""
|
|
|
from __future__ import annotations
|
|
|
|
|
|
import importlib.util
|
|
|
import json
|
|
|
import os
|
|
|
import tempfile
|
|
|
from typing import Tuple
|
|
|
|
|
|
import torch
|
|
|
from huggingface_hub import hf_hub_download
|
|
|
from transformers import AutoTokenizer
|
|
|
|
|
|
|
|
|
def _download_file(repo_id: str, filename: str) -> str:
    """Try to download `filename` from repo_id. Return local path or raise.

    Looks at the repo root first, then falls back to the
    ``out_run1/epoch-3/`` subfolder produced by the training runs.

    Args:
        repo_id: Hugging Face repo id, e.g. "username/model-repo".
        filename: Name of the file to fetch from the repo.

    Returns:
        Local filesystem path of the cached/downloaded file.

    Raises:
        RuntimeError: If the file cannot be downloaded from either location.
    """
    try:
        return hf_hub_download(repo_id, filename)
    except Exception:
        try:
            # Paths inside a HF repo are always POSIX-style ("/"-separated);
            # os.path.join would emit backslashes on Windows and never match.
            return hf_hub_download(repo_id, f"out_run1/epoch-3/{filename}")
        except Exception as e:
            # Name the actual file in the error (the old message printed the
            # literal placeholder "(unknown)").
            raise RuntimeError(
                f"Failed to download {filename} from repo {repo_id}: {e}"
            ) from e
|
|
|
|
|
|
|
|
|
def _load_module_from_path(path: str, module_name: str):
|
|
|
spec = importlib.util.spec_from_file_location(module_name, path)
|
|
|
mod = importlib.util.module_from_spec(spec)
|
|
|
loader = spec.loader
|
|
|
assert loader is not None
|
|
|
loader.exec_module(mod)
|
|
|
return mod
|
|
|
|
|
|
|
|
|
def from_pretrained(repo_id: str, map_location: str = "cpu") -> Tuple[torch.nn.Module, object]:
    """Download model artifacts from HF and return (model, tokenizer).

    Args:
        repo_id: Hugging Face repo id, e.g. "username/model-repo"
        map_location: device string used when loading the weights

    Returns:
        model: Hanuman model instance (on CPU unless moved)
        tokenizer: transformers tokenizer loaded from the repo

    Raises:
        RuntimeError: If modeling.py does not define a Hanuman class, or if
            no weights file can be downloaded, or if safetensors weights are
            found but the `safetensors` package is not installed.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)

    # Model hyper-parameters come from the repo's config.json.
    cfg_path = _download_file(repo_id, "config.json")
    with open(cfg_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    # The model class itself lives in the repo as modeling.py; import it
    # dynamically rather than shipping a copy in this package.
    modeling_path = _download_file(repo_id, "modeling.py")
    modeling_mod = _load_module_from_path(modeling_path, "hanuman_modeling")

    if not hasattr(modeling_mod, "Hanuman"):
        raise RuntimeError("Downloaded modeling.py does not define Hanuman class")

    Hanuman = modeling_mod.Hanuman

    # Fall back to sensible defaults for any keys missing from config.json
    # (n_ctx is accepted as a legacy alias for n_positions).
    model = Hanuman(
        vocab_size=cfg.get("vocab_size", 32000),
        n_positions=cfg.get("n_positions", cfg.get("n_ctx", 4096)),
        n_embd=cfg.get("n_embd", 512),
        n_layer=cfg.get("n_layer", 8),
        n_head=cfg.get("n_head", 8),
        use_think_head=cfg.get("use_think_head", True),
    )

    # Prefer the safetensors weights; fall back to the pickle-based .bin.
    try:
        state_path = _download_file(repo_id, "pytorch_model.safetensors")
    except Exception:
        try:
            state_path = _download_file(repo_id, "pytorch_model.bin")
        except Exception as e:
            raise RuntimeError(f"Failed to download model weights: {e}") from e

    if state_path.endswith(".safetensors"):
        # BUG FIX: torch.load cannot read the safetensors format — the old
        # code always crashed on the preferred path. Import lazily so the
        # .bin fallback keeps working without the optional dependency.
        try:
            from safetensors.torch import load_file
        except ImportError as e:
            raise RuntimeError(
                "Weights are in safetensors format but the `safetensors` "
                "package is not installed; run `pip install safetensors`."
            ) from e
        state = load_file(state_path, device=map_location)
    else:
        # weights_only=True avoids executing arbitrary pickled code from a
        # downloaded checkpoint; a plain state dict is tensors only.
        state = torch.load(state_path, map_location=map_location, weights_only=True)
    model.load_state_dict(state)

    return model, tokenizer
|
|
|
|