JonusNattapong committed on
Commit
c139f1d
·
verified ·
1 Parent(s): b35f3f5

Upload trained model

Browse files
Files changed (1) hide show
  1. hanuman_loader.py +81 -0
hanuman_loader.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Custom Hanuman Model Loader for Hugging Face
3
+
4
+ This script allows you to load and use the custom Hanuman model from Hugging Face.
5
+ Make sure to include 'modeling.py' in your repository for this to work.
6
+
7
+ Usage:
8
+ from hanuman_loader import HanumanModel
9
+ model = HanumanModel.from_pretrained("your-username/your-model-repo")
10
+ tokenizer = AutoTokenizer.from_pretrained("your-username/your-model-repo")  # requires: from transformers import AutoTokenizer
11
+
12
+ prompt = "สวัสดี"
13
+ inputs = tokenizer(prompt, return_tensors="pt")
14
+ outputs = model.generate(inputs["input_ids"], max_new_tokens=50)
15
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
16
+ """
17
+
18
+ import torch
19
+ from transformers import AutoTokenizer
20
+ from modeling import Hanuman
21
+ import json
22
+ import os
23
+
24
+
25
class HanumanModel:
    """Thin wrapper that loads a ``Hanuman`` model together with its tokenizer.

    The wrapper holds two attributes, ``model`` and ``tokenizer``, and forwards
    ``generate`` and ``__call__`` to the wrapped model.
    """

    # File names looked up inside the repository / local directory.
    _CONFIG_NAME = "config.json"
    _WEIGHTS_NAME = "pytorch_model.bin"

    @staticmethod
    def _resolve_file(repo_id, filename):
        """Return a local filesystem path for ``filename`` of ``repo_id``.

        If ``repo_id`` is a local directory that already contains ``filename``
        the joined path is returned as-is (no network access). Otherwise the
        file is resolved through ``transformers.utils.cached_file``, which
        downloads it from the Hugging Face Hub into the local cache.
        """
        local_path = os.path.join(repo_id, filename)
        if os.path.isfile(local_path):
            return local_path

        # Imported lazily so that purely local loading does not need this helper.
        from transformers.utils import cached_file

        return cached_file(repo_id, filename)

    @classmethod
    def from_pretrained(cls, repo_id, map_location="cpu"):
        """Load the tokenizer, config and weights and build a ``HanumanModel``.

        Args:
            repo_id (str): Hugging Face repository ID (e.g. "username/model-name")
                or a path to a local directory containing ``config.json`` and
                ``pytorch_model.bin``.
            map_location (str): Passed to ``torch.load`` to choose the device the
                checkpoint tensors are mapped to.

        Returns:
            HanumanModel: Wrapper holding the instantiated model and tokenizer.

        Raises:
            KeyError: If a required hyper-parameter is missing from the config.
            OSError: If the config or weights file cannot be found or fetched.
        """
        tokenizer = AutoTokenizer.from_pretrained(repo_id)

        # The config must be resolved explicitly: joining a Hub ID with a file
        # name does not yield an existing path unless the repo is on disk.
        config_path = cls._resolve_file(repo_id, cls._CONFIG_NAME)
        with open(config_path, "r", encoding="utf-8") as f:
            cfg = json.load(f)

        model = Hanuman(
            vocab_size=cfg["vocab_size"],
            n_positions=cfg["n_positions"],
            n_embd=cfg["n_embd"],
            n_layer=cfg["n_layer"],
            n_head=cfg["n_head"],
            # Honour the config when it records this flag; fall back to the
            # previously hard-coded value otherwise.
            use_think_head=cfg.get("use_think_head", True),
        )

        model_path = cls._resolve_file(repo_id, cls._WEIGHTS_NAME)
        # SECURITY: torch.load unpickles the checkpoint, which can execute
        # arbitrary code. Only load repositories you trust; consider passing
        # weights_only=True on PyTorch versions that support it.
        state = torch.load(model_path, map_location=map_location)
        model.load_state_dict(state)

        return cls(model, tokenizer)

    def __init__(self, model, tokenizer):
        """Store the already-constructed model and tokenizer."""
        self.model = model
        self.tokenizer = tokenizer

    def generate(self, input_ids, max_new_tokens=50, temperature=1.0, top_k=50, top_p=0.95):
        """Delegate to ``self.model.generate`` with the given sampling options.

        Args:
            input_ids: Prompt token IDs, forwarded unchanged to the model.
            max_new_tokens (int): Forwarded as ``max_new_tokens``.
            temperature (float): Forwarded as ``temperature``.
            top_k (int): Forwarded as ``top_k``.
            top_p (float): Forwarded as ``top_p``.

        Returns:
            Whatever the wrapped model's ``generate`` returns.
        """
        return self.model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
        )

    def __call__(self, input_ids, **kwargs):
        """Run the wrapped model's forward call, passing ``kwargs`` through."""
        return self.model(input_ids, **kwargs)