# AgGPT13nano/model.py
import json

import torch
from safetensors.torch import load_file
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
class AgGPT:
    def __init__(self, model_path="aggpt13/"):
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Int8 weights plus the per-tensor quantization parameters
        # ({"<tensor name>": {"scale": ..., "zero_point": ...}, ...}).
        quant_path = model_path + "model-int8.safetensors"
        quant_params_path = model_path + "model-int8-quant_params.json"
        quant_tensors = load_file(quant_path)
        with open(quant_params_path, "r") as f:
            quant_params = json.load(f)

        # Dequantize: invert the affine int8 mapping, x ~= (q - zero_point) * scale.
        # Tensors without an entry in quant_params are kept as-is.
        state_dict = {}
        for k, q_tensor in quant_tensors.items():
            if k in quant_params:
                scale = quant_params[k]["scale"]
                zero_point = quant_params[k]["zero_point"]
                state_dict[k] = (q_tensor.to(torch.float32) - zero_point) * scale
            else:
                state_dict[k] = q_tensor

        # Build the architecture from the config alone (no weights), then load
        # the dequantized state dict manually. strict=False tolerates tied or
        # derived tensors; printing the key lists makes any mismatch visible.
        config = AutoConfig.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_config(config)
        missing, unexpected = self.model.load_state_dict(state_dict, strict=False)
        print(f"Missing keys: {missing}")
        print(f"Unexpected keys: {unexpected}")

        self.model.to("cuda" if torch.cuda.is_available() else "cpu")
        self.model.eval()
    def ask(self, prompt):
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=50)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
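
    # A minimal sketch of a sampling variant: ask() above uses generate()'s
    # default greedy decoding. do_sample, temperature, top_p, and
    # max_new_tokens are standard transformers generate() arguments; the
    # method name and default values here are illustrative, not part of the
    # original model code or tuned for this model.
    def ask_sampled(self, prompt, max_new_tokens=50, temperature=0.8, top_p=0.95):
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,           # sample instead of greedy argmax
            temperature=temperature,  # flatten/sharpen the token distribution
            top_p=top_p,              # nucleus sampling cutoff
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)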
if __name__ == "__main__":
    agent = AgGPT()
    response = agent.ask("hey, who are you?")
    print(response)
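
# For reference, a minimal sketch of the quantization side that would produce
# model-int8.safetensors and its quant_params JSON. It is inferred from the
# dequantization formula above (q = round(x / scale) + zero_point, clipped to
# int8), not taken from the actual export script; quantize_state_dict and the
# weights-only heuristic are hypothetical.
def quantize_state_dict(state_dict):
    quant_tensors, quant_params = {}, {}
    for k, x in state_dict.items():
        if x.is_floating_point() and x.ndim >= 2:  # quantize weight matrices only
            scale = x.abs().max().item() / 127.0 or 1.0  # map max |x| to 127
            zero_point = 0  # symmetric quantization around zero
            q = torch.clamp(torch.round(x / scale) + zero_point, -128, 127)
            quant_tensors[k] = q.to(torch.int8)
            quant_params[k] = {"scale": scale, "zero_point": zero_point}
        else:
            quant_tensors[k] = x  # keep norms, biases, etc. in full precision
    return quant_tensors, quant_params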