# AgGPT13nano/model.py
import json

import torch
from safetensors.torch import load_file
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
class AgGPT:
    def __init__(self, model_path="aggpt13/"):
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Int8 weights plus the per-tensor quantization parameters
        # ({"<tensor name>": {"scale": ..., "zero_point": ...}, ...}).
        quant_path = model_path + "model-int8.safetensors"
        quant_params_path = model_path + "model-int8-quant_params.json"
        quant_tensors = load_file(quant_path)
        with open(quant_params_path, "r") as f:
            quant_params = json.load(f)

        # Dequantize: invert the affine int8 mapping, x ~= (q - zero_point) * scale.
        # Tensors without an entry in quant_params are kept as-is.
        state_dict = {}
        for k, q_tensor in quant_tensors.items():
            if k in quant_params:
                scale = quant_params[k]["scale"]
                zero_point = quant_params[k]["zero_point"]
                state_dict[k] = (q_tensor.to(torch.float32) - zero_point) * scale
            else:
                state_dict[k] = q_tensor

        # Build the architecture from the config alone (no weights), then load
        # the dequantized state dict manually. strict=False tolerates tied or
        # derived tensors; printing the key lists makes any mismatch visible.
        config = AutoConfig.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_config(config)
        missing, unexpected = self.model.load_state_dict(state_dict, strict=False)
        print(f"Missing keys: {missing}")
        print(f"Unexpected keys: {unexpected}")

        self.model.to("cuda" if torch.cuda.is_available() else "cpu")
        self.model.eval()
    def ask(self, prompt):
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=50)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
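
    # A minimal sketch of a sampling variant: ask() above uses generate()'s
    # default greedy decoding. do_sample, temperature, top_p, and
    # max_new_tokens are standard transformers generate() arguments; the
    # method name and default values here are illustrative, not part of the
    # original model code or tuned for this model.
    def ask_sampled(self, prompt, max_new_tokens=50, temperature=0.8, top_p=0.95):
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,           # sample instead of greedy argmax
            temperature=temperature,  # flatten/sharpen the token distribution
            top_p=top_p,              # nucleus sampling cutoff
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)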
if __name__ == "__main__":
    agent = AgGPT()
    response = agent.ask("hey, who are you?")
    print(response)
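
# For reference, a minimal sketch of the quantization side that would produce
# model-int8.safetensors and its quant_params JSON. It is inferred from the
# dequantization formula above (q = round(x / scale) + zero_point, clipped to
# int8), not taken from the actual export script; quantize_state_dict and the
# weights-only heuristic are hypothetical.
def quantize_state_dict(state_dict):
    quant_tensors, quant_params = {}, {}
    for k, x in state_dict.items():
        if x.is_floating_point() and x.ndim >= 2:  # quantize weight matrices only
            scale = x.abs().max().item() / 127.0 or 1.0  # map max |x| to 127
            zero_point = 0  # symmetric quantization around zero
            q = torch.clamp(torch.round(x / scale) + zero_point, -128, 127)
            quant_tensors[k] = q.to(torch.int8)
            quant_params[k] = {"scale": scale, "zero_point": zero_point}
        else:
            quant_tensors[k] = x  # keep norms, biases, etc. in full precision
    return quant_tensors, quant_params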