|
|
import json
import os

import torch
from safetensors.torch import load_file
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
class AgGPT:
    """Wrapper around a locally stored, int8-quantized causal language model.

    Loads quantized weights from a safetensors file, dequantizes them back to
    float32 using the per-tensor scale/zero-point recorded in a companion JSON
    file, and exposes a minimal ``ask()`` text-generation interface.
    """

    def __init__(self, model_path="aggpt13/"):
        """Load tokenizer, config, and dequantized weights from *model_path*.

        Args:
            model_path: Directory containing the tokenizer/config files plus
                ``model-int8.safetensors`` and
                ``model-int8-quant_params.json``. Works with or without a
                trailing slash.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

        # os.path.join is robust to a missing trailing slash; the original
        # string concatenation required model_path to end with "/".
        quant_path = os.path.join(model_path, "model-int8.safetensors")
        quant_params_path = os.path.join(model_path, "model-int8-quant_params.json")

        quant_tensors = load_file(quant_path)
        with open(quant_params_path, "r") as f:
            quant_params = json.load(f)

        state_dict = self._dequantize(quant_tensors, quant_params)

        # Build an uninitialized model skeleton from config only (no weight
        # download), then fill it with the dequantized tensors.
        config = AutoConfig.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_config(config)

        # strict=False: tensors absent from the checkpoint (e.g. non-persistent
        # buffers) keep their freshly-initialized values; report both lists so
        # a bad checkpoint is visible.
        missing, unexpected = self.model.load_state_dict(state_dict, strict=False)
        print(f"Missing keys: {missing}")
        print(f"Unexpected keys: {unexpected}")

        self.model.to("cuda" if torch.cuda.is_available() else "cpu")
        self.model.eval()

    @staticmethod
    def _dequantize(quant_tensors, quant_params):
        """Return a float32 state dict rebuilt from quantized tensors.

        Args:
            quant_tensors: Mapping of parameter name -> quantized tensor.
            quant_params: Mapping of parameter name -> {"scale", "zero_point"}.
                Names absent from this mapping were stored unquantized and are
                passed through untouched.

        Returns:
            dict of parameter name -> dequantized (or passthrough) tensor.
        """
        state_dict = {}
        for name, q_tensor in quant_tensors.items():
            params = quant_params.get(name)
            if params is not None:
                # Affine dequantization: x = (q - zero_point) * scale.
                scale = params["scale"]
                zero_point = params["zero_point"]
                state_dict[name] = (q_tensor.to(torch.float32) - zero_point) * scale
            else:
                state_dict[name] = q_tensor
        return state_dict

    def ask(self, prompt):
        """Generate a completion for *prompt* and return the decoded text.

        Args:
            prompt: Input text fed to the tokenizer.

        Returns:
            The full decoded sequence (prompt + up to 50 new tokens) with
            special tokens stripped.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        # Inference only: skip autograd bookkeeping on the frozen model.
        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_new_tokens=50)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: load the local model and answer one prompt.
    bot = AgGPT()
    print(bot.ask("hey, who are you?"))
|
|
|