|
|
import torch
|
|
|
import torch.nn.functional as F
|
|
|
from model import MiniText
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Path to the trained checkpoint (a state_dict saved by the training script).
MODEL_PATH = "minitext.pt"



# Inference device; change to "cuda" if a GPU is available — TODO confirm
# the checkpoint/model support it.
DEVICE = "cpu"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Instantiate the network and move it to the inference device.
model = MiniText().to(DEVICE)



# map_location lets a checkpoint saved on GPU load onto CPU.
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))



# Inference mode: disables dropout / training-only behavior.
model.eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sample_logits(logits, temperature=1.0, top_k=0):
    """Sample one token id from a ``(1, vocab)`` logits tensor.

    Args:
        logits: float tensor of shape ``(batch, vocab)``; batch is expected
            to be 1 since the result is collapsed with ``.item()``.
        temperature: softmax temperature. Non-positive values fall back to
            greedy (argmax) decoding instead of dividing by zero.
        top_k: if > 0, restrict sampling to the k highest logits. Clamped
            to the vocabulary size so ``torch.topk`` cannot be asked for
            more entries than exist.

    Returns:
        int: the sampled token id.
    """
    # Greedy decoding when temperature is non-positive; the previous code
    # divided by zero here.
    if temperature <= 0:
        return torch.argmax(logits, dim=-1).item()

    logits = logits / temperature

    if top_k > 0:
        # Clamp k so top-k filtering is safe for any caller-supplied value.
        k = min(top_k, logits.size(-1))
        values, _ = torch.topk(logits, k)
        # The smallest kept logit per row; everything below it is masked out.
        min_val = values[:, -1].unsqueeze(-1)
        logits = torch.where(logits < min_val, torch.full_like(logits, -1e9), logits)

    probs = F.softmax(logits, dim=-1)
    return torch.multinomial(probs, 1).item()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate(
    prompt="o",
    max_new_tokens=300,
    temperature=0.5,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.2,
    seed=None,
    h=None
):
    """Autoregressively generate text, byte by byte, from the global model.

    The prompt is encoded as UTF-8 bytes, fed through the model to warm up
    the recurrent state, then ``max_new_tokens`` bytes are sampled one at
    a time.

    Args:
        prompt: seed text; must encode to at least one byte.
        max_new_tokens: number of bytes to sample after the prompt.
        temperature: softmax temperature forwarded to ``sample_logits``.
        top_k: top-k cutoff forwarded to ``sample_logits``.
        top_p: nucleus-sampling threshold; applied when ``0 < top_p < 1``.
            (Previously accepted but silently ignored.)
        repetition_penalty: CTRL-style penalty applied to logits of bytes
            already emitted; values != 1.0 discourage repetition.
            (Previously accepted but silently ignored.)
        seed: optional RNG seed for reproducible output.
        h: optional recurrent state from a previous call, so conversations
            keep context across turns.

    Returns:
        tuple[str, Any]: the decoded text (prompt + continuation) and the
        final recurrent state.

    Raises:
        ValueError: if the prompt encodes to zero bytes.
    """
    if seed is not None:
        torch.manual_seed(seed)
        random.seed(seed)

    bytes_in = list(prompt.encode("utf-8", errors="ignore"))
    if not bytes_in:
        # The model cannot be primed with an empty sequence; fail clearly
        # instead of crashing inside the forward pass.
        raise ValueError("prompt must encode to at least one byte")

    output = bytes_in.copy()

    # Warm up the recurrent state on the whole prompt in one pass.
    x = torch.tensor([bytes_in], dtype=torch.long, device=DEVICE)
    with torch.no_grad():
        _, h = model(x, h)

    last = x[:, -1:]

    for _ in range(max_new_tokens):
        with torch.no_grad():
            logits, h = model(last, h)

        step_logits = logits[:, -1]

        # CTRL-style repetition penalty: shrink positive logits and amplify
        # negative ones for bytes that already appear in the output.
        if repetition_penalty and repetition_penalty != 1.0:
            for byte_id in set(output):
                score = step_logits[0, byte_id]
                step_logits[0, byte_id] = (
                    score / repetition_penalty if score > 0 else score * repetition_penalty
                )

        # Nucleus (top-p) filtering: keep the smallest set of tokens whose
        # cumulative probability exceeds top_p; always keep the best token.
        if top_p is not None and 0.0 < top_p < 1.0:
            sorted_logits, sorted_idx = torch.sort(step_logits, descending=True, dim=-1)
            cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
            to_remove = cum_probs > top_p
            # Shift right so the first token crossing the threshold survives.
            to_remove[:, 1:] = to_remove[:, :-1].clone()
            to_remove[:, 0] = False
            sorted_logits = sorted_logits.masked_fill(to_remove, -1e9)
            # Scatter the filtered logits back to their original positions.
            step_logits = torch.zeros_like(step_logits).scatter(-1, sorted_idx, sorted_logits)

        next_byte = sample_logits(
            step_logits,
            temperature=temperature,
            top_k=top_k
        )

        output.append(next_byte)
        last = torch.tensor([[next_byte]], device=DEVICE)

    return bytes(output).decode(errors="ignore"), h
|
|
|
|
|
|
# Recurrent state carried across turns so the chat keeps conversational context.
h = None

print("MiniText-v1.5 Chat | digite 'exit' para sair")

while True:
    user = input("usuario: ")
    # The banner advertises 'exit', but the old code only checked for
    # "quit" — accept both so typing 'exit' actually quits.
    if user.lower() in ("exit", "quit"):
        break

    # Frame the turn the same way the training dialogs were formatted
    # (presumably — TODO confirm against the training data format).
    prompt = f"usuario: {user}\nia: "
    text, h = generate(
        prompt=prompt,
        max_new_tokens=120,
        temperature=0.5,
        top_k=50,
        top_p=0.95,
        repetition_penalty=1.2,
        h=h
    )

    # Keep only the model's reply, dropping the echoed prompt.
    reply = text.split("ia:")[-1].strip()
    print("ia:", reply)
|
|
|
|
|
|
|