Arthur Samuel Galego Panucci Figueiredo
committed on
Upload 5 files
Browse files- checkpoint.pt +3 -0
- infer.py +97 -0
- minitext.pt +3 -0
- model.py +15 -0
- train.py +79 -0
checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22eb08b8bfa508f28e0d5d4e531a4c4cff7207375afe34aeab7d7787a92e198e
|
| 3 |
+
size 129845
|
infer.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn.functional as F
|
| 3 |
+
from model import MiniText
|
| 4 |
+
import random
|
| 5 |
+
|
| 6 |
+
# -----------------------
# config
# -----------------------
MODEL_PATH = "minitext.pt"   # weights produced by train.py
DEVICE = "cpu"               # inference runs comfortably on CPU for a model this small

# -----------------------
# load model
# -----------------------
# Restore the trained byte-level model and switch it to inference mode.
model = MiniText().to(DEVICE)
state = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(state)
model.eval()
|
| 18 |
+
|
| 19 |
+
# -----------------------
|
| 20 |
+
# sampling utils
|
| 21 |
+
# -----------------------
|
| 22 |
+
def sample_logits(logits, temperature=1.0, top_k=0):
    """Sample a single token id from a row of logits.

    Args:
        logits: tensor of shape (batch, vocab); in practice batch is 1,
            since a single Python int is returned.
        temperature: softmax temperature. Values <= 0 fall back to greedy
            argmax (the original code divided by zero / a negative number).
        top_k: if > 0, restrict sampling to the k highest-scoring tokens.

    Returns:
        int: the sampled token id.
    """
    if temperature <= 0:
        # Degenerate temperature: be greedy instead of producing NaNs.
        return torch.argmax(logits, dim=-1).item()

    logits = logits / temperature

    if top_k > 0:
        # Clamp k so a top_k larger than the vocabulary cannot crash topk().
        k = min(top_k, logits.size(-1))
        values, _ = torch.topk(logits, k)
        min_val = values[:, -1].unsqueeze(-1)
        # Push everything below the k-th best score to effectively -inf.
        logits = torch.where(logits < min_val, torch.full_like(logits, -1e9), logits)

    probs = F.softmax(logits, dim=-1)
    return torch.multinomial(probs, 1).item()
|
| 32 |
+
|
| 33 |
+
# -----------------------
|
| 34 |
+
# text generation
|
| 35 |
+
# -----------------------
|
| 36 |
+
def generate(
    prompt="o",
    max_new_tokens=300,
    temperature=0.5,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.2,
    seed=None,
    h=None
):
    """Autoregressively generate UTF-8 text from the module-level `model`.

    Args:
        prompt: seed text, encoded to raw bytes (invalid chars ignored).
        max_new_tokens: number of bytes to generate after the prompt.
        temperature: softmax temperature forwarded to sample_logits.
        top_k: top-k cutoff forwarded to sample_logits.
        top_p: nucleus cutoff in (0, 1); tokens outside the smallest set
            with cumulative probability >= top_p are masked out.
            (Previously accepted but silently ignored.)
        repetition_penalty: > 1.0 damps logits of already-emitted bytes
            (CTRL-style). (Previously accepted but silently ignored.)
        seed: optional RNG seed for reproducible sampling.
        h: optional GRU hidden state to continue a conversation.

    Returns:
        (text, h): decoded output (prompt included) and the final hidden state.
    """
    if seed is not None:
        torch.manual_seed(seed)
        random.seed(seed)

    prompt_bytes = list(prompt.encode("utf-8", errors="ignore"))
    output = prompt_bytes.copy()

    # The RNN needs at least one input byte; an empty prompt would crash on
    # logits[:, -1]. Feed a space as a neutral seed without adding it to output.
    feed = prompt_bytes if prompt_bytes else [0x20]

    # feed prompt
    x = torch.tensor([feed], dtype=torch.long, device=DEVICE)
    with torch.no_grad():
        _, h = model(x, h)

    last = x[:, -1:]

    for _ in range(max_new_tokens):
        with torch.no_grad():
            logits, h = model(last, h)

        step = logits[:, -1].clone()

        # CTRL-style repetition penalty: damp bytes we already emitted.
        if repetition_penalty and repetition_penalty != 1.0 and output:
            seen = torch.tensor(sorted(set(output)), dtype=torch.long, device=DEVICE)
            picked = step[0, seen]
            step[0, seen] = torch.where(
                picked > 0, picked / repetition_penalty, picked * repetition_penalty
            )

        # Nucleus (top-p) filtering: drop the probability tail.
        if top_p is not None and 0.0 < top_p < 1.0:
            sorted_logits, sorted_idx = torch.sort(step, descending=True)
            probs = F.softmax(sorted_logits, dim=-1)
            cum = torch.cumsum(probs, dim=-1)
            # Shift by one token so the first token crossing top_p is kept.
            drop = (cum - probs) > top_p
            sorted_logits = sorted_logits.masked_fill(drop, -1e9)
            step = torch.full_like(step, -1e9).scatter(-1, sorted_idx, sorted_logits)

        next_byte = sample_logits(
            step,
            temperature=temperature,
            top_k=top_k
        )

        output.append(next_byte)
        last = torch.tensor([[next_byte]], device=DEVICE)

    return bytes(output).decode(errors="ignore"), h
|
| 74 |
+
|
| 75 |
+
# -----------------------
# interactive chat loop
# -----------------------
h = None  # GRU hidden state carried across turns so replies keep context

print("MiniText-v1.5 Chat | digite 'exit' para sair")

while True:
    user = input("usuario: ").strip()
    # Bug fix: the banner advertises 'exit' but only 'quit' was accepted;
    # accept both so the documented command actually works.
    if user.lower() in ("exit", "quit"):
        break

    prompt = f"usuario: {user}\nia: "
    text, h = generate(
        prompt=prompt,
        max_new_tokens=120,
        temperature=0.5,
        top_k=50,
        top_p=0.95,
        repetition_penalty=1.2,
        h=h
    )

    # Keep only the model's reply after the last "ia:" marker.
    reply = text.split("ia:")[-1].strip()
    print("ia:", reply)
|
| 97 |
+
|
minitext.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23e5a67e5738c67a9dbf49dc1e26bf43d0ff224330863415561fff082c42c41a
|
| 3 |
+
size 43614
|
model.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
class MiniText(nn.Module):
    """Tiny byte-level language model: embedding -> single GRU -> vocab head.

    Works directly on raw bytes, so the vocabulary size is fixed at 256.
    """

    def __init__(self):
        super().__init__()
        vocab, hidden = 256, 16
        self.embed = nn.Embedding(vocab, hidden)
        self.gru = nn.GRU(hidden, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, vocab)

    def forward(self, x, h=None):
        """Return (logits, hidden) for byte ids `x` of shape (batch, seq)."""
        emb = self.embed(x)
        seq_out, h = self.gru(emb, h)
        return self.fc(seq_out), h
|
train.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import os
|
| 4 |
+
from model import MiniText
|
| 5 |
+
|
| 6 |
+
# -----------------------
# hyperparameters
# -----------------------
SEQ_LEN = 64                      # bytes per training sequence
EPOCHS = 12000                    # total steps (one random batch each)
LR = 1e-4
SAVE_EVERY = 2000                 # save a checkpoint every X epochs
CHECKPOINT_PATH = "checkpoint.pt"


# -----------------------
# dataset
# -----------------------
# Read the corpus as raw bytes so the vocabulary is simply 0..255.
with open("dataset.txt", "rb") as f:
    raw = f.read()
data = torch.tensor(list(raw), dtype=torch.long)
|
| 21 |
+
|
| 22 |
+
# -----------------------
# model + optimizer
# -----------------------
model = MiniText()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
loss_fn = nn.CrossEntropyLoss()

start_epoch = 0

# -----------------------
# resume from checkpoint (if one exists)
# -----------------------
if os.path.exists(CHECKPOINT_PATH):
    print("Checkpoint encontrado, retomando treino...")
    ckpt = torch.load(CHECKPOINT_PATH)
    model.load_state_dict(ckpt["model"])
    optimizer.load_state_dict(ckpt["optimizer"])
    # resume at the step after the last completed one
    start_epoch = ckpt["epoch"] + 1
else:
    print("Nenhum checkpoint encontrado, treino do zero.")
|
| 42 |
+
|
| 43 |
+
# -----------------------
|
| 44 |
+
# batch sampler
|
| 45 |
+
# -----------------------
|
| 46 |
+
def get_batch():
    """Return one random (input, target) pair, each of shape (1, SEQ_LEN).

    Targets are the inputs shifted one byte to the right (next-byte
    prediction over the module-level `data` tensor).
    """
    # .item() converts the 1-element tensor to a plain int. The original
    # sliced with the tensor itself, which only works by accident through
    # Tensor.__index__ and is fragile/unidiomatic.
    start = torch.randint(0, len(data) - SEQ_LEN - 1, (1,)).item()
    x = data[start:start + SEQ_LEN].unsqueeze(0)
    y = data[start + 1:start + SEQ_LEN + 1].unsqueeze(0)
    return x, y
|
| 51 |
+
|
| 52 |
+
# -----------------------
# training loop
# -----------------------
for epoch in range(start_epoch, EPOCHS):
    # One random sequence per step (an "epoch" here is really one batch).
    x, y = get_batch()
    logits, _ = model(x)
    loss = loss_fn(logits.view(-1, 256), y.view(-1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {loss.item():.4f}")

    # periodically persist full training state so training can resume
    if (epoch + 1) % SAVE_EVERY == 0:
        state = {
            "epoch": epoch,
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict()
        }
        torch.save(state, CHECKPOINT_PATH)
        print("Checkpoint salvo.")

# -----------------------
# save final model
# -----------------------
torch.save(model.state_dict(), "minitext.pt")
print("Treino finalizado. Modelo salvo em minitext.pt")
|