# mini_gpt.py
import torch
import torch.nn as nn
import torch.nn.functional as F
# ----------------------
# 1️⃣ Simple local tokenizer
# ----------------------
class SimpleTokenizer:
    """Character-level tokenizer built from the training texts."""
    def __init__(self, texts):
        chars = sorted(list(set("".join(texts))))
        self.stoi = {ch: i for i, ch in enumerate(chars)}  # char -> id
        self.itos = {i: ch for i, ch in enumerate(chars)}  # id -> char
        self.vocab_size = len(chars)

    def encode(self, text):
        return [self.stoi[c] for c in text]

    def decode(self, ids):
        return "".join([self.itos[i] for i in ids])
# ----------------------
# 2️⃣ MiniGPT Transformer
# ----------------------
class MiniGPT(nn.Module):
    def __init__(self, vocab_size, n_embd=64, n_layer=4, n_head=4, block_size=64):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(block_size, n_embd)
        self.blocks = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=n_embd, nhead=n_head)
            for _ in range(n_layer)
        ])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size)
        self.block_size = block_size

    def forward(self, idx):
        B, T = idx.shape
        token_embeddings = self.token_emb(idx)    # (B, T, n_embd)
        positions = torch.arange(T, device=idx.device)
        pos_embeddings = self.pos_emb(positions)  # (T, n_embd), broadcast over B
        x = token_embeddings + pos_embeddings
        # nn.TransformerEncoderLayer expects (T, B, E) by default
        x = x.transpose(0, 1)
        # Causal mask: each position may only attend to earlier positions.
        # Without it the encoder sees the very characters it must predict.
        mask = torch.triu(torch.full((T, T), float("-inf"), device=idx.device),
                          diagonal=1)
        for block in self.blocks:
            x = block(x, src_mask=mask)
        x = x.transpose(0, 1)
        x = self.ln_f(x)
        logits = self.head(x)                     # (B, T, vocab_size)
        return logits
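
# Illustrative shape check (not part of the original script): a randomly
# initialized MiniGPT maps (B, T) token ids to (B, T, vocab_size) logits.
# The vocab size of 10 below is an arbitrary placeholder for this check only.
_probe = MiniGPT(vocab_size=10)
assert _probe(torch.zeros(2, 8, dtype=torch.long)).shape == (2, 8, 10)
del _probe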
# ----------------------
# 3️⃣ Example dataset
# ----------------------
texts = [
    "Bonjour je suis un mini agent IA. ",
    "L'espace est immense et mystérieux. ",
    "Les étoiles brillent dans le ciel nocturne. ",
    "Le futur de l'IA est fascinant. "
]
tokenizer = SimpleTokenizer(texts)
data = [tokenizer.encode(t) for t in texts]
# Naive padding: pad each sequence to block_size (64) with token id 0.
# Note that id 0 is a real character here; there is no dedicated pad token.
data = torch.tensor([t + [0] * (64 - len(t)) for t in data])
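
# Quick sanity check (illustrative addition): the tokenizer should round-trip
# any string made of characters it has already seen.
assert tokenizer.decode(tokenizer.encode("Bonjour")) == "Bonjour"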
# ----------------------
# 4️⃣ Simple training loop
# ----------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
model = MiniGPT(vocab_size=tokenizer.vocab_size).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
for epoch in range(200):
    idx = data.to(device)
    logits = model(idx)
    # Shift by one position so the model predicts the next character
    loss = loss_fn(logits[:, :-1, :].reshape(-1, tokenizer.vocab_size),
                   idx[:, 1:].reshape(-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 20 == 0:
        print(f"Epoch {epoch} - Loss: {loss.item():.4f}")
# ----------------------
# 5️⃣ Text generation
# ----------------------
@torch.no_grad()
def generate(model, tokenizer, start="L", length=100):
    model.eval()
    idx = torch.tensor([tokenizer.encode(start)], device=device)
    for _ in range(length):
        # Crop the context to the last block_size tokens, otherwise the
        # position embedding would be indexed out of range once the
        # generated sequence grows past block_size.
        idx_cond = idx[:, -model.block_size:]
        logits = model(idx_cond)
        logits = logits[:, -1, :]  # logits for the last position only
        probs = F.softmax(logits, dim=-1)
        next_id = torch.multinomial(probs, num_samples=1)
        idx = torch.cat([idx, next_id], dim=1)
    return tokenizer.decode(idx[0].tolist())
print("Texte généré :")
print(generate(model, tokenizer, start="L"))