# mini_gpt.py
import torch
import torch.nn as nn
import torch.nn.functional as F

# ----------------------
# 1️⃣ Simple local tokenizer
# ----------------------
class SimpleTokenizer:
    def __init__(self, texts):
        chars = sorted(list(set("".join(texts))))
        self.stoi = {ch: i for i, ch in enumerate(chars)}
        self.itos = {i: ch for i, ch in enumerate(chars)}
        self.vocab_size = len(chars)

    def encode(self, text):
        return [self.stoi[c] for c in text]

    def decode(self, ids):
        return "".join([self.itos[i] for i in ids])

# ----------------------
# 2️⃣ MiniGPT Transformer
# ----------------------
class MiniGPT(nn.Module):
    def __init__(self, vocab_size, n_embd=64, n_layer=4, n_head=4, block_size=64):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(block_size, n_embd)
        self.blocks = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=n_embd, nhead=n_head)
            for _ in range(n_layer)
        ])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size)
        self.block_size = block_size

    def forward(self, idx):
        B, T = idx.shape
        token_embeddings = self.token_emb(idx)        # (B, T, n_embd)
        positions = torch.arange(T, device=idx.device)
        pos_embeddings = self.pos_emb(positions)      # (T, n_embd)
        x = token_embeddings + pos_embeddings
        # TransformerEncoderLayer expects (T, B, E) by default
        x = x.transpose(0, 1)
        # Causal mask so each position only attends to earlier positions
        mask = torch.triu(torch.full((T, T), float("-inf"), device=idx.device), diagonal=1)
        for block in self.blocks:
            x = block(x, src_mask=mask)
        x = x.transpose(0, 1)
        x = self.ln_f(x)
        logits = self.head(x)                         # (B, T, vocab_size)
        return logits

# ----------------------
# 3️⃣ Example dataset
# ----------------------
texts = [
    "Bonjour je suis un mini agent IA. ",
    "L'espace est immense et mystérieux. ",
    "Les étoiles brillent dans le ciel nocturne. ",
    "Le futur de l'IA est fascinant. "
]

tokenizer = SimpleTokenizer(texts)
data = [tokenizer.encode(t) for t in texts]
data = torch.tensor([t + [0] * (64 - len(t)) for t in data])  # simple padding up to block_size

# ----------------------
# 4️⃣ Simple training loop
# ----------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
model = MiniGPT(vocab_size=tokenizer.vocab_size).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(200):
    idx = data.to(device)
    logits = model(idx)
    # Shift by one position to predict the next character
    loss = loss_fn(logits[:, :-1, :].reshape(-1, tokenizer.vocab_size),
                   idx[:, 1:].reshape(-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 20 == 0:
        print(f"Epoch {epoch} - Loss: {loss.item():.4f}")

# ----------------------
# 5️⃣ Text generation
# ----------------------
@torch.no_grad()
def generate(model, tokenizer, start="L", length=100):
    model.eval()
    idx = torch.tensor([tokenizer.encode(start)], device=device)
    for _ in range(length):
        idx_cond = idx[:, -model.block_size:]   # crop to the positional embedding range
        logits = model(idx_cond)
        logits = logits[:, -1, :]               # logits for the last position only
        probs = F.softmax(logits, dim=-1)
        next_id = torch.multinomial(probs, num_samples=1)
        idx = torch.cat([idx, next_id], dim=1)
    return tokenizer.decode(idx[0].tolist())

print("Generated text:")
print(generate(model, tokenizer, start="L"))
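
# ----------------------
# 6️⃣ Optional: temperature / top-k sampling (illustrative sketch)
# ----------------------
# A minimal variant of generate() showing temperature scaling and top-k
# filtering of the logits before sampling. It reuses the model, tokenizer and
# device defined above; the `temperature` and `top_k` parameters are
# illustrative additions and not part of the original script.
@torch.no_grad()
def generate_topk(model, tokenizer, start="L", length=100, temperature=0.8, top_k=10):
    model.eval()
    idx = torch.tensor([tokenizer.encode(start)], device=device)
    for _ in range(length):
        idx_cond = idx[:, -model.block_size:]              # crop to context window
        logits = model(idx_cond)[:, -1, :] / temperature   # scale last-position logits
        k = min(top_k, logits.size(-1))
        v, _ = torch.topk(logits, k)
        logits[logits < v[:, [-1]]] = float("-inf")        # keep only the top-k logits
        probs = F.softmax(logits, dim=-1)
        next_id = torch.multinomial(probs, num_samples=1)
        idx = torch.cat([idx, next_id], dim=1)
    return tokenizer.decode(idx[0].tolist())

# Example usage:
# print(generate_topk(model, tokenizer, start="Le ", length=80))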