Mauricio-100 committed on
Commit
646f4e9
·
verified ·
1 Parent(s): 376e1bc

Create model.py

Browse files
Files changed (1) hide show
  1. model.py +103 -0
model.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import random
6
+
7
+ # ----------------------
8
+ # 1️⃣ Tokenizer local simple
9
+ # ----------------------
10
class SimpleTokenizer:
    """Character-level tokenizer whose vocabulary comes from a list of texts.

    Every distinct character found in ``texts`` gets an integer id, assigned
    in sorted character order starting at 0.

    Attributes:
        stoi: mapping from character to integer id.
        itos: mapping from integer id back to character.
        vocab_size: number of distinct characters.
    """

    def __init__(self, texts):
        """Build the vocabulary from an iterable of strings."""
        alphabet = sorted(set("".join(texts)))
        self.stoi = dict(zip(alphabet, range(len(alphabet))))
        self.itos = dict(enumerate(alphabet))
        self.vocab_size = len(alphabet)

    def encode(self, text):
        """Return the list of ids for ``text``.

        Raises:
            KeyError: if ``text`` contains a character outside the vocabulary.
        """
        ids = []
        for symbol in text:
            ids.append(self.stoi[symbol])
        return ids

    def decode(self, ids):
        """Return the string spelled by the sequence of ids ``ids``.

        Raises:
            KeyError: if an id is not part of the vocabulary.
        """
        return "".join(self.itos[token_id] for token_id in ids)
22
+
23
+ # ----------------------
24
+ # 2️⃣ MiniGPT Transformer
25
+ # ----------------------
26
class MiniGPT(nn.Module):
    """Small causal (left-to-right) Transformer language model.

    Token and learned positional embeddings are summed, passed through a
    stack of ``nn.TransformerEncoderLayer`` blocks restricted by a causal
    attention mask, layer-normalised and projected to vocabulary logits.

    Args:
        vocab_size: number of distinct token ids.
        n_embd: embedding / model width.
        n_layer: number of Transformer blocks.
        n_head: number of attention heads per block.
        block_size: maximum sequence length the positional table supports.
    """

    def __init__(self, vocab_size, n_embd=64, n_layer=4, n_head=4, block_size=64):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(block_size, n_embd)
        self.blocks = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=n_embd, nhead=n_head)
            for _ in range(n_layer)
        ])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size)
        self.block_size = block_size

    def forward(self, idx):
        """Compute next-token logits for a batch of token ids.

        Args:
            idx: integer tensor of shape (B, T) with T <= ``block_size``.

        Returns:
            Tensor of shape (B, T, vocab_size). The logits at position ``t``
            depend only on tokens at positions ``<= t``.

        Raises:
            ValueError: if T exceeds ``block_size``.
        """
        _, T = idx.shape
        if T > self.block_size:
            # Fail early with a clear message instead of an opaque index
            # error from the positional embedding lookup.
            raise ValueError(
                f"sequence length {T} exceeds block_size {self.block_size}"
            )

        token_embeddings = self.token_emb(idx)  # (B, T, n_embd)
        positions = torch.arange(T, device=idx.device)
        pos_embeddings = self.pos_emb(positions)  # (T, n_embd)
        x = token_embeddings + pos_embeddings

        # True marks a (query, key) pair that must NOT attend: every key
        # strictly in the future of the query. Without this mask the encoder
        # layers attend bidirectionally and next-token training can simply
        # copy the answer from the following position.
        causal_mask = torch.triu(
            torch.ones(T, T, dtype=torch.bool, device=idx.device), diagonal=1
        )

        # The layers are built with the default batch_first=False, so they
        # expect (T, B, E).
        x = x.transpose(0, 1)
        for block in self.blocks:
            x = block(x, src_mask=causal_mask)
        x = x.transpose(0, 1)

        x = self.ln_f(x)
        logits = self.head(x)
        return logits
53
+
54
+ # ----------------------
55
+ # 3️⃣ Exemple de dataset
56
+ # ----------------------
57
texts = [
    "Bonjour je suis un mini agent IA. ",
    "L'espace est immense et mystérieux. ",
    "Les étoiles brillent dans le ciel nocturne. ",
    "Le futur de l'IA est fascinant. ",
]

tokenizer = SimpleTokenizer(texts)

# Every row is padded (or truncated) to this fixed width so the rows can be
# stacked into one rectangular tensor. Truncating keeps longer texts from
# producing ragged rows, which torch.tensor would reject.
BLOCK_SIZE = 64
# Simple padding: id 0 is reused as filler. It is also a real vocabulary entry
# (the lowest-sorted character), so padding is indistinguishable from that
# character in the tensor itself.
PAD_ID = 0

encoded = [tokenizer.encode(t)[:BLOCK_SIZE] for t in texts]
data = torch.tensor([ids + [PAD_ID] * (BLOCK_SIZE - len(ids)) for ids in encoded])
67
+
68
+ # ----------------------
69
+ # 4️⃣ Entraînement simple
70
+ # ----------------------
71
device = "cuda" if torch.cuda.is_available() else "cpu"
model = MiniGPT(vocab_size=tokenizer.vocab_size).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
# Targets equal to ignore_index contribute nothing to the loss or gradients.
loss_fn = nn.CrossEntropyLoss(ignore_index=-100)

# The whole dataset is one fixed batch, so move it to the device once rather
# than on every epoch.
idx = data.to(device)

# Rows are right-padded with id 0, which is also a real character id. Mark the
# positions beyond each text's own length as ignored so the model is not
# trained to predict padding.
lengths = torch.tensor([len(t) for t in texts], device=device)
positions = torch.arange(idx.size(1), device=device)
pad_mask = positions.unsqueeze(0) >= lengths.unsqueeze(1)
# Shift by one: the target for position t is the token at position t + 1.
targets = idx.masked_fill(pad_mask, -100)[:, 1:]

model.train()
for epoch in range(200):
    logits = model(idx)
    # Drop the last position's logits: it has no following token to predict.
    loss = loss_fn(
        logits[:, :-1, :].reshape(-1, tokenizer.vocab_size),
        targets.reshape(-1),
    )
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 20 == 0:
        print(f"Epoch {epoch} - Loss: {loss.item():.4f}")
87
+
88
+ # ----------------------
89
+ # 5️⃣ Génération de texte
90
+ # ----------------------
91
def generate(model, tokenizer, start="L", length=100):
    """Sample ``length`` new characters from ``model``, seeded with ``start``.

    Args:
        model: module mapping a (B, T) id tensor to (B, T, vocab) logits. If
            it has a ``block_size`` attribute, only the most recent
            ``block_size`` ids are fed to it at each step.
        tokenizer: object providing ``encode(str) -> list[int]`` and
            ``decode(list[int]) -> str``.
        start: non-empty prompt text.
        length: number of characters to sample.

    Returns:
        The prompt followed by the sampled characters, decoded to a string.

    Raises:
        ValueError: if ``start`` encodes to an empty sequence.

    Side effects:
        Puts ``model`` in eval mode and leaves it there.
    """
    model.eval()
    prompt_ids = tokenizer.encode(start)
    if not prompt_ids:
        raise ValueError("start must contain at least one character")

    # Follow the model's own device instead of relying on a module global.
    model_device = next(model.parameters()).device
    idx = torch.tensor([prompt_ids], dtype=torch.long, device=model_device)
    block_size = getattr(model, "block_size", None)

    # Sampling needs no gradients; skipping autograd saves memory and time.
    with torch.no_grad():
        for _ in range(length):
            # Keep the context within the positional table: the sequence
            # grows by one id per step and would otherwise overflow it.
            context = idx if block_size is None else idx[:, -block_size:]
            logits = model(context)[:, -1, :]
            probs = F.softmax(logits, dim=-1)
            next_id = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, next_id], dim=1)
    return tokenizer.decode(idx[0].tolist())
101
+
102
# Print a header, then sample from the trained model (prompt "L", default
# length) and print the result.
print("Texte généré :")
sample = generate(model, tokenizer, start="L")
print(sample)