Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import torch.nn as nn | |
| from torch.nn import functional as F | |
| from safetensors.torch import load_file | |
| import json | |
| import os | |
# --- 1. CONFIGURATION LOADING ---
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Hyper-parameters pulled out of the JSON config.
n_embd = config["n_embd"]
n_head = config["n_head"]
n_layer = config["n_layer"]
block_size = config["block_size"]
vocab_size = config["vocab_size"]
# Vocabulary tables: stoi maps char -> token id. JSON object keys are
# always strings, so the id -> char table is rebuilt with int keys.
stoi = config["stoi"]
itos = {int(token_id): ch for token_id, ch in config["itos"].items()}
# --- 2. MODEL ARCHITECTURE (aligned with the trained checkpoint) ---
class SelfAttention(nn.Module):
    """Multi-head causal (masked) self-attention.

    The parameter names ``qkv_proj`` / ``out_proj`` are kept so the
    module's state_dict keys match the trained safetensors weights.
    """

    def __init__(self, n_embd, n_head):
        super().__init__()
        # Fused projection producing query, key and value in one matmul.
        self.qkv_proj = nn.Linear(n_embd, 3 * n_embd, bias=False)
        self.out_proj = nn.Linear(n_embd, n_embd, bias=False)
        self.n_head = n_head

    def forward(self, x):
        B, T, C = x.size()
        # BUGFIX: split on the local channel size C instead of the
        # module-level global `n_embd` — the original silently broke for
        # any embedding size other than the script-wide constant and
        # coupled the layer to the script's globals.
        q, k, v = self.qkv_proj(x).split(C, dim=2)
        hs = C // self.n_head  # per-head channel size
        q = q.view(B, T, self.n_head, hs).transpose(1, 2)  # (B, nh, T, hs)
        k = k.view(B, T, self.n_head, hs).transpose(1, 2)
        v = v.view(B, T, self.n_head, hs).transpose(1, 2)
        # Scaled dot-product attention scores: (B, nh, T, T).
        att = (q @ k.transpose(-2, -1)) * (1.0 / hs ** 0.5)
        # Causal mask: -inf strictly above the diagonal forbids attending
        # to future positions (same values as ones*(-inf) triu'd).
        mask = torch.triu(
            torch.full((T, T), float('-inf'), device=x.device), diagonal=1
        )
        att = F.softmax(att + mask, dim=-1)
        y = att @ v  # (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C)  # re-merge heads
        return self.out_proj(y)
class Block(nn.Module):
    """Transformer block: pre-norm causal self-attention followed by a
    pre-norm position-wise feed-forward MLP, each wrapped in a residual
    connection."""

    def __init__(self, n_embd, n_head):
        super().__init__()
        self.sa = SelfAttention(n_embd, n_head)
        hidden = 4 * n_embd  # standard 4x MLP expansion
        self.ffwd = nn.Sequential(
            nn.Linear(n_embd, hidden),
            nn.GELU(),
            nn.Linear(hidden, n_embd),
            nn.Dropout(0.1),
        )
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        return x + self.ffwd(self.ln2(x))
class CygnisAlpha(nn.Module):
    """Character-level GPT-style language model.

    Hyper-parameters default to the values loaded from config.json, so
    the original no-argument construction ``CygnisAlpha()`` is unchanged;
    passing explicit values makes the class reusable outside this script.
    """

    def __init__(self, vocab_size=vocab_size, n_embd=n_embd, n_head=n_head,
                 n_layer=n_layer, block_size=block_size):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, n_embd)
        self.position_embedding = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.lm_head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx):
        """Return next-token logits of shape (B, T, vocab_size) for a
        (B, T) batch of token ids."""
        B, T = idx.shape
        tok_emb = self.token_embedding(idx)                                   # (B, T, n_embd)
        pos_emb = self.position_embedding(torch.arange(T, device=idx.device)) # (T, n_embd)
        x = self.blocks(tok_emb + pos_emb)  # broadcast add of positions
        logits = self.lm_head(self.ln_f(x))
        return logits
# --- 3. INITIALIZATION ---
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CygnisAlpha().to(device)

# Final merged checkpoint file.
model_path = "alpha_cycle_8.safetensors"
if os.path.exists(model_path):
    model.load_state_dict(load_file(model_path))
    model.eval()  # inference mode: disables dropout
    print(f"✅ Modèle chargé : {model_path}")
else:
    print(f"⚠️ Erreur : {model_path} non trouvé dans le répertoire.")
# --- 4. STABILIZED GENERATION LOGIC ---
def generate_response(message, history, temperature=0.4, max_tokens=150):
    """Generate a character-level completion for ``message``.

    Parameters
    ----------
    message : str
        User prompt. Characters absent from the vocabulary map to the
        id of ' ' (or 0 as a last resort).
    history : list
        Chat history supplied by gr.ChatInterface; unused — the model
        only conditions on the current message.
    temperature : float
        Softmax temperature, clamped to >= 0.01 to avoid division blow-up.
    max_tokens : int
        Upper bound on the number of generated characters.

    Returns
    -------
    str
        The generated continuation (prompt stripped), whitespace-trimmed.
    """
    fallback_id = stoi.get(' ', 0)  # hoisted: computed once, not per char
    # BUGFIX: an empty prompt would feed the model a zero-length sequence;
    # seed it with a single space token instead.
    prompt_ids = [stoi.get(c, fallback_id) for c in message] or [fallback_id]
    input_ids = torch.tensor([prompt_ids], dtype=torch.long, device=device)
    prompt_len = input_ids.size(1)
    generated = input_ids
    temp = max(temperature, 0.01)  # loop-invariant, hoisted

    # no_grad wraps the whole sampling loop instead of being re-entered
    # every step; nothing here needs gradients.
    with torch.no_grad():
        for _ in range(max_tokens):
            cond = generated[:, -block_size:]  # crop to context window
            logits = model(cond)[:, -1, :] / temp
            # Immediate-repetition filter: if the last two characters are
            # identical, strongly discourage a third.
            if generated.size(1) >= 2 and generated[0, -1] == generated[0, -2]:
                logits[0, generated[0, -1]] -= 15.0
            # Frequency penalty over the response so far (breaks loops).
            response_so_far = generated[0, prompt_len:]
            if response_so_far.numel() > 0:
                char_counts = torch.bincount(response_so_far, minlength=vocab_size)
                logits[0] -= char_counts * 0.8
            probs = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            generated = torch.cat((generated, next_token), dim=1)
            # Stop on a sentence-ending '.' once a minimal length is reached.
            if itos.get(next_token.item(), '') == "." and generated.size(1) > prompt_len + 20:
                break

    # Decode only the newly generated part.
    return "".join(itos.get(i.item(), '') for i in generated[0, prompt_len:]).strip()
# --- 5. GRADIO INTERFACE ---
# The extra sliders are forwarded to generate_response as
# (temperature, max_tokens); each example row therefore carries
# [message, temperature, max_tokens].
temperature_slider = gr.Slider(0.1, 1.2, value=0.4, label="Température (Stable < 0.5)")
max_tokens_slider = gr.Slider(50, 500, value=150, step=10, label="Tokens Max")

demo = gr.ChatInterface(
    fn=generate_response,
    title="🌌 Cygnis Alpha v1.0",
    description="Identité scellée : Simon Chusseau. Architecture 162M.",
    examples=[
        ["Qui est ton créateur ?", 0.3, 100],
        ["Explique la singularité technologique.", 0.6, 200],
        ["Qui es-tu ?", 0.4, 100],
    ],
    additional_inputs=[temperature_slider, max_tokens_slider],
)

if __name__ == "__main__":
    demo.launch()