# Hugging Face Spaces app — the hosted Space was last showing "Runtime error".
| #!/usr/bin/env python3 | |
| import os | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| import gradio as gr | |
| import json | |
| import math | |
| import re | |
| import warnings | |
| from pathlib import Path | |
| from huggingface_hub import snapshot_download | |
| from safetensors.torch import load_file | |
| from transformers import AutoTokenizer, PreTrainedModel, PretrainedConfig | |
| from transformers.modeling_outputs import CausalLMOutputWithPast | |
# Configuration
# Avoid fork-related deadlock warnings from the HF fast tokenizers.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# NOTE(review): blanket-suppressing *all* warnings hides real problems; consider
# narrowing this to the specific categories that are noisy here.
warnings.filterwarnings('ignore')
# Hugging Face Hub repository the weights/tokenizer are pulled from.
MODEL_ID = "amewebstudio/mnemosyne-multimodal-v4"
| # ============================================================================== | |
| # SYSTÈME DE COGNITION & CALCUL | |
| # ============================================================================== | |
class ConversationMemory:
    """Remembers simple user facts (name, location) mentioned in chat text."""

    # (compiled regex, fact key) pairs — compiled once at class creation.
    _FACT_PATTERNS = (
        (re.compile(r"(?:je m'appelle|mon nom est) (\w+)", re.I), "nom_utilisateur"),
        (re.compile(r"(?:j'habite à|je vis à) (\w+)", re.I), "localisation"),
    )

    def __init__(self):
        # fact key -> capitalized value extracted from user messages
        self.facts = {}

    def extract_facts(self, text):
        """Scan *text* against the known patterns and record any matches."""
        for pattern, key in self._FACT_PATTERNS:
            hit = pattern.search(text)
            if hit is None:
                continue
            self.facts[key] = hit.group(1).capitalize()

    def get_context(self):
        """Render the stored facts as a memory banner, or '' when empty."""
        if not self.facts:
            return ""
        rendered = ", ".join(f"{k}: {v}" for k, v in self.facts.items())
        return "\n[MÉMOIRE]: " + rendered


# Module-level singleton shared by all chat turns.
memory = ConversationMemory()
| # ============================================================================== | |
| # ARCHITECTURE MNEMOSYNE (SCLM) | |
| # ============================================================================== | |
class MnemosyneConfig(PretrainedConfig):
    """Configuration for MnemosyneLM: only vocab and hidden sizes are custom."""

    model_type = "mnemosyne"

    def __init__(self, vocab_size=128256, hidden_size=3072, **kw):
        # Any other config.json keys (eos_token_id, …) are handled by the base class.
        super().__init__(**kw)
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
class RMSNorm(nn.Module):
    """Root-mean-square normalization with a learned per-channel gain."""

    def __init__(self, hs, eps=1e-5):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(hs))

    def forward(self, x):
        # Do the math in float32 for stability, then cast back to the input dtype.
        x32 = x.float()
        inv_rms = torch.rsqrt(x32.pow(2).mean(-1, keepdim=True) + self.eps)
        normalized = self.weight * x32 * inv_rms
        return normalized.to(x.dtype)
class MnemosyneLM(PreTrainedModel):
    """Minimal causal LM: embedding -> RMSNorm -> linear head.

    NOTE(review): there are no attention/transformer layers here, so the logits
    at each position depend only on the token at that position — confirm this
    matches the intended checkpoint architecture.
    """

    config_class = MnemosyneConfig

    def __init__(self, cfg):
        super().__init__(cfg)
        self.model_part = nn.ModuleDict({
            "embed": nn.Embedding(cfg.vocab_size, cfg.hidden_size),
            "norm": RMSNorm(cfg.hidden_size),
        })
        self.lm_head = nn.Linear(cfg.hidden_size, cfg.vocab_size, bias=False)

    # FIX: run generation under no_grad — previously every decoded token grew
    # the autograd graph, ballooning CPU memory across up to 256 steps.
    @torch.no_grad()
    def generate(self, input_ids, max_new_tokens=256):
        """Greedy decoding optimized for CPU.

        Assumes batch size 1 (uses ``.item()`` on the sampled token).
        Returns the prompt tokens with the generated tokens appended.
        """
        # FIX: eos_token_id may be None (PretrainedConfig default); make the
        # previously-implicit "never break" behavior explicit.
        eos_id = getattr(self.config, "eos_token_id", None)
        for _ in range(max_new_tokens):
            # Cap the attention-free context window to bound per-step CPU cost.
            outputs = self(input_ids[:, -512:])
            logits = outputs.logits[:, -1, :]
            next_token = torch.argmax(logits, dim=-1, keepdim=True)
            input_ids = torch.cat([input_ids, next_token], dim=-1)
            if eos_id is not None and next_token.item() == eos_id:
                break
        return input_ids

    def forward(self, input_ids, **kwargs):
        """Return logits of shape (batch, seq, vocab); extra kwargs are ignored."""
        x = self.model_part["embed"](input_ids)
        x = self.model_part["norm"](x)
        return CausalLMOutputWithPast(logits=self.lm_head(x))
| # ============================================================================== | |
| # CHARGEMENT (OPTIMISATION CPU 16GB) | |
| # ============================================================================== | |
print("📦 Chargement Mnemosyne v4.3.4 (MODE CPU STABLE)...")
# Download (or reuse the cached copy of) the model repo from the HF Hub.
model_path = Path(snapshot_download(MODEL_ID))
tokenizer = AutoTokenizer.from_pretrained(model_path)
with open(model_path / "config.json") as f:
    cfg_data = json.load(f)
# Force float32 for precision on CPU (a bfloat16 fallback is not implemented).
model = MnemosyneLM(MnemosyneConfig(**cfg_data)).to(torch.float32)
print("📂 Chargement des poids (Sharded Safetensors)...")
safetensor_files = list(model_path.glob("*.safetensors"))
for s_file in sorted(safetensor_files):
    weights = load_file(s_file, device="cpu")
    # Map checkpoint keys onto the local `model_part` module structure.
    # NOTE(review): str.replace rewrites *every* occurrence of "model." in a
    # key, not only a leading prefix — verify no checkpoint key contains
    # "model." elsewhere in its path.
    state_dict = {k.replace("mnemosyne.backbone.", "").replace("model.", "model_part."): v.to(torch.float32) for k, v in weights.items()}
    # strict=False: each shard holds only a subset of the weights, so missing
    # keys are expected here (it also silently ignores unexpected keys).
    model.load_state_dict(state_dict, strict=False)
model.eval()
print("✅ Modèle chargé avec succès sur CPU.")
| # ============================================================================== | |
| # LOGIQUE DE CHAT MULTIMODALE | |
| # ============================================================================== | |
def chat_process(message, history):
    """Handle one multimodal chat turn and return the assistant's reply.

    message: Gradio MultimodalTextbox payload, a dict with "text" and "files".
    history: list of {"role": ..., "content": ...} message dicts.
    """
    user_text = message["text"]
    files = message["files"]
    # Update the long-term fact memory from the raw user text.
    memory.extract_facts(user_text)

    # Attached files are only *mentioned* in the prompt; their content is not parsed.
    file_context = ""
    if files:
        file_context = "\n[Système: L'utilisateur a envoyé des fichiers/audios. Analyse en cours...]"

    # Build the prompt: system turn, then prior turns, then the current user turn.
    sys_msg = f"Tu es Mnemosyne v4.3.4 par Mike Amega. {memory.get_context()}{file_context}"
    prompt = f"<|system|>\n{sys_msg}<|eot_id|>"
    for turn in history:
        tag = "<|user|>" if turn['role'] == 'user' else "<|assistant|>"
        prompt += f"{tag}\n{turn['content']}<|eot_id|>"
    # FIX: terminate the user turn with <|eot_id|> like every other turn
    # (it was previously left unclosed, inconsistent with the history format).
    prompt += f"<|user|>\n{user_text}<|eot_id|><|assistant|>\n"

    # Encode and generate.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(inputs.input_ids, max_new_tokens=150)
    # FIX: decode only the newly generated tokens. The old
    # full_text.split("assistant")[-1] broke because skip_special_tokens strips
    # the <|assistant|> marker, and it also broke whenever the reply itself
    # contained the word "assistant".
    new_tokens = outputs[0][inputs.input_ids.shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return response
| # ============================================================================== | |
| # INTERFACE GRADIO (SANS GPU) | |
| # ============================================================================== | |
# Build the Gradio UI (CPU-only: no GPU decorators anywhere in this app).
with gr.Blocks(theme=gr.themes.Soft(), title="Mnemosyne CPU") as demo:
    gr.Markdown("# 🧠 Mnemosyne v4.3.4 (CPU Stable)")
    gr.Markdown("Entrée texte, audio et fichiers activée. Mode cognition actif.")
    chatbot = gr.Chatbot(label="Conversation", type="messages")
    # Multimodal component (replaces a plain textbox plus a separate audio button).
    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["audio", ".pdf", ".txt", "image"],
        placeholder="Écrivez, parlez ou joignez un fichier...",
        show_label=False
    )
    # Wires chat_process to the chatbot and the multimodal textbox above.
    gr.ChatInterface(
        fn=chat_process,
        chatbot=chatbot,
        textbox=chat_input,
        type="messages"
    )
if __name__ == "__main__":
    demo.launch()