Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,43 +17,38 @@ from transformers.modeling_outputs import CausalLMOutputWithPast
|
|
| 17 |
# Configuration
|
| 18 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 19 |
warnings.filterwarnings('ignore')
|
|
|
|
| 20 |
MODEL_ID = "amewebstudio/mnemosyne-multimodal-v4"
|
| 21 |
-
DEVICE = "cpu"
|
| 22 |
|
| 23 |
# ==============================================================================
|
| 24 |
-
#
|
| 25 |
# ==============================================================================
|
| 26 |
class ConversationMemory:
|
| 27 |
def __init__(self):
|
| 28 |
self.facts = {}
|
| 29 |
|
| 30 |
-
def
|
| 31 |
-
|
| 32 |
-
|
| 33 |
for pattern, key in patterns:
|
| 34 |
-
match = re.search(pattern, text, re.
|
| 35 |
-
if match: self.facts[key] = match.group(1)
|
| 36 |
|
| 37 |
def get_context(self):
|
| 38 |
if not self.facts: return ""
|
| 39 |
-
return "\n[MÉMOIRE
|
| 40 |
|
| 41 |
memory = ConversationMemory()
|
| 42 |
|
| 43 |
# ==============================================================================
|
| 44 |
-
#
|
| 45 |
# ==============================================================================
|
| 46 |
class MnemosyneConfig(PretrainedConfig):
|
| 47 |
model_type = "mnemosyne"
|
| 48 |
-
def __init__(self, vocab_size=128256, hidden_size=3072,
|
| 49 |
-
num_hidden_layers=28, num_attention_heads=24, num_key_value_heads=8,
|
| 50 |
-
max_position_embeddings=131072, rms_norm_eps=1e-5, rope_theta=500000.0, **kw):
|
| 51 |
super().__init__(**kw)
|
| 52 |
-
self.vocab_size
|
| 53 |
-
self.
|
| 54 |
-
self.num_attention_heads, self.num_key_value_heads = num_attention_heads, num_key_value_heads
|
| 55 |
-
self.max_position_embeddings, self.rms_norm_eps = max_position_embeddings, rms_norm_eps
|
| 56 |
-
self.rope_theta = rope_theta
|
| 57 |
|
| 58 |
class RMSNorm(nn.Module):
|
| 59 |
def __init__(self, hs, eps=1e-5):
|
|
@@ -63,126 +58,107 @@ class RMSNorm(nn.Module):
|
|
| 63 |
def forward(self, x):
|
| 64 |
return (self.weight * x.float() * torch.rsqrt(x.float().pow(2).mean(-1, keepdim=True) + self.eps)).to(x.dtype)
|
| 65 |
|
| 66 |
-
# Note: Les classes Attention, MLP et DecoderLayer sont nécessaires ici pour model.load_state_dict
|
| 67 |
-
# Je les inclus de manière compacte pour la lisibilité
|
| 68 |
-
class MLP(nn.Module):
|
| 69 |
-
def __init__(self, cfg):
|
| 70 |
-
super().__init__()
|
| 71 |
-
self.gate = nn.Linear(cfg.hidden_size, cfg.intermediate_size, bias=False)
|
| 72 |
-
self.up = nn.Linear(cfg.hidden_size, cfg.intermediate_size, bias=False)
|
| 73 |
-
self.down = nn.Linear(cfg.intermediate_size, cfg.hidden_size, bias=False)
|
| 74 |
-
def forward(self, x): return self.down(F.silu(self.gate(x)) * self.up(x))
|
| 75 |
-
|
| 76 |
class MnemosyneLM(PreTrainedModel):
|
| 77 |
config_class = MnemosyneConfig
|
| 78 |
def __init__(self, cfg):
|
| 79 |
super().__init__(cfg)
|
| 80 |
self.model_part = nn.ModuleDict({
|
| 81 |
"embed": nn.Embedding(cfg.vocab_size, cfg.hidden_size),
|
| 82 |
-
"
|
| 83 |
-
"norm": RMSNorm(cfg.hidden_size, cfg.rms_norm_eps)
|
| 84 |
})
|
| 85 |
self.lm_head = nn.Linear(cfg.hidden_size, cfg.vocab_size, bias=False)
|
| 86 |
|
| 87 |
@torch.no_grad()
|
| 88 |
-
def generate(self, input_ids, max_new_tokens=
|
| 89 |
-
|
| 90 |
for _ in range(max_new_tokens):
|
| 91 |
-
outputs = self(
|
| 92 |
-
logits = outputs.logits[:, -1, :]
|
| 93 |
-
next_token = torch.
|
| 94 |
-
|
| 95 |
-
if
|
| 96 |
-
return
|
| 97 |
|
| 98 |
def forward(self, input_ids, **kwargs):
|
| 99 |
x = self.model_part["embed"](input_ids)
|
| 100 |
-
# La logique de passage dans les couches layers[] se fait ici
|
| 101 |
x = self.model_part["norm"](x)
|
| 102 |
-
|
| 103 |
-
return CausalLMOutputWithPast(logits=logits)
|
| 104 |
|
| 105 |
# ==============================================================================
|
| 106 |
-
#
|
| 107 |
# ==============================================================================
|
| 108 |
-
print("📦 Chargement
|
| 109 |
model_path = Path(snapshot_download(MODEL_ID))
|
| 110 |
-
tokenizer = AutoTokenizer.from_pretrained(model_path
|
| 111 |
|
| 112 |
with open(model_path / "config.json") as f:
|
| 113 |
cfg_data = json.load(f)
|
| 114 |
|
| 115 |
-
|
|
|
|
| 116 |
|
|
|
|
| 117 |
safetensor_files = list(model_path.glob("*.safetensors"))
|
| 118 |
-
full_state_dict = {}
|
| 119 |
for s_file in sorted(safetensor_files):
|
| 120 |
weights = load_file(s_file, device="cpu")
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
new_key = "model_part." + new_key
|
| 125 |
-
full_state_dict[new_key] = v.to(torch.float16)
|
| 126 |
|
| 127 |
-
model.load_state_dict(full_state_dict, strict=False)
|
| 128 |
model.eval()
|
|
|
|
| 129 |
|
| 130 |
# ==============================================================================
|
| 131 |
-
#
|
| 132 |
# ==============================================================================
|
| 133 |
-
def
|
| 134 |
-
|
| 135 |
files = message["files"]
|
| 136 |
|
| 137 |
-
|
| 138 |
-
memory.extract(text)
|
| 139 |
-
|
| 140 |
-
# Préparation du prompt multimodal
|
| 141 |
-
context_memo = memory.get_context()
|
| 142 |
-
sys_prompt = f"Tu es Mnemosyne v4.3.4, IA multimodale. {context_memo}"
|
| 143 |
|
| 144 |
-
#
|
| 145 |
-
|
| 146 |
if files:
|
| 147 |
-
|
| 148 |
-
if f.lower().endswith(('.wav', '.mp3', '.m4a')):
|
| 149 |
-
file_info += f"\n[AUDIO DÉTECTÉ: {os.path.basename(f)} - Transcription en cours...]"
|
| 150 |
-
else:
|
| 151 |
-
file_info += f"\n[FICHIER DÉTECTÉ: {os.path.basename(f)}]"
|
| 152 |
-
|
| 153 |
-
# Construction du prompt final
|
| 154 |
-
full_prompt = f"<|system|>\n{sys_prompt}{file_info}\n"
|
| 155 |
-
for turn in history:
|
| 156 |
-
full_prompt += f"<|{turn['role']}|>\n{turn['content']}\n"
|
| 157 |
-
full_prompt += f"<|user|>\n{text}\n<|assistant|>\n"
|
| 158 |
|
| 159 |
-
#
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
-
|
| 165 |
-
|
|
|
|
| 166 |
return response
|
| 167 |
|
| 168 |
# ==============================================================================
|
| 169 |
-
#
|
| 170 |
# ==============================================================================
|
| 171 |
-
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 172 |
-
gr.Markdown("# 🧠 Mnemosyne v4.3.4
|
|
|
|
| 173 |
|
| 174 |
-
chatbot = gr.Chatbot(
|
| 175 |
|
| 176 |
-
#
|
| 177 |
chat_input = gr.MultimodalTextbox(
|
| 178 |
interactive=True,
|
| 179 |
-
file_types=["audio", "
|
| 180 |
-
placeholder="
|
| 181 |
show_label=False
|
| 182 |
)
|
| 183 |
|
| 184 |
gr.ChatInterface(
|
| 185 |
-
fn=
|
| 186 |
chatbot=chatbot,
|
| 187 |
textbox=chat_input,
|
| 188 |
type="messages"
|
|
|
|
| 17 |
# Configuration
|
| 18 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 19 |
warnings.filterwarnings('ignore')
|
| 20 |
+
|
| 21 |
MODEL_ID = "amewebstudio/mnemosyne-multimodal-v4"
|
|
|
|
| 22 |
|
| 23 |
# ==============================================================================
|
| 24 |
+
# SYSTÈME DE COGNITION & CALCUL
|
| 25 |
# ==============================================================================
|
| 26 |
class ConversationMemory:
    """Rule-based store of facts the user states about themselves.

    Facts live in ``self.facts`` as ``{fact_key: captured_value}``. Each call
    to :meth:`extract_facts` scans one message with a fixed set of French
    patterns and overwrites any previously stored value for the same key.
    """

    # (compiled pattern, fact key) pairs, compiled once at class creation.
    # Both the ASCII apostrophe and the typographic one (U+2019, commonly
    # inserted by phone keyboards) are accepted, and the captured value may be
    # a hyphenated compound such as "Jean-Pierre" or "Saint-Denis".
    _PATTERNS = (
        (re.compile(r"(?:je m['’]appelle|mon nom est)\s+(\w+(?:-\w+)*)", re.I),
         "nom_utilisateur"),
        (re.compile(r"(?:j['’]habite à|je vis à)\s+(\w+(?:-\w+)*)", re.I),
         "localisation"),
    )

    def __init__(self):
        # Maps a fact key ("nom_utilisateur", "localisation") to its value.
        self.facts = {}

    def extract_facts(self, text):
        """Record any known fact found in *text*.

        Args:
            text: The user's message. ``None`` or an empty string is ignored.

        Returns:
            None. Matching facts are written into ``self.facts``.
        """
        if not text:
            return
        for pattern, key in self._PATTERNS:
            match = pattern.search(text)
            if match:
                # Capitalize each hyphen-separated part so "jean-pierre"
                # becomes "Jean-Pierre"; a single word behaves exactly like
                # str.capitalize().
                self.facts[key] = "-".join(
                    part.capitalize() for part in match.group(1).split("-")
                )

    def get_context(self):
        """Return the stored facts formatted as a prompt fragment.

        Returns:
            ``""`` when nothing is stored, otherwise a string starting with
            ``"\\n[MÉMOIRE]: "`` followed by comma-separated ``key: value``
            pairs in insertion order.
        """
        if not self.facts:
            return ""
        return "\n[MÉMOIRE]: " + ", ".join(f"{k}: {v}" for k, v in self.facts.items())
|
| 40 |
|
| 41 |
# Single module-level memory instance: chat_process() writes facts into it and
# reads them back when building the system prompt.
# NOTE(review): because this is one global object, stored facts presumably
# persist across every conversation served by this process — confirm that
# sharing between sessions is intended.
memory = ConversationMemory()
|
| 42 |
|
| 43 |
# ==============================================================================
|
| 44 |
+
# ARCHITECTURE MNEMOSYNE (SCLM)
|
| 45 |
# ==============================================================================
|
| 46 |
class MnemosyneConfig(PretrainedConfig):
    """Configuration object for the Mnemosyne model.

    Only ``vocab_size`` and ``hidden_size`` are stored explicitly here; every
    other keyword argument is forwarded untouched to the parent constructor.

    Args:
        vocab_size: Number of entries in the embedding table / output head.
        hidden_size: Width of the hidden representation.
        **kw: Extra settings passed through to ``PretrainedConfig``.
    """

    model_type = "mnemosyne"

    def __init__(self, vocab_size=128256, hidden_size=3072, **kw):
        super().__init__(**kw)
        self.vocab_size, self.hidden_size = vocab_size, hidden_size
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
class RMSNorm(nn.Module):
|
| 54 |
def __init__(self, hs, eps=1e-5):
|
|
|
|
| 58 |
def forward(self, x):
|
| 59 |
return (self.weight * x.float() * torch.rsqrt(x.float().pow(2).mean(-1, keepdim=True) + self.eps)).to(x.dtype)
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
class MnemosyneLM(PreTrainedModel):
    """Causal language-model wrapper: embedding -> RMSNorm -> linear head.

    NOTE(review): ``forward`` applies no attention/MLP layers between the
    embedding and the final norm, so each logit depends only on the token at
    the same position — confirm whether the decoder layers were dropped on
    purpose.
    """

    config_class = MnemosyneConfig

    def __init__(self, cfg):
        """Build the sub-modules from *cfg* (reads vocab_size, hidden_size)."""
        super().__init__(cfg)
        self.model_part = nn.ModuleDict({
            "embed": nn.Embedding(cfg.vocab_size, cfg.hidden_size),
            # Honour the checkpoint's epsilon when the config carries one;
            # otherwise fall back to RMSNorm's own default of 1e-5.
            "norm": RMSNorm(cfg.hidden_size, getattr(cfg, "rms_norm_eps", 1e-5)),
        })
        self.lm_head = nn.Linear(cfg.hidden_size, cfg.vocab_size, bias=False)

    @torch.no_grad()
    def generate(self, input_ids, max_new_tokens=256):
        """Greedy-decode up to *max_new_tokens* tokens after *input_ids*.

        Args:
            input_ids: Integer tensor of token ids, one row per sequence.
            max_new_tokens: Upper bound on the number of appended tokens.

        Returns:
            ``input_ids`` with the generated tokens concatenated on the last
            dimension. Decoding stops early once every row has produced an
            end-of-sequence id; with several rows, a row that finished first
            keeps receiving tokens until the last row finishes, so callers
            must truncate each row at its first EOS.

        NOTE(review): this overrides ``PreTrainedModel.generate`` with a much
        smaller signature (no sampling, no attention mask).
        """
        # The config may hold a single id, a list of ids, or None.
        eos_ids = self.config.eos_token_id
        if isinstance(eos_ids, int):
            eos_ids = [eos_ids]
        eos_tensor = None
        if eos_ids:
            eos_tensor = torch.tensor(
                list(eos_ids), dtype=input_ids.dtype, device=input_ids.device
            )

        # Per-row flag: has this sequence emitted an EOS token yet?
        finished = torch.zeros(
            input_ids.shape[0], dtype=torch.bool, device=input_ids.device
        )

        for _ in range(max_new_tokens):
            # Only the last 512 tokens are fed to the model to bound CPU cost.
            outputs = self(input_ids[:, -512:])
            logits = outputs.logits[:, -1, :]
            next_token = torch.argmax(logits, dim=-1, keepdim=True)
            input_ids = torch.cat([input_ids, next_token], dim=-1)
            if eos_tensor is not None:
                # Tensor comparison works for any batch size, unlike .item().
                finished |= torch.isin(next_token.squeeze(-1), eos_tensor)
                if finished.all():
                    break
        return input_ids

    def forward(self, input_ids, **kwargs):
        """Return logits for *input_ids*; extra keyword arguments are ignored."""
        x = self.model_part["embed"](input_ids)
        x = self.model_part["norm"](x)
        return CausalLMOutputWithPast(logits=self.lm_head(x))
|
|
|
|
| 86 |
|
| 87 |
# ==============================================================================
|
| 88 |
+
# CHARGEMENT (OPTIMISATION CPU 16GB)
|
| 89 |
# ==============================================================================
|
| 90 |
+
# Download the model snapshot, build the tokenizer and model, then copy the
# safetensors shards into the model one file at a time.
print("📦 Chargement Mnemosyne v4.3.4 (MODE CPU STABLE)...")
model_path = Path(snapshot_download(MODEL_ID))
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Explicit encoding so the JSON is read the same way on every platform.
with open(model_path / "config.json", encoding="utf-8") as f:
    cfg_data = json.load(f)

# Weights are kept in float32 on CPU.
model = MnemosyneLM(MnemosyneConfig(**cfg_data)).to(torch.float32)

print("📂 Chargement des poids (Sharded Safetensors)...")
safetensor_files = sorted(model_path.glob("*.safetensors"))
if not safetensor_files:
    print(f"⚠️ Aucun fichier .safetensors trouvé dans {model_path} : le modèle garde ses poids aléatoires.")

# strict=False is required because each shard holds only part of the weights,
# but it also hides key-mapping mistakes. Track what was really loaded so a
# model left (partly) uninitialised is reported instead of passing silently.
expected_keys = set(model.state_dict())
loaded_keys = set()
ignored_keys = set()

for s_file in safetensor_files:
    weights = load_file(s_file, device="cpu")
    # Map checkpoint key names onto the model_part structure.
    # NOTE(review): str.replace rewrites every occurrence of "model." in a
    # key, not just a leading prefix — confirm against the checkpoint's names.
    state_dict = {
        k.replace("mnemosyne.backbone.", "").replace("model.", "model_part."): v.to(torch.float32)
        for k, v in weights.items()
    }
    load_result = model.load_state_dict(state_dict, strict=False)
    ignored_keys.update(load_result.unexpected_keys)
    loaded_keys.update(expected_keys.intersection(state_dict))

never_loaded = sorted(expected_keys - loaded_keys)
if never_loaded:
    print(f"⚠️ {len(never_loaded)} paramètre(s) non initialisé(s) depuis le checkpoint : {never_loaded}")
if ignored_keys:
    print(f"ℹ️ {len(ignored_keys)} clé(s) du checkpoint ignorée(s) (absentes du modèle).")

model.eval()
print("✅ Modèle chargé avec succès sur CPU.")
|
| 110 |
|
| 111 |
# ==============================================================================
|
| 112 |
+
# LOGIQUE DE CHAT MULTIMODALE
|
| 113 |
# ==============================================================================
|
| 114 |
+
def chat_process(message, history):
    """Build the prompt for one user turn and return the model's reply.

    Args:
        message: Dict produced by the multimodal textbox; its ``"text"`` and
            ``"files"`` entries are read. A missing or ``None`` entry is
            treated as empty.
        history: Iterable of earlier turns, each a dict with ``"role"`` and
            ``"content"`` keys.

    Returns:
        The newly generated text only, stripped of surrounding whitespace.
    """
    user_text = message.get("text") or ""
    files = message.get("files") or []

    memory.extract_facts(user_text)

    # Attached files are only signalled to the model; their content is not read.
    file_context = ""
    if files:
        file_context = "\n[Système: L'utilisateur a envoyé des fichiers/audios. Analyse en cours...]"

    # Prompt construction: system message, past turns, then the new user turn.
    sys_msg = f"Tu es Mnemosyne v4.3.4 par Mike Amega. {memory.get_context()}{file_context}"
    parts = [f"<|system|>\n{sys_msg}<|eot_id|>"]

    for turn in history:
        content = turn["content"]
        # NOTE(review): presumably turns that carry an uploaded file have
        # non-string content (tuple/dict); those are skipped rather than
        # dumping their repr into the prompt — confirm against Gradio's
        # history format.
        if not isinstance(content, str):
            continue
        # Any role other than "user" is rendered as an assistant turn.
        role = "user" if turn["role"] == "user" else "assistant"
        parts.append(f"<|{role}|>\n{content}<|eot_id|>")

    # Close the current user turn with <|eot_id|>, like every history turn.
    parts.append(f"<|user|>\n{user_text}<|eot_id|><|assistant|>\n")
    prompt = "".join(parts)

    # Encoding and generation.
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_len = inputs.input_ids.shape[1]
    outputs = model.generate(inputs.input_ids, max_new_tokens=150)

    # Decode only the tokens appended after the prompt. Splitting the full
    # decoded text on the word "assistant" would cut off any reply that itself
    # contains that word.
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    return response
|
| 142 |
|
| 143 |
# ==============================================================================
|
| 144 |
+
# INTERFACE GRADIO (SANS GPU)
|
| 145 |
# ==============================================================================
|
| 146 |
+
with gr.Blocks(theme=gr.themes.Soft(), title="Mnemosyne CPU") as demo:
|
| 147 |
+
gr.Markdown("# 🧠 Mnemosyne v4.3.4 (CPU Stable)")
|
| 148 |
+
gr.Markdown("Entrée texte, audio et fichiers activée. Mode cognition actif.")
|
| 149 |
|
| 150 |
+
chatbot = gr.Chatbot(label="Conversation", type="messages")
|
| 151 |
|
| 152 |
+
# Composant Multimodal (Remplace le simple textbox et le bouton audio séparé)
|
| 153 |
chat_input = gr.MultimodalTextbox(
|
| 154 |
interactive=True,
|
| 155 |
+
file_types=["audio", ".pdf", ".txt", "image"],
|
| 156 |
+
placeholder="Écrivez, parlez ou joignez un fichier...",
|
| 157 |
show_label=False
|
| 158 |
)
|
| 159 |
|
| 160 |
gr.ChatInterface(
|
| 161 |
+
fn=chat_process,
|
| 162 |
chatbot=chatbot,
|
| 163 |
textbox=chat_input,
|
| 164 |
type="messages"
|