amewebstudio commited on
Commit
87de8db
·
verified ·
1 Parent(s): 1c9777a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +707 -53
app.py CHANGED
@@ -1,70 +1,724 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
 
19
- messages = [{"role": "system", "content": system_message}]
 
20
 
21
- messages.extend(history)
22
 
23
- messages.append({"role": "user", "content": message})
 
 
24
 
25
- response = ""
 
 
 
 
 
 
26
 
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
 
 
 
 
 
 
38
 
39
- response += token
40
- yield response
41
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  )
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ================================================================================
4
+ 🧠 MNEMOSYNE v4.3.3 - HuggingFace Space (CPU MODE)
5
+ ================================================================================
6
+ Author: Mike Amega (Logo) - Ame Web Studio
7
+ Date: 2024
8
+
9
+ DUAL LICENSE:
10
+ - Open Source: Apache 2.0 (non-commercial use)
11
+ - Commercial: Contact amewebstudio@gmail.com for enterprise licensing
12
+
13
+ CPU MODE:
14
+ ✅ Force CPU execution (no ZeroGPU quota issues)
15
+ ✅ Auto-detect local CUDA if available
16
+ ✅ No quota limitations
17
+ ================================================================================
18
+ """
19
+
20
+ # ==============================================================================
21
+ # 🚨 No ZeroGPU - CPU mode to avoid quota issues
22
+ # ==============================================================================
23
+ import os
24
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
25
+
26
+ # ==============================================================================
27
+ # Now safe to import torch and other CUDA packages
28
+ # ==============================================================================
29
+ import torch
30
+ import torch.nn as nn
31
+ import torch.nn.functional as F
32
  import gradio as gr
33
+ import json
34
+ import math
35
+ import re
36
+ import warnings
37
+ from pathlib import Path
38
+ from typing import Optional, Tuple, List
39
 
40
+ warnings.filterwarnings('ignore')
41
+
42
+ # ==============================================================================
43
+ # 🔧 RUNTIME CONFIGURATION
44
+ # ==============================================================================
45
class RuntimeConfig:
    """Detect the execution environment and expose the torch device to use.

    CPU is the default; a locally available CUDA GPU is picked up
    automatically. No ZeroGPU integration, so there are no quota limits.
    """

    def __init__(self):
        self.cuda_available = torch.cuda.is_available()
        self.device = "cpu"
        self._configure_device()

    def _configure_device(self):
        """Select 'cuda' when a local GPU exists, otherwise stay on CPU."""
        if not self.cuda_available:
            self.device = "cpu"
            print("💻 CPU mode (no GPU detected)")
        else:
            self.device = "cuda"
            print(f"🖥️ Local CUDA detected: {torch.cuda.get_device_name(0)}")
            print(f" VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
        print(f" Device: {self.device}")

    def get_device(self) -> torch.device:
        """Return the selected device as a torch.device object."""
        return torch.device(self.device)

    def to_device(self, tensor_or_model):
        """Move *tensor_or_model* to the selected device when it supports .to()."""
        if not hasattr(tensor_or_model, 'to'):
            return tensor_or_model
        return tensor_or_model.to(self.device)
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
# Initialize runtime config (module level: device is chosen once at startup).
runtime = RuntimeConfig()

# HuggingFace Hub repo holding the model weights, config and tokenizer.
MODEL_ID = "amewebstudio/mnemosyne-multimodal-v4"

print("=" * 60)
print("🧠 MNEMOSYNE v4.3.3 - LOADING")
print("=" * 60)
85
 
86
+ # ==============================================================================
87
+ # IMPORTS HUGGINGFACE
88
+ # ==============================================================================
89
+ from huggingface_hub import snapshot_download
90
+ from safetensors.torch import load_file
91
+ from transformers import AutoTokenizer, PreTrainedModel, PretrainedConfig
92
+ from transformers.modeling_outputs import CausalLMOutputWithPast
93
 
94
+ # ==============================================================================
95
+ # WHISPER POUR AUDIO (chargement lazy)
96
+ # ==============================================================================
97
# Whisper model/processor are module-level singletons loaded lazily by
# load_whisper() so memory is only spent when audio input is actually used.
whisper_model = None
whisper_processor = None

def load_whisper():
    """Lazily load Whisper (openai/whisper-small) for audio transcription.

    Returns:
        (model, processor) tuple; both remain None if loading failed.
    """
    global whisper_model, whisper_processor
    if whisper_model is None:
        try:
            from transformers import WhisperProcessor, WhisperForConditionalGeneration
            print("🎤 Loading Whisper...")
            whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-small")
            whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
            whisper_model.eval()
            print(" ✅ Whisper loaded")
        except Exception as e:
            # Best-effort: audio support is optional; callers handle None.
            print(f" ⚠️ Whisper failed: {e}")
    return whisper_model, whisper_processor
114
+
115
+
116
+ # ==============================================================================
117
+ # MODEL CLASSES
118
+ # ==============================================================================
119
class MnemosyneConfig(PretrainedConfig):
    """Hyperparameters for the Mnemosyne decoder (Llama-3-style defaults)."""

    model_type = "mnemosyne"

    def __init__(
        self,
        vocab_size: int = 128256,
        hidden_size: int = 3072,
        intermediate_size: int = 8192,
        num_hidden_layers: int = 28,
        num_attention_heads: int = 24,
        num_key_value_heads: int = 8,  # GQA: fewer KV heads than query heads
        max_position_embeddings: int = 131072,
        rms_norm_eps: float = 1e-5,
        rope_theta: float = 500000.0,  # RoPE base frequency
        **kwargs
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.max_position_embeddings = max_position_embeddings
        self.rms_norm_eps = rms_norm_eps
        self.rope_theta = rope_theta
        super().__init__(**kwargs)
145
 
 
 
146
 
147
class RMSNorm(nn.Module):
    """Root-mean-square layer norm (no mean subtraction), computed in fp32."""

    def __init__(self, hidden_size: int, eps: float = 1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        xf = x.float()
        inv_rms = torch.rsqrt(xf.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return (self.weight * (xf * inv_rms)).to(x.dtype)
157
 
158
+
159
class RotaryEmbedding(nn.Module):
    """Build RoPE cos/sin tables for the requested positions.

    NOTE(review): only position_ids[0] is used — assumes every batch row
    shares the same positions (true for this file's callers).
    """

    def __init__(self, dim: int, base: float = 500000.0):
        super().__init__()
        self.register_buffer(
            "inv_freq",
            1.0 / (base ** (torch.arange(0, dim, 2).float() / dim)),
        )

    def forward(self, x: torch.Tensor, position_ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        angles = torch.outer(position_ids[0].float(), self.inv_freq.to(x.device))
        # Duplicate the half-dim angle table and add (batch, head) broadcast dims.
        table = torch.cat((angles, angles), dim=-1)[None, None, :, :]
        return table.cos().to(x.dtype), table.sin().to(x.dtype)
169
+
170
+
171
def rotate_half(x: torch.Tensor) -> torch.Tensor:
    """Rotate the last dimension: (a, b) -> (-b, a), as RoPE requires."""
    half = x.shape[-1] // 2
    first, second = x[..., :half], x[..., half:]
    return torch.cat((-second, first), dim=-1)
174
+
175
+
176
class Attention(nn.Module):
    """Grouped-query self-attention with rotary position embeddings.

    There are fewer KV heads than query heads (GQA); each KV head serves
    `num_groups` query heads via repeat_interleave below.
    """

    def __init__(self, config: MnemosyneConfig, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.head_dim = self.hidden_size // self.num_heads
        self.num_kv_heads = config.num_key_value_heads
        self.num_groups = self.num_heads // self.num_kv_heads

        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
        self.k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)
        self.v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)
        self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
        self.rotary_emb = RotaryEmbedding(self.head_dim, config.rope_theta)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: torch.Tensor,
        position_ids: torch.Tensor,
        past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        use_cache: bool = False
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
        """Return (attn_output, new_kv); new_kv is None unless use_cache."""
        batch_size, seq_len, _ = hidden_states.size()

        # Project and split into heads: (batch, heads, seq, head_dim).
        q = self.q_proj(hidden_states).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        k = self.k_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2)
        v = self.v_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2)

        # Apply rotary position encoding to queries and keys.
        cos, sin = self.rotary_emb(q, position_ids)
        q = (q * cos) + (rotate_half(q) * sin)
        k = (k * cos) + (rotate_half(k) * sin)

        # Prepend cached keys/values along the sequence dimension.
        if past_key_value is not None:
            k = torch.cat([past_key_value[0], k], dim=2)
            v = torch.cat([past_key_value[1], v], dim=2)

        new_kv = (k, v) if use_cache else None

        # Expand KV heads so every query head has a matching key/value head.
        k = k.repeat_interleave(self.num_groups, dim=1)
        v = v.repeat_interleave(self.num_groups, dim=1)

        # Scaled dot-product attention; scores are computed in fp32 for
        # numerical stability, then cast back before the value matmul.
        attn_weights = torch.matmul(q.float(), k.float().transpose(2, 3)) / math.sqrt(self.head_dim)
        attn_weights = attn_weights + attention_mask.float()
        attn_weights = F.softmax(attn_weights, dim=-1).to(hidden_states.dtype)

        attn_output = torch.matmul(attn_weights, v)
        attn_output = attn_output.transpose(1, 2).reshape(batch_size, seq_len, self.hidden_size)

        return self.o_proj(attn_output), new_kv
226
+
227
+
228
class MLP(nn.Module):
    """SwiGLU feed-forward block: down(silu(gate(x)) * up(x))."""

    def __init__(self, config: MnemosyneConfig):
        super().__init__()
        d_model, d_ff = config.hidden_size, config.intermediate_size
        self.gate_proj = nn.Linear(d_model, d_ff, bias=False)
        self.up_proj = nn.Linear(d_model, d_ff, bias=False)
        self.down_proj = nn.Linear(d_ff, d_model, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        gated = F.silu(self.gate_proj(x)) * self.up_proj(x)
        return self.down_proj(gated)
237
+
238
+
239
class DecoderLayer(nn.Module):
    """One transformer block: pre-norm attention + pre-norm SwiGLU MLP."""

    def __init__(self, config: MnemosyneConfig, layer_idx: int):
        super().__init__()
        self.self_attn = Attention(config, layer_idx)
        self.mlp = MLP(config)
        self.input_layernorm = RMSNorm(config.hidden_size, config.rms_norm_eps)
        self.post_attention_layernorm = RMSNorm(config.hidden_size, config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: torch.Tensor,
        position_ids: torch.Tensor,
        past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        use_cache: bool = False
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
        # Attention sub-block with residual connection (pre-norm).
        attn_out, new_kv = self.self_attn(
            self.input_layernorm(hidden_states),
            attention_mask,
            position_ids,
            past_key_value,
            use_cache,
        )
        hidden_states = hidden_states + attn_out

        # Feed-forward sub-block with residual connection (pre-norm).
        hidden_states = hidden_states + self.mlp(self.post_attention_layernorm(hidden_states))

        return hidden_states, new_kv
265
+
266
+
267
class MnemosyneModel(nn.Module):
    """Stack of decoder layers with token embeddings and a final RMSNorm."""

    def __init__(self, config: MnemosyneConfig):
        super().__init__()
        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
        self.layers = nn.ModuleList([DecoderLayer(config, i) for i in range(config.num_hidden_layers)])
        self.norm = RMSNorm(config.hidden_size, config.rms_norm_eps)

    def forward(
        self,
        input_ids: torch.Tensor,
        past_key_values: Optional[List[Tuple[torch.Tensor, torch.Tensor]]] = None,
        use_cache: bool = False
    ) -> Tuple[torch.Tensor, Optional[List[Tuple[torch.Tensor, torch.Tensor]]]]:
        """Run the decoder stack.

        Args:
            input_ids: (batch, seq) token ids.
            past_key_values: per-layer cached (k, v) from previous steps.
            use_cache: when True, also return the updated per-layer caches.

        Returns:
            (normed hidden states, new per-layer caches or None).
        """
        hidden_states = self.embed_tokens(input_ids)
        batch_size, seq_len = input_ids.shape

        past_len = past_key_values[0][0].shape[2] if past_key_values else 0
        position_ids = torch.arange(past_len, past_len + seq_len, device=input_ids.device).unsqueeze(0)

        # Causal mask over the FULL key length (past + current). The previous
        # (seq_len, seq_len) mask only broadcast correctly when seq_len == 1
        # during cached decoding; this shape is identical for the prefill case
        # (past_len == 0) and all-zeros for single-token steps, but also
        # supports multi-token continuation chunks.
        attention_mask = torch.triu(
            torch.full((seq_len, past_len + seq_len), float("-inf"), device=input_ids.device),
            diagonal=past_len + 1
        ).unsqueeze(0).unsqueeze(0)

        new_kvs = [] if use_cache else None

        for i, layer in enumerate(self.layers):
            past_kv = past_key_values[i] if past_key_values else None
            hidden_states, new_kv = layer(hidden_states, attention_mask, position_ids, past_kv, use_cache)
            if use_cache:
                new_kvs.append(new_kv)

        return self.norm(hidden_states), new_kvs
300
+
301
+
302
class MnemosyneLM(PreTrainedModel):
    """Causal LM head over MnemosyneModel, with a simple sampling loop."""

    config_class = MnemosyneConfig

    def __init__(self, config: MnemosyneConfig):
        super().__init__(config)
        self.model = MnemosyneModel(config)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

    def forward(
        self,
        input_ids: torch.Tensor,
        past_key_values: Optional[List[Tuple[torch.Tensor, torch.Tensor]]] = None,
        use_cache: bool = False,
        **kwargs
    ) -> CausalLMOutputWithPast:
        """Project decoder hidden states to vocabulary logits."""
        hidden_states, new_kvs = self.model(input_ids, past_key_values, use_cache)
        logits = self.lm_head(hidden_states)
        return CausalLMOutputWithPast(logits=logits, past_key_values=new_kvs)

    @torch.no_grad()
    def generate(
        self,
        input_ids: torch.Tensor,
        max_new_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        eos_token_id: Optional[int] = None
    ) -> torch.Tensor:
        """Autoregressive nucleus (top-p) sampling with a KV cache.

        Returns the prompt plus generated tokens, shape (batch, prompt+new).
        """
        past_key_values = None
        generated = input_ids

        for _ in range(max_new_tokens):
            # After the first step only the newest token is fed (KV cache
            # holds the rest).
            inp = generated if past_key_values is None else generated[:, -1:]
            outputs = self(inp, past_key_values=past_key_values, use_cache=True)
            logits = outputs.logits[:, -1, :] / temperature
            past_key_values = outputs.past_key_values

            # Top-p filtering: mask tokens beyond the cumulative-probability
            # cutoff; the shift keeps the first token past the threshold, and
            # index 0 is forced on so at least one token always survives.
            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
            cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

            sorted_indices_to_remove = cumulative_probs > top_p
            sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
            sorted_indices_to_remove[..., 0] = 0

            # Scatter the sorted-order mask back to vocabulary order.
            indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
            logits[indices_to_remove] = float("-inf")

            next_token = torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)
            generated = torch.cat([generated, next_token], dim=-1)

            # Stop once every sequence in the batch has emitted EOS.
            if eos_token_id is not None and (next_token == eos_token_id).all():
                break

        return generated
356
+
357
+
358
+ # ==============================================================================
359
+ # SYMBOLIC CALCULATOR
360
+ # ==============================================================================
361
class SymbolicCalculator:
    """Best-effort math solver backed by SymPy.

    solve() returns a formatted "expr = result" string, or "" when the
    expression cannot be handled (callers treat "" as "no math answer").
    """

    def __init__(self):
        self.available = False
        try:
            import sympy
            self.sympy = sympy
            self.available = True
            print(" ✅ SymPy loaded - symbolic math enabled")
        except ImportError:
            print(" ⚠️ SymPy not available")

    def solve(self, expression: str) -> str:
        """Evaluate an arithmetic or symbolic expression; '' on failure."""
        if not self.available:
            return ""

        try:
            expression = expression.strip()

            # Pure arithmetic: the regex admits only digits/operators, so no
            # identifiers can appear; builtins are stripped as defense in
            # depth. SECURITY: this is user chat input — never widen the
            # regex to letters while eval() is used here.
            if re.match(r'^[\d\s\+\-\*\/\(\)\.\^]+$', expression):
                expr = expression.replace('^', '**')
                result = eval(expr, {"__builtins__": {}}, {})
                return f"{expression} = {result}"

            # Symbolic: drop everything after '=' or '?' and let SymPy parse.
            # sympify replaces the previous raw eval() of user text, which
            # allowed arbitrary code execution (e.g. attribute access on
            # builtins). NOTE: sympify is still not fully untrusted-safe,
            # but it rejects attribute access and general Python syntax.
            expr_clean = re.sub(r'[=\?].*', '', expression).strip()

            if re.search(r'[a-zA-Z]', expr_clean):
                parsed = self.sympy.sympify(expr_clean.replace('^', '**'))
                simplified = self.sympy.simplify(parsed)
                return f"{expr_clean} = {simplified}"

            return ""
        except Exception:
            # Any parse/eval failure means "not a math question".
            return ""
406
+
407
+
408
+ calculator = SymbolicCalculator()
409
+
410
+
411
+ # ==============================================================================
412
+ # LOAD MODEL
413
+ # ==============================================================================
414
print("📦 Loading model...")

# Download (or reuse cached) model repo from the Hub.
model_path = Path(snapshot_download(MODEL_ID))

with open(model_path / "config.json") as f:
    cfg = json.load(f)

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

# Build the local config from the repo's config.json, falling back to the
# class defaults for any missing field.
config = MnemosyneConfig(
    vocab_size=cfg.get("vocab_size", 128256),
    hidden_size=cfg.get("hidden_size", 3072),
    intermediate_size=cfg.get("intermediate_size", 8192),
    num_hidden_layers=cfg.get("num_hidden_layers", 28),
    num_attention_heads=cfg.get("num_attention_heads", 24),
    num_key_value_heads=cfg.get("num_key_value_heads", 8),
    max_position_embeddings=cfg.get("max_position_embeddings", 131072),
    rms_norm_eps=cfg.get("rms_norm_eps", 1e-5),
    rope_theta=cfg.get("rope_theta", 500000.0),
)

model = MnemosyneLM(config)

# Load weights from sharded safetensors.
# NOTE(review): if the index file is missing, `weights` is never defined and
# the loop below raises NameError — confirm the repo always ships an index.
idx_path = model_path / "model.safetensors.index.json"
if idx_path.exists():
    with open(idx_path) as f:
        index = json.load(f)
    weights = {}
    for sf in set(index["weight_map"].values()):
        print(f" Loading {sf}...")
        weights.update(load_file(model_path / sf))

# Map checkpoint names onto this module's names by stripping the
# "mnemosyne.backbone." prefix.
# NOTE(review): only 'backbone' tensors are mapped and strict=False is used,
# so any tensor not present (e.g. lm_head) keeps its random init — verify
# against the checkpoint's key layout.
state_dict = {}
for k, v in weights.items():
    if "backbone" in k:
        new_key = k.replace("mnemosyne.backbone.", "")
        state_dict[new_key] = v

model.load_state_dict(state_dict, strict=False)

# Keep model on CPU by default - will move to CUDA if available at inference
model = model.float().eval()
print(f" Model loaded on {runtime.device}")
print("✅ Model ready!")
461
+
462
# Load persisted memory "facts" shipped alongside the model, if any.
facts = {}
for p in ["cognitive_states.pt", "states.pt"]:
    if (model_path / p).exists():
        try:
            # weights_only=False: checkpoint comes from our own model repo.
            data = torch.load(model_path / p, map_location="cpu", weights_only=False)
            facts = data.get("facts", {})
            break
        except Exception as e:
            # Was a bare `except: pass` — keep the best-effort behavior but
            # stop swallowing KeyboardInterrupt/SystemExit and log the cause.
            print(f" ⚠️ Could not load {p}: {e}")

print(f" {len(facts)} facts loaded")
474
+
475
+
476
+ # ==============================================================================
477
+ # AUDIO TRANSCRIPTION
478
+ # ==============================================================================
479
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an audio file with Whisper.

    Returns '' for no input, the transcription, or a bracketed error marker
    that callers recognize and ignore.
    """
    if audio_path is None:
        return ""

    try:
        import librosa

        wm, wp = load_whisper()
        if wm is None:
            return "[Whisper non disponible]"

        # Whisper expects 16 kHz mono input.
        waveform, _ = librosa.load(audio_path, sr=16000)
        features = wp(waveform, sampling_rate=16000, return_tensors="pt")

        with torch.no_grad():
            predicted_ids = wm.generate(features.input_features, max_new_tokens=256)

        return wp.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
    except Exception as e:
        return f"[Erreur transcription: {e}]"
502
+
503
+
504
+ # ==============================================================================
505
+ # CHAT FUNCTION (CPU MODE - no ZeroGPU decorator)
506
+ # ==============================================================================
507
def generate_response(prompt: str, max_tokens: int = 512) -> str:
    """Generate a completion for *prompt* on the configured device.

    Returns the decoded continuation (prompt prefix stripped), or an error
    string starting with "Erreur:" so the UI never crashes.
    """
    try:
        # Use the configured device (cpu or local cuda)
        dev = runtime.get_device()
        model.to(dev)  # no-op after the first call once the model is on dev

        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096)
        input_ids = inputs.input_ids.to(dev)

        output = model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            temperature=0.7,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id
        )

        # Decode only the newly generated tokens (skip the prompt prefix).
        response = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
        return response.strip()

    except Exception as e:
        # Surface the failure in the chat instead of raising into Gradio.
        return f"Erreur: {e}"
530
+
531
+
532
def build_prompt(message: str, chat_history: List[Tuple[str, str]]) -> str:
    """Assemble a Llama-3-style chat prompt from system facts and history."""
    sys_prompt = "Tu es Mnemosyne, une IA cognitive avancée créée par Mike Amega (Ame Web Studio).\n"
    sys_prompt += "Tu réponds de manière intelligente, précise et naturelle.\n"

    # Inject up to 10 memorized facts into the system prompt.
    if facts:
        facts_str = ", ".join([f"{k}={v['value'] if isinstance(v, dict) else v}" for k, v in list(facts.items())[:10]])
        sys_prompt += f"Faits mémorisés: {facts_str}\n"

    parts = [f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{sys_prompt}<|eot_id|>"]

    # Keep only the last 5 turns to bound prompt length.
    for user_msg, bot_msg in chat_history[-5:]:
        if user_msg:
            parts.append(f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>")
        if bot_msg:
            parts.append(f"<|start_header_id|>assistant<|end_header_id|>\n\n{bot_msg}<|eot_id|>")

    parts.append(f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>")
    parts.append("<|start_header_id|>assistant<|end_header_id|>\n\n")

    return "".join(parts)
554
+
555
+
556
def process_message(message: str) -> str:
    """Detect math-looking content and return a computed result, else ''."""
    triggers = (
        r'\d+\s*[\+\-\*\/\^]\s*\d+',          # 2+2 style arithmetic
        r'[a-zA-Z]\s*[\+\-\*\/]\s*[a-zA-Z]',  # x+y style symbolic
        r'calcul',
        r'combien',
        r'\=',
    )

    lowered = message.lower()
    if any(re.search(pattern, lowered) for pattern in triggers):
        expr_match = re.search(r'([\d\w\s\+\-\*\/\^\(\)=]+)', message)
        if expr_match:
            result = calculator.solve(expr_match.group(1))
            if result:
                return result

    return ""
575
+
576
+
577
def respond(message: str, chat_history: List[Tuple[str, str]], max_tokens: int = 512):
    """Handle one chat turn: optional math shortcut plus LLM generation.

    Returns ("", updated_history) so Gradio clears the textbox.
    """
    if not message or not message.strip():
        return "", chat_history

    message = message.strip()

    # Exact symbolic/arithmetic answer, "" when the message isn't math.
    math_result = process_message(message)

    # Full LLM answer built from the history-aware prompt.
    response = generate_response(build_prompt(message, chat_history), max_tokens)

    # Prepend the exact math result unless the model already produced it.
    if math_result and math_result not in response:
        response = f"{math_result}\n\n{response}"

    chat_history.append((message, response))
    return "", chat_history
599
+
600
+
601
def respond_with_audio(
    message: str,
    audio: Optional[str],
    chat_history: List[Tuple[str, str]],
    max_tokens: int = 512
):
    """Chat turn that may take its text from a recorded audio clip.

    Returns (textbox_value, audio_value, history) — textbox and audio are
    reset after every turn.
    """
    if audio:
        heard = transcribe_audio(audio)
        # Bracketed strings are error markers from transcribe_audio; a
        # successful transcription replaces the typed message.
        if heard and not heard.startswith("["):
            message = heard

    if not message or not message.strip():
        return "", None, chat_history

    _, chat_history = respond(message, chat_history, max_tokens)
    return "", None, chat_history
619
+
620
+
621
+ # ==============================================================================
622
+ # GRADIO INTERFACE
623
+ # ==============================================================================
624
def get_status_message() -> str:
    """One-line environment banner shown in the UI header."""
    if not runtime.cuda_available:
        return "💻 CPU mode (~30-60s) | 🎤 Parlez ou tapez"
    gpu_name = torch.cuda.get_device_name(0)
    return f"🖥️ GPU: {gpu_name} | 🎤 Parlez ou tapez"
631
+
632
+
633
# Minimal styling: constrain width, give the chat area a floor, hide footer.
css = """
.container { max-width: 900px; margin: auto; }
.chatbot { min-height: 400px; }
footer { visibility: hidden; }
"""

with gr.Blocks(title="Mnemosyne v4.3.3", css=css, theme=gr.themes.Soft()) as demo:
    # Header with runtime status (CPU/GPU) rendered once at startup.
    gr.Markdown(f"""
    # 🧠 Mnemosyne v4.3.3
    *IA cognitive par Mike Amega - Ame Web Studio*

    **Features:** Audio input (auto-send) • Symbolic Math • Memory System

    {get_status_message()}
    """)

    chatbot = gr.Chatbot(
        label="Conversation",
        height=450,
        show_copy_button=True,
        elem_classes=["chatbot"]
    )

    with gr.Row():
        with gr.Column(scale=4):
            msg = gr.Textbox(
                label="Message",
                placeholder="Tapez votre message ici...",
                lines=2,
                show_label=False
            )
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎤 Audio",
                show_label=True
            )

    with gr.Row():
        with gr.Column(scale=1):
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max tokens"
            )
        with gr.Column(scale=1):
            send_btn = gr.Button("📤 Envoyer", variant="primary", size="lg")
        with gr.Column(scale=1):
            clear_btn = gr.Button("🗑️ Effacer", size="lg")

    gr.Markdown("""
    ---
    📜 **License:** Apache 2.0 (non-commercial) | Commercial: amewebstudio@gmail.com
    """)

    # Event handlers

    # Text submit (Enter in the textbox) — text only, ignores any recording.
    msg.submit(
        fn=respond,
        inputs=[msg, chatbot, max_tokens],
        outputs=[msg, chatbot]
    )

    # Button click — considers both the textbox and a pending audio clip.
    send_btn.click(
        fn=respond_with_audio,
        inputs=[msg, audio_input, chatbot, max_tokens],
        outputs=[msg, audio_input, chatbot]
    )

    # Audio auto-send when recording stops
    audio_input.stop_recording(
        fn=respond_with_audio,
        inputs=[msg, audio_input, chatbot, max_tokens],
        outputs=[msg, audio_input, chatbot]
    )

    # Clear: reset chat history, textbox and recorded audio.
    clear_btn.click(
        fn=lambda: ([], "", None),
        inputs=None,
        outputs=[chatbot, msg, audio_input]
    )
720
 
721
# Launch
if __name__ == "__main__":
    demo.queue()  # enable request queueing so concurrent users are serialized
    # Bind to all interfaces on the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)