akra35567 committed on
Commit
2ed1b2f
·
verified ·
1 Parent(s): f2ed57f

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.py +19 -0
  2. local_llm.py +35 -17
config.py CHANGED
@@ -132,6 +132,8 @@ GROQ_MODEL: str = "llama-3.3-70b-versatile"
132
  GROK_MODEL: str = "grok-beta"
133
  COHERE_MODEL: str = "command-r-plus-08-2024"
134
  TOGETHER_MODEL: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
 
 
135
 
136
  # Modelo de embeddings (SentenceTransformers)
137
  EMBEDDING_MODEL: str = "paraphrase-multilingual-MiniLM-L12-v2"
@@ -183,6 +185,23 @@ MODEL_PARAMETERS: Dict[str, Dict[str, Any]] = {
183
  "frequency_penalty": 0.1,
184
  "max_tokens": 4096
185
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  # 🧠 MISTRAL LUANA 8x7B (Especialista PT-AO)
188
  # Arquitetura MoE (Mixture of Experts). Precisa de top_p alto.
 
132
  GROK_MODEL: str = "grok-beta"
133
  COHERE_MODEL: str = "command-r-plus-08-2024"
134
  TOGETHER_MODEL: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
135
+ DEEPSEEK_MODEL: str = "deepseek-ai/DeepSeek-V3"
136
+ MISTRAL_MODEL_HF: str = "mistralai/Mistral-7B-Instruct-v0.3"
137
 
138
  # Modelo de embeddings (SentenceTransformers)
139
  EMBEDDING_MODEL: str = "paraphrase-multilingual-MiniLM-L12-v2"
 
185
  "frequency_penalty": 0.1,
186
  "max_tokens": 4096
187
  },
188
+
189
+ "deepseek-ai/DeepSeek-V3": {
190
+ "temperature": 0.6,
191
+ "top_p": 0.95,
192
+ "max_tokens": 4096,
193
+ "repetition_penalty": 1.1,
194
+ "presence_penalty": 0.0,
195
+ "frequency_penalty": 0.0
196
+ },
197
+
198
+ # 🌬️ MISTRAL 7B INSTRUCT V0.3 (Human / Fluid)
199
+ "mistralai/Mistral-7B-Instruct-v0.3": {
200
+ "temperature": 0.7,
201
+ "top_p": 0.9,
202
+ "repetition_penalty": 1.1,
203
+ "max_tokens": 4096
204
+ },
205
 
206
  # 🧠 MISTRAL LUANA 8x7B (Especialista PT-AO)
207
  # Arquitetura MoE (Mixture of Experts). Precisa de top_p alto.
local_llm.py CHANGED
@@ -149,8 +149,10 @@ class LocalLLMFallback:
149
  # Componentes do modelo
150
  self._model = None # type: ignore
151
  self._model_path: Optional[str] = None
152
- self._heavy_model: Optional[str] = None
153
- self._portuguese_model: Optional[str] = None
 
 
154
  self._multilingual_beast: Optional[str] = None
155
  self._is_loaded = False
156
  self._tokenizer = None # type: ignore
@@ -195,11 +197,20 @@ class LocalLLMFallback:
195
  if hf_token:
196
  self._is_hf_inference_mode = True
197
  self._is_loaded = True
198
- # Prioridade absoluta: Modelo sem censura (Leve e Pesado)
199
- self._model_path = "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2"
200
- self._heavy_model = "huihui-ai/Qwen2.5-72B-Instruct-abliterated"
201
- self._portuguese_model = "rhaymison/Mistral-8x7b-Quantized-portuguese-luana"
 
 
 
 
 
 
 
202
  self._multilingual_beast = "Qwen/Qwen2.5-72B-Instruct"
 
 
203
  self._stats["model_loaded"] = True
204
 
205
  # Inicializa o cliente se possível
@@ -328,19 +339,25 @@ class LocalLLMFallback:
328
  palavras = len(prompt.split())
329
  is_very_short = palavras <= 5 and not needs_heavy
330
 
 
 
 
331
  if needs_heavy and not is_very_short:
332
- # MENSAGEM COMPLEXA/SEM CENSURA/DEBATE: Qwen -> Luna -> Lexi
333
- candidate_models.extend([self._heavy_model, self._portuguese_model, base_model])
334
  elif needs_luna and not is_very_short:
335
- # MENSAGEM CULTURAL/GÍRIA EM PT: Luna -> Lexi -> Qwen
336
- candidate_models.extend([self._portuguese_model, base_model, self._heavy_model])
 
 
 
337
  else:
338
- # MENSAGEM SIMPLES OU MUITO CURTA:
339
- if prefer_heavy and not is_very_short:
340
- candidate_models.extend([self._heavy_model, base_model, self._portuguese_model])
341
  else:
342
- # Força Lexi (leve) para "oi", "tudo bem"
343
- candidate_models.extend([base_model, self._portuguese_model, self._heavy_model])
344
 
345
  # Garantir apenas modelos únicos mantendo a ordem
346
  seen = set()
@@ -400,8 +417,9 @@ class LocalLLMFallback:
400
  err_json = resp.json()
401
  if "not supported" in str(err_json).lower():
402
  continue
403
- except: pass
404
- logger.error(f"⚠️ Router '{provider}': HTTP {resp.status_code}")
 
405
  except Exception:
406
  continue
407
 
 
149
  # Componentes do modelo
150
  self._model = None # type: ignore
151
  self._model_path: Optional[str] = None
152
+ self._deepseek_model: Optional[str] = None
153
+ self._mistral_model: Optional[str] = None
154
+ self._lexi_model: Optional[str] = None
155
+ self._luna_model: Optional[str] = None
156
  self._multilingual_beast: Optional[str] = None
157
  self._is_loaded = False
158
  self._tokenizer = None # type: ignore
 
197
  if hf_token:
198
  self._is_hf_inference_mode = True
199
  self._is_loaded = True
200
+
201
+ # Nova Hierarquia AKIRA V21 - Usando config se disponível
202
+ try:
203
+ self._deepseek_model = getattr(_cfgloc, 'DEEPSEEK_MODEL', "deepseek-ai/DeepSeek-V3")
204
+ self._mistral_model = getattr(_cfgloc, 'MISTRAL_MODEL_HF', "mistralai/Mistral-7B-Instruct-v0.3")
205
+ except:
206
+ self._deepseek_model = "deepseek-ai/DeepSeek-V3"
207
+ self._mistral_model = "mistralai/Mistral-7B-Instruct-v0.3"
208
+
209
+ self._lexi_model = "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2"
210
+ self._luna_model = "rhaymison/Mistral-8x7b-Quantized-portuguese-luana"
211
  self._multilingual_beast = "Qwen/Qwen2.5-72B-Instruct"
212
+
213
+ self._model_path = self._deepseek_model # Default principal
214
  self._stats["model_loaded"] = True
215
 
216
  # Inicializa o cliente se possível
 
339
  palavras = len(prompt.split())
340
  is_very_short = palavras <= 5 and not needs_heavy
341
 
342
+ # 0. DEFINIR HIERARQUIA ESTRITA
343
+ # 1. DeepSeek (Pesado/Padrão) -> 2. Mistral (Humano) -> 3. Lexi (Sem Censura) -> 4. Luna (Cultura)
344
+
345
  if needs_heavy and not is_very_short:
346
+ # MENSAGEM COMPLEXA/LÓGICA: DeepSeek -> Mistral -> Lexi
347
+ candidate_models.extend([self._deepseek_model, self._mistral_model, self._lexi_model])
348
  elif needs_luna and not is_very_short:
349
+ # MENSAGEM CULTURAL: Luna -> Mistral -> Lexi
350
+ candidate_models.extend([self._luna_model, self._mistral_model, self._lexi_model])
351
+ elif "humano" in prompt_lower or "conversa" in prompt_lower:
352
+ # MENSAGEM HUMANA: Mistral -> DeepSeek -> Lexi
353
+ candidate_models.extend([self._mistral_model, self._deepseek_model, self._lexi_model])
354
  else:
355
+ # PADRÃO: DeepSeek como base se não for curto
356
+ if is_very_short:
357
+ candidate_models.extend([self._lexi_model, self._mistral_model])
358
  else:
359
+ # Hierarquia padrão solicitada: DeepSeek > Mistral > Lexi > Luna
360
+ candidate_models.extend([self._deepseek_model, self._mistral_model, self._lexi_model, self._luna_model])
361
 
362
  # Garantir apenas modelos únicos mantendo a ordem
363
  seen = set()
 
417
  err_json = resp.json()
418
  if "not supported" in str(err_json).lower():
419
  continue
420
+ logger.error(f"⚠️ Router '{provider}' HTTP 400: {err_json}")
421
+ except:
422
+ logger.error(f"⚠️ Router '{provider}' HTTP 400: {resp.text[:200]}")
423
  except Exception:
424
  continue
425