Upload 2 files
- config.py (+19 -0)
- local_llm.py (+35 -17)
config.py
CHANGED
@@ -132,6 +132,8 @@ GROQ_MODEL: str = "llama-3.3-70b-versatile"
 GROK_MODEL: str = "grok-beta"
 COHERE_MODEL: str = "command-r-plus-08-2024"
 TOGETHER_MODEL: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
+DEEPSEEK_MODEL: str = "deepseek-ai/DeepSeek-V3"
+MISTRAL_MODEL_HF: str = "mistralai/Mistral-7B-Instruct-v0.3"
 
 # Embeddings model (SentenceTransformers)
 EMBEDDING_MODEL: str = "paraphrase-multilingual-MiniLM-L12-v2"
@@ -183,6 +185,23 @@ MODEL_PARAMETERS: Dict[str, Dict[str, Any]] = {
         "frequency_penalty": 0.1,
         "max_tokens": 4096
     },
+
+    "deepseek-ai/DeepSeek-V3": {
+        "temperature": 0.6,
+        "top_p": 0.95,
+        "max_tokens": 4096,
+        "repetition_penalty": 1.1,
+        "presence_penalty": 0.0,
+        "frequency_penalty": 0.0
+    },
+
+    # 🌬️ MISTRAL 7B INSTRUCT V0.3 (Human / Fluid)
+    "mistralai/Mistral-7B-Instruct-v0.3": {
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "repetition_penalty": 1.1,
+        "max_tokens": 4096
+    },
 
     # 🧠 MISTRAL LUANA 8x7B (PT-AO specialist)
     # MoE (Mixture of Experts) architecture. Needs a high top_p.
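Note on the two new MODEL_PARAMETERS entries above: they only take effect where a caller looks the model id up while building a request. A minimal sketch of such a lookup (the params_for helper and the DEFAULTS values are illustrative assumptions, not part of this commit):

from typing import Any, Dict

# Assumed shared defaults for models that have no tuned entry yet.
DEFAULTS: Dict[str, Any] = {"temperature": 0.7, "max_tokens": 2048}

def params_for(model_id: str, table: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
    # Per-model overrides win over the shared defaults.
    return {**DEFAULTS, **table.get(model_id, {})}

# params_for("deepseek-ai/DeepSeek-V3", MODEL_PARAMETERS)
# -> {"temperature": 0.6, "top_p": 0.95, "max_tokens": 4096, ...}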
local_llm.py
CHANGED
@@ -149,8 +149,10 @@ class LocalLLMFallback:
         # Model components
         self._model = None  # type: ignore
         self._model_path: Optional[str] = None
-        self.
-        self.
+        self._deepseek_model: Optional[str] = None
+        self._mistral_model: Optional[str] = None
+        self._lexi_model: Optional[str] = None
+        self._luna_model: Optional[str] = None
         self._multilingual_beast: Optional[str] = None
         self._is_loaded = False
         self._tokenizer = None  # type: ignore
@@ -195,11 +197,20 @@ class LocalLLMFallback:
         if hf_token:
             self._is_hf_inference_mode = True
             self._is_loaded = True
-
-
-
-
+
+            # New AKIRA V21 hierarchy - use the config when available
+            try:
+                self._deepseek_model = getattr(_cfgloc, 'DEEPSEEK_MODEL', "deepseek-ai/DeepSeek-V3")
+                self._mistral_model = getattr(_cfgloc, 'MISTRAL_MODEL_HF', "mistralai/Mistral-7B-Instruct-v0.3")
+            except Exception:
+                self._deepseek_model = "deepseek-ai/DeepSeek-V3"
+                self._mistral_model = "mistralai/Mistral-7B-Instruct-v0.3"
+
+            self._lexi_model = "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2"
+            self._luna_model = "rhaymison/Mistral-8x7b-Quantized-portuguese-luana"
             self._multilingual_beast = "Qwen/Qwen2.5-72B-Instruct"
+
+            self._model_path = self._deepseek_model  # main default
             self._stats["model_loaded"] = True
 
         # Initialize the client if possible
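Review note on the hunk above: getattr() with a third argument already returns the default when the attribute is missing, so the try/except mainly guards against _cfgloc itself being unbound or its import having failed. A minimal sketch of the same pattern, assuming _cfgloc is the config module as imported inside local_llm.py:

# Sketch under that assumption; the import alias is hypothetical.
try:
    import config as _cfgloc
except ImportError:
    _cfgloc = None  # getattr() below then falls back to the hard-coded id

deepseek_model = getattr(_cfgloc, "DEEPSEEK_MODEL", "deepseek-ai/DeepSeek-V3")
mistral_model = getattr(_cfgloc, "MISTRAL_MODEL_HF", "mistralai/Mistral-7B-Instruct-v0.3")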
@@ -328,19 +339,25 @@ class LocalLLMFallback:
         palavras = len(prompt.split())
         is_very_short = palavras <= 5 and not needs_heavy
 
+        # 0. DEFINE THE STRICT HIERARCHY
+        # 1. DeepSeek (heavy/default) -> 2. Mistral (human) -> 3. Lexi (uncensored) -> 4. Luna (culture)
+
         if needs_heavy and not is_very_short:
-            # COMPLEX MESSAGE/
-            candidate_models.extend([self.
+            # COMPLEX/LOGICAL MESSAGE: DeepSeek -> Mistral -> Lexi
+            candidate_models.extend([self._deepseek_model, self._mistral_model, self._lexi_model])
         elif needs_luna and not is_very_short:
-            # CULTURAL MESSAGE
-            candidate_models.extend([self.
+            # CULTURAL MESSAGE: Luna -> Mistral -> Lexi
+            candidate_models.extend([self._luna_model, self._mistral_model, self._lexi_model])
+        elif "humano" in prompt_lower or "conversa" in prompt_lower:
+            # HUMAN MESSAGE: Mistral -> DeepSeek -> Lexi
+            candidate_models.extend([self._mistral_model, self._deepseek_model, self._lexi_model])
         else:
-            #
-            if
-            candidate_models.extend([self.
+            # DEFAULT: DeepSeek as the base unless the prompt is very short
+            if is_very_short:
+                candidate_models.extend([self._lexi_model, self._mistral_model])
             else:
-                #
-                candidate_models.extend([
+                # Requested default hierarchy: DeepSeek > Mistral > Lexi > Luna
+                candidate_models.extend([self._deepseek_model, self._mistral_model, self._lexi_model, self._luna_model])
 
         # Keep only unique models, preserving order
         seen = set()
@@ -400,8 +417,9 @@ class LocalLLMFallback:
                     err_json = resp.json()
                     if "not supported" in str(err_json).lower():
                         continue
-
-
+                    logger.error(f"⚠️ Router '{provider}' HTTP 400: {err_json}")
+                except Exception:
+                    logger.error(f"⚠️ Router '{provider}' HTTP 400: {resp.text[:200]}")
             except Exception:
                 continue
 
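The hierarchy above feeds the order-preserving dedup that starts at seen = set(); its loop body sits outside the hunk, so the following is an assumed reconstruction of what it likely does (unset None slots must also be dropped, since the model attributes are Optional[str]):

from typing import List, Optional

def unique_in_order(candidates: List[Optional[str]]) -> List[str]:
    # Keep the first occurrence of each model, in hierarchy order.
    seen = set()
    ordered: List[str] = []
    for model in candidates:
        if model and model not in seen:
            seen.add(model)
            ordered.append(model)
    return ordered

# unique_in_order(["deepseek-ai/DeepSeek-V3", None, "deepseek-ai/DeepSeek-V3"])
# -> ["deepseek-ai/DeepSeek-V3"]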