Madras1 committed · verified
Commit 46f327b · 1 Parent(s): d874e41

Update app.py

Files changed (1)
  1. app.py +50 -28
app.py CHANGED
@@ -4,15 +4,23 @@ import torch
 import os
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from groq import Groq
+from mistralai import Mistral  # official, up-to-date SDK

 # --- CONFIGURATION ---
+
+# 1. LOCAL (H200)
 LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
 local_model = None
 local_tokenizer = None

-# Groq client
-api_key = os.environ.get("GROQ_API_KEY")
-groq_client = Groq(api_key=api_key) if api_key else None
+# 2. GROQ CLIENT
+groq_api_key = os.environ.get("GROQ_API_KEY")
+groq_client = Groq(api_key=groq_api_key) if groq_api_key else None
+
+# 3. MISTRAL CLIENT (NEW!)
+mistral_api_key = os.environ.get("MISTRAL_API_KEY")
+mistral_client = Mistral(api_key=mistral_api_key) if mistral_api_key else None
+

 # --- FUNCTION 1: H200 (ZeroGPU - Limited Quota) ---
 @spaces.GPU(duration=60)
@@ -39,70 +47,84 @@ def run_local_h200(messages):
     )
     return local_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

-# --- FUNCTION 2: GROQ (Cloud - Fast and Free) ---
+# --- FUNCTION 2: GROQ (Cloud) ---
 def run_groq(messages, model_id):
     if not groq_client:
-        return "❌ Error: GROQ_API_KEY not configured in the Secrets."
-
+        return "❌ Error: GROQ_API_KEY not configured."
     try:
         completion = groq_client.chat.completions.create(
             model=model_id,
             messages=messages,
             temperature=0.7,
             max_tokens=4096,
-            top_p=1,
-            stream=False,
-            stop=None,
+            top_p=1, stream=False, stop=None,
         )
         return completion.choices[0].message.content
     except Exception as e:
-        return f"❌ Error in Groq: {str(e)}"
+        return f"❌ Groq error: {str(e)}"
+
+# --- FUNCTION 3: MISTRAL (Cloud - NEW!) ---
+def run_mistral(messages, model_id):
+    if not mistral_client:
+        return "❌ Error: MISTRAL_API_KEY not configured."
+
+    print(f"🇫🇷 Calling Mistral: {model_id}")
+    try:
+        # The new Mistral SDK uses this syntax
+        chat_response = mistral_client.chat.complete(
+            model=model_id,
+            messages=messages,
+        )
+        return chat_response.choices[0].message.content
+    except Exception as e:
+        return f"❌ Mistral AI error: {str(e)}"

-# --- ROUTER ---
+# --- CENTRAL ROUTER ---
 def router(message, history, model_selector):
-    # Convert the history
     messages = []
     for user_msg, bot_msg in history:
         if user_msg: messages.append({"role": "user", "content": user_msg})
         if bot_msg: messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})

-    # Logic based on YOUR Groq screenshot
+    # --- SELECTION ---
     if "H200" in model_selector:
         return run_local_h200(messages)

-    # UPDATED TO LLAMA 3.3 (the new one from your screenshot)
-    elif "Llama 3.3 70B" in model_selector:
+    # Groq route
+    elif "Llama 3.3" in model_selector:
         return run_groq(messages, "llama-3.3-70b-versatile")
-
     elif "Llama 3.1 8B" in model_selector:
         return run_groq(messages, "llama-3.1-8b-instant")

+    # Mistral route (NEW) - each check must be a substring of the dropdown label below
+    elif "Mistral: Large" in model_selector:
+        return run_mistral(messages, "mistral-large-latest")  # the monster from your screenshot
+    elif "Codestral" in model_selector:
+        return run_mistral(messages, "codestral-latest")  # code specialist
+
     else:
-        return "⚠️ Model not recognized. Check the dropdown."
+        return "⚠️ Model not recognized."

 # --- INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🔀 APIDOST Router V2")
+    gr.Markdown("# 🔀 APIDOST Router V3: Trinity")

     with gr.Row():
         model_dropdown = gr.Dropdown(
             choices=[
-                "☁️ Groq: Llama 3.3 70B (New & Versatile)",
-                "☁️ Groq: Llama 3.1 8B (Flash - Instant)",
-                "🔥 Local H200: Qwen 2.5 Coder 32B (Burns Quota!)"
+                "☁️ Groq: Llama 3.3 70B (Versatile)",
+                "☁️ Groq: Llama 3.1 8B (Flash)",
+                "🇫🇷 Mistral: Large 3 (SOTA - Smart)",
+                "🇫🇷 Mistral: Codestral (Code Specialist)",
+                "🔥 Local H200: Qwen 2.5 Coder 32B (ZeroGPU quota)"
             ],
-            value="☁️ Groq: Llama 3.3 70B (New & Versatile)",
+            value="☁️ Groq: Llama 3.3 70B (Versatile)",
             label="Choose the Brain",
             interactive=True
         )

-    # This automatically creates the /chat endpoint
-    chat = gr.ChatInterface(
-        fn=router,
-        additional_inputs=[model_dropdown]
-    )
+    chat = gr.ChatInterface(fn=router, additional_inputs=[model_dropdown])

 if __name__ == "__main__":
-    # NO extra parameters. This fixes the startup error.
     demo.launch()
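
The removed lines above note that gr.ChatInterface automatically exposes a /chat endpoint, so the updated Space can also be driven programmatically. Below is a minimal sketch using gradio_client; the Space id ("Madras1/apidost-router") and the exact argument order for the additional dropdown input are assumptions, not part of this commit.

from gradio_client import Client

# Hypothetical Space id - replace with the repo this commit actually belongs to.
client = Client("Madras1/apidost-router")

# ChatInterface conventionally exposes its function at api_name="/chat";
# the second positional argument feeds the model_dropdown additional input.
reply = client.predict(
    "Write a Python function that reverses a string",
    "☁️ Groq: Llama 3.3 70B (Versatile)",
    api_name="/chat",
)
print(reply)

Since the commit adds "from mistralai import Mistral", the Space's requirements.txt (not shown here) presumably also needs the mistralai package alongside groq for the new routes to work.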