Madras1 committed on
Commit
7c69d42
·
verified ·
1 Parent(s): 3d6a4cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -20
app.py CHANGED
@@ -10,16 +10,15 @@ LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
10
  local_model = None
11
  local_tokenizer = None
12
 
13
- # Cliente Groq (LPU Nuvem)
14
  api_key = os.environ.get("GROQ_API_KEY")
15
  groq_client = Groq(api_key=api_key) if api_key else None
16
 
17
- # --- FUNÇÃO 1: H200 (ZeroGPU) ---
18
  @spaces.GPU(duration=60)
19
  def run_local_h200(messages):
20
  global local_model, local_tokenizer
21
 
22
- # Se a cota acabou, isso aqui vai dar erro "GPU task aborted"
23
  if local_model is None:
24
  print(f"🐢 Cold Start: Carregando {LOCAL_MODEL_ID}...")
25
  local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
@@ -31,14 +30,20 @@ def run_local_h200(messages):
31
 
32
  text = local_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
33
  inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
34
- outputs = local_model.generate(**inputs, max_new_tokens=2048, temperature=0.6, do_sample=True)
 
 
 
 
 
 
35
  return local_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
36
 
37
- # --- FUNÇÃO 2: GROQ (Nuvem - SALVA VIDAS) ---
38
  def run_groq(messages, model_id):
39
  if not groq_client:
40
- return "❌ Erro: Chave GROQ_API_KEY não configurada. Adicione nos Secrets."
41
-
42
  try:
43
  completion = groq_client.chat.completions.create(
44
  model=model_id,
@@ -51,42 +56,53 @@ def run_groq(messages, model_id):
51
  )
52
  return completion.choices[0].message.content
53
  except Exception as e:
54
- return f"❌ Erro no Groq: {str(e)}"
55
 
56
  # --- ROTEADOR ---
57
  def router(message, history, model_selector):
 
58
  messages = []
59
  for user_msg, bot_msg in history:
60
  if user_msg: messages.append({"role": "user", "content": user_msg})
61
  if bot_msg: messages.append({"role": "assistant", "content": bot_msg})
62
  messages.append({"role": "user", "content": message})
63
 
 
64
  if "H200" in model_selector:
65
  return run_local_h200(messages)
66
- elif "openai/gpt-oss-120b" in model_selector:
67
- return run_groq(messages, "openai/gpt-oss-120b")
 
 
 
68
  elif "Llama 3.1 8B" in model_selector:
69
  return run_groq(messages, "llama-3.1-8b-instant")
70
- elif "openai/gpt-oss-20b" in model_selector:
71
- return run_groq(messages, "openai/gpt-oss-20b")
72
  else:
73
- return "⚠️ Modelo não reconhecido."
74
 
75
  # --- INTERFACE ---
76
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
77
- gr.Markdown("# 🔀 APIDOST Router")
 
78
  with gr.Row():
79
  model_dropdown = gr.Dropdown(
80
  choices=[
81
- "☁️ Groq: Llama 3.1 70B (Rápido)",
82
- "☁️ Groq: Llama 3.1 8B (Flash)",
83
  "🔥 Local H200: Qwen 2.5 Coder 32B (Gasta Cota!)"
84
  ],
85
- value="☁️ Groq: Llama 3.1 70B (Rápido)",
86
- label="Escolha o Cérebro"
 
87
  )
88
- chat = gr.ChatInterface(fn=router, additional_inputs=[model_dropdown])
 
 
 
 
 
89
 
90
  if __name__ == "__main__":
91
- # SEM ARGUMENTOS EXTRAS! Deixa o HF lidar com o CORS.
92
  demo.launch()
 
10
  local_model = None
11
  local_tokenizer = None
12
 
13
+ # Cliente Groq
14
  api_key = os.environ.get("GROQ_API_KEY")
15
  groq_client = Groq(api_key=api_key) if api_key else None
16
 
17
+ # --- FUNÇÃO 1: H200 (ZeroGPU - Cota Limitada) ---
18
  @spaces.GPU(duration=60)
19
  def run_local_h200(messages):
20
  global local_model, local_tokenizer
21
 
 
22
  if local_model is None:
23
  print(f"🐢 Cold Start: Carregando {LOCAL_MODEL_ID}...")
24
  local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
 
30
 
31
  text = local_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
32
  inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
33
+
34
+ outputs = local_model.generate(
35
+ **inputs,
36
+ max_new_tokens=2048,
37
+ temperature=0.6,
38
+ do_sample=True
39
+ )
40
  return local_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
41
 
42
+ # --- FUNÇÃO 2: GROQ (Nuvem - Rápido e Grátis) ---
43
  def run_groq(messages, model_id):
44
  if not groq_client:
45
+ return "❌ Erro: Chave GROQ_API_KEY não configurada nos Secrets."
46
+
47
  try:
48
  completion = groq_client.chat.completions.create(
49
  model=model_id,
 
56
  )
57
  return completion.choices[0].message.content
58
  except Exception as e:
59
+ return f"❌ Erro na Groq: {str(e)}"
60
 
61
  # --- ROTEADOR ---
62
def router(message, history, model_selector):
    """Dispatch one chat turn to the backend selected in the dropdown.

    Args:
        message: the new user message (str).
        history: Gradio tuple-format history, a list of (user, assistant) pairs.
        model_selector: the dropdown label; routing matches on substrings of it.

    Returns:
        The backend's reply string, or a warning string for an unknown label.
    """
    # Flatten the (user, assistant) tuple history into OpenAI-style messages,
    # skipping empty turns.
    conversation = []
    for human_turn, assistant_turn in history:
        if human_turn:
            conversation.append({"role": "user", "content": human_turn})
        if assistant_turn:
            conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})

    # Guard-clause dispatch on the dropdown label; the local H200 path wins first.
    if "H200" in model_selector:
        return run_local_h200(conversation)
    if "Llama 3.3 70B" in model_selector:
        return run_groq(conversation, "llama-3.3-70b-versatile")
    if "Llama 3.1 8B" in model_selector:
        return run_groq(conversation, "llama-3.1-8b-instant")
    return "⚠️ Modelo não reconhecido. Verifique o dropdown."
83
 
84
  # --- INTERFACE ---
85
# Build the Gradio UI: a model-picker dropdown plus a chat panel wired to router().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔀 APIDOST Router V2")

    with gr.Row():
        # Routing in router() matches on substrings of these labels
        # ("H200", "Llama 3.3 70B", "Llama 3.1 8B") — keep them in sync.
        model_dropdown = gr.Dropdown(
            choices=[
                "☁️ Groq: Llama 3.3 70B (Novo & Versátil)",
                "☁️ Groq: Llama 3.1 8B (Flash - Instantâneo)",
                "🔥 Local H200: Qwen 2.5 Coder 32B (Gasta Cota!)"
            ],
            value="☁️ Groq: Llama 3.3 70B (Novo & Versátil)",
            label="Escolha o Cérebro",
            interactive=True
        )

    # ChatInterface automatically exposes the /chat endpoint; the dropdown
    # value is passed to router() as its third argument.
    chat = gr.ChatInterface(
        fn=router,
        additional_inputs=[model_dropdown]
    )
105
 
106
if __name__ == "__main__":
    # No extra launch parameters — per the commit note, passing extras here
    # caused a startup error; defaults let Hugging Face handle hosting/CORS.
    demo.launch()