Madras1 committed on
Commit
b501e0a
·
verified ·
1 Parent(s): f4a1499

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -39
app.py CHANGED
@@ -5,24 +5,26 @@ import os
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
  from groq import Groq
7
 
8
- # --- 1. Configuração do Peso Pesado (H200 Local) ---
9
- # Esse roda na placa da Nvidia de graça, mas gasta sua cota.
10
  LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
11
  local_model = None
12
  local_tokenizer = None
13
 
14
- # --- 2. Configuração da Groq (Nuvem Rápida) ---
15
- # Esse roda na API deles, não gasta sua cota do ZeroGPU.
16
- # Pegue a chave em: https://console.groq.com/keys
17
- groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
18
 
19
- # --- Função ZeroGPU (Blindada com Cota Pequena) ---
20
- @spaces.GPU(duration=60) # 60s pra não queimar tudo de uma vez
21
  def run_local_h200(messages):
22
  global local_model, local_tokenizer
23
 
24
  if local_model is None:
25
- print(f"🐢 Cold Start: Carregando {LOCAL_MODEL_ID} na H200...")
26
  local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
27
  local_model = AutoModelForCausalLM.from_pretrained(
28
  LOCAL_MODEL_ID,
@@ -30,7 +32,6 @@ def run_local_h200(messages):
30
  device_map="cuda"
31
  )
32
 
33
- # Prepara o prompt
34
  text = local_tokenizer.apply_chat_template(
35
  messages,
36
  tokenize=False,
@@ -38,20 +39,20 @@ def run_local_h200(messages):
38
  )
39
  inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
40
 
41
- # Gera a resposta
42
  outputs = local_model.generate(
43
  **inputs,
44
- max_new_tokens=2048, # Aumentei pra caber código grande
45
  temperature=0.6,
46
  do_sample=True
47
  )
48
 
49
- response = local_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
50
- return response
51
 
52
- # --- Função Groq (Rápida e Grátis) ---
53
  def run_groq(messages, model_id):
54
- print(f"⚡ Roteando para Groq: {model_id}")
 
 
55
  try:
56
  completion = groq_client.chat.completions.create(
57
  model=model_id,
@@ -64,40 +65,33 @@ def run_groq(messages, model_id):
64
  )
65
  return completion.choices[0].message.content
66
  except Exception as e:
67
- return f"❌ Erro no Groq (Verifique a API Key): {str(e)}"
68
 
69
- # --- O CÉREBRO (Roteador) ---
70
  def router(message, history, model_selector):
71
- # Formata histórico
72
  messages = []
73
  for user_msg, bot_msg in history:
74
  if user_msg: messages.append({"role": "user", "content": user_msg})
75
  if bot_msg: messages.append({"role": "assistant", "content": bot_msg})
76
  messages.append({"role": "user", "content": message})
77
 
78
- # Mapa de Roteamento
79
  if "H200" in model_selector:
80
  return run_local_h200(messages)
81
-
82
- elif "llama-3.1-8b-instant" in model_selector:
 
83
  return run_groq(messages, "llama-3.1-8b-instant")
84
-
85
- elif "llama-3.3-70b-versatile" in model_selector:
86
- return run_groq(messages, "llama-3.3-70b-versatile")
87
-
88
- elif "openai/gpt-oss-120b" in model_selector:
89
- return run_groq(messages, "openai/gpt-oss-120b")
90
-
91
- elif "openai/gpt-oss-20b" in model_selector:
92
- return run_groq(messages, "openai/gpt-oss-20b")
93
-
94
  else:
95
- return "⚠️ Modelo não configurado no roteador."
96
 
97
- # --- Interface Gráfica ---
98
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
99
- gr.Markdown("# 🔀 APIDOST: O Hub do Gabriel")
100
- gr.Markdown("Selecione o cérebro que você quer usar.")
101
 
102
  with gr.Row():
103
  model_dropdown = gr.Dropdown(
@@ -109,11 +103,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
109
  "🔥 Local H200: Qwen 2.5 Coder 32B (Gasta Cota!)"
110
  ],
111
  value="☁️ Groq: Llama 3.1 70B (Inteligente & Rápido)",
112
- label="🤖 Escolha o Modelo",
113
  interactive=True
114
  )
115
 
116
- # A interface de chat conecta no roteador
117
  chat = gr.ChatInterface(
118
  fn=router,
119
  additional_inputs=[model_dropdown]
 
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
  from groq import Groq
7
 
8
# --- Configuration ---
LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"

# Lazily-initialised handles for the local model; populated on first use.
local_model = None
local_tokenizer = None

# Build the Groq client only when a key is present, so a missing secret
# degrades into a startup warning instead of crashing at import time.
api_key = os.environ.get("GROQ_API_KEY")
groq_client = Groq(api_key=api_key) if api_key else None
if groq_client is None:
    print("⚠️ AVISO: GROQ_API_KEY não encontrada nos Secrets!")
20
 
21
+ # --- Função H200 (ZeroGPU) ---
22
+ @spaces.GPU(duration=60)
23
  def run_local_h200(messages):
24
  global local_model, local_tokenizer
25
 
26
  if local_model is None:
27
+ print(f"🐢 Cold Start: Carregando {LOCAL_MODEL_ID}...")
28
  local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
29
  local_model = AutoModelForCausalLM.from_pretrained(
30
  LOCAL_MODEL_ID,
 
32
  device_map="cuda"
33
  )
34
 
 
35
  text = local_tokenizer.apply_chat_template(
36
  messages,
37
  tokenize=False,
 
39
  )
40
  inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
41
 
 
42
  outputs = local_model.generate(
43
  **inputs,
44
+ max_new_tokens=2048,
45
  temperature=0.6,
46
  do_sample=True
47
  )
48
 
49
+ return local_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
50
 
51
+ # --- Função Groq ---
52
  def run_groq(messages, model_id):
53
+ if not groq_client:
54
+ return "❌ Erro: Configure a GROQ_API_KEY nos Settings do Space."
55
+
56
  try:
57
  completion = groq_client.chat.completions.create(
58
  model=model_id,
 
65
  )
66
  return completion.choices[0].message.content
67
  except Exception as e:
68
+ return f"❌ Erro no Groq: {str(e)}"
69
 
70
# --- Router ---
def router(message, history, model_selector):
    """Format the chat history and dispatch to the backend whose marker
    substring appears in the selected dropdown label.

    Returns the backend's reply string, or a warning string when no
    marker matches the selected label.
    """
    messages = []
    for user_turn, bot_turn in history:
        for role, content in (("user", user_turn), ("assistant", bot_turn)):
            if content:
                messages.append({"role": role, "content": content})
    messages.append({"role": "user", "content": message})

    # Local ZeroGPU path takes priority over any cloud route.
    if "H200" in model_selector:
        return run_local_h200(messages)

    # Ordered (label marker -> Groq model id) table; first match wins,
    # preserving the original if/elif ordering.
    # NOTE(review): `llama-3.1-70b-versatile` may be retired on Groq
    # (the previous revision used `llama-3.3-70b-versatile`) — confirm
    # against the current Groq model list.
    groq_routes = (
        ("Llama 3.1 70B", "llama-3.1-70b-versatile"),
        ("Llama 3.1 8B", "llama-3.1-8b-instant"),
        ("Gemma 2 9B", "gemma2-9b-it"),
        ("Mixtral", "mixtral-8x7b-32768"),
    )
    for marker, model_id in groq_routes:
        if marker in model_selector:
            return run_groq(messages, model_id)

    return "⚠️ Modelo não encontrado."
90
 
91
+ # --- Interface (SIMPLIFICADA) ---
92
+ # Removi o theme=gr.themes.Soft() que estava quebrando
93
+ with gr.Blocks() as demo:
94
+ gr.Markdown("# 🔀 APIDOST Router")
95
 
96
  with gr.Row():
97
  model_dropdown = gr.Dropdown(
 
103
  "🔥 Local H200: Qwen 2.5 Coder 32B (Gasta Cota!)"
104
  ],
105
  value="☁️ Groq: Llama 3.1 70B (Inteligente & Rápido)",
106
+ label="Escolha o Modelo",
107
  interactive=True
108
  )
109
 
 
110
  chat = gr.ChatInterface(
111
  fn=router,
112
  additional_inputs=[model_dropdown]