Madras1 committed
Commit 694a0f1 · verified · 1 Parent(s): d61ba2c

Update app.py

Files changed (1):
  app.py (+65, −24)
app.py CHANGED
@@ -8,12 +8,14 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from groq import Groq
 from mistralai import Mistral
 import google.generativeai as genai
+from huggingface_hub import snapshot_download

 # --- CONFIGURATION ---
 LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
 local_model = None
 local_tokenizer = None

+# API clients
 groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) if os.environ.get("GROQ_API_KEY") else None
 mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) if os.environ.get("MISTRAL_API_KEY") else None
 if os.environ.get("GEMINI_API_KEY"):
@@ -27,25 +29,43 @@ def encode_image(image_path):
     except Exception:
         return None

+# --- PRE-DOWNLOAD FUNCTION ---
+def download_local_model():
+    print(f"⏳ Berta: Downloading {LOCAL_MODEL_ID} to the cache...")
+    try:
+        snapshot_download(repo_id=LOCAL_MODEL_ID)
+        print("✅ Download complete!")
+    except Exception as e:
+        print(f"⚠️ Warning: pre-download failed: {e}")
+
 # --- BACKENDS ---

-@spaces.GPU(duration=60)
+@spaces.GPU(duration=120)
 def run_local_h200(messages):
     for m in messages:
         if isinstance(m['content'], list):
-            return "⚠️ Qwen H200 does not support images. Use Gemini or Pixtral."
+            return "⚠️ Qwen H200 (Local) does not support images. Use Gemini or Pixtral."

     global local_model, local_tokenizer
+
     if local_model is None:
-        print(f"🐢 Loading {LOCAL_MODEL_ID}...")
+        print(f"🐢 Loading {LOCAL_MODEL_ID} into H200 VRAM...")
         local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
         local_model = AutoModelForCausalLM.from_pretrained(
-            LOCAL_MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda"
+            LOCAL_MODEL_ID,
+            torch_dtype=torch.bfloat16,
+            device_map="cuda"
         )

     text = local_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
-    outputs = local_model.generate(**inputs, max_new_tokens=2048, temperature=0.6, do_sample=True)
+
+    outputs = local_model.generate(
+        **inputs,
+        max_new_tokens=4096,
+        temperature=0.6,
+        do_sample=True
+    )
     return local_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

 def run_groq(messages, model_id):
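This hunk pairs two patterns worth separating out: snapshot_download warms the Hugging Face cache at startup on the CPU host, so the first call under @spaces.GPU pays only a disk load instead of a multi-gigabyte network fetch, while the module-level globals make that load happen once per process. A minimal sketch of the same warm-then-lazy-load pattern (get_model is an illustrative name, not part of the commit):

import torch
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM

MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"

# Startup (CPU host): network-bound fetch into the local HF cache.
snapshot_download(repo_id=MODEL_ID)

_model = None

def get_model():
    # First GPU-decorated call: disk-bound load from the warmed cache, then reuse.
    global _model
    if _model is None:
        _model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda"
        )
    return _model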
@@ -112,7 +132,8 @@ def run_gemini(messages, model_id):
             elif item.get('type') == 'image_url':
                 path = item['image_url']['url']
                 if os.path.exists(path): parts.append(Image.open(path))
-        chat_history.append({"role": role, "parts": parts})
+        if parts:
+            chat_history.append({"role": role, "parts": parts})

     last_msg = messages[-1]
     current_parts = []
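The new `if parts:` guard exists because a history turn can end up with an empty parts list (for example, an image whose temp file has been cleaned up), and the Gemini API rejects content entries with no parts. A tiny sketch of the same filtering with made-up turns:

# Hypothetical turns; the second one lost its image file, leaving parts empty.
raw_turns = [
    {"role": "user", "parts": ["describe this image"]},
    {"role": "user", "parts": []},  # an empty-parts entry would make the API call fail
]
chat_history = [t for t in raw_turns if t["parts"]]  # keep only non-empty turns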
@@ -130,17 +151,32 @@ def run_gemini(messages, model_id):
         return response.text
     except Exception as e: return f"❌ Gemini Error ({model_id}): {e}"

-# --- ROUTER ---
+# --- ROUTER (NOW HARDENED!) ---
 def router(message, history, model_selector):
     formatted_history = []

-    # Process history
-    for user_turn, bot_turn in history:
-        u_text = str(user_turn)
-        if isinstance(user_turn, tuple): u_text = user_turn[0]
-        formatted_history.append({"role": "user", "content": u_text})
-        if bot_turn:
-            formatted_history.append({"role": "assistant", "content": str(bot_turn)})
+    # --- BERTA FIX: universal history handling ---
+    # This fixes the "too many values to unpack" error
+    for turn in history:
+        # CASE 1: legacy tuple format [[user, bot]]
+        if isinstance(turn, (list, tuple)) and len(turn) >= 2:
+            user_content = turn[0]
+            bot_content = turn[1]
+
+            # Extract the text if the content is a complex payload
+            if isinstance(user_content, dict) and 'text' in user_content:
+                user_content = user_content['text']
+
+            formatted_history.append({"role": "user", "content": str(user_content)})
+            if bot_content:
+                formatted_history.append({"role": "assistant", "content": str(bot_content)})
+
+        # CASE 2: new "messages" format (dict)
+        elif isinstance(turn, dict):
+            # Already in the right format; just copy it
+            formatted_history.append(turn)
+
+    # --- END OF FIX ---

     # Process the CURRENT message
     current_content = []
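This loop is the heart of the commit: Gradio's older ChatInterface passed history as [user, bot] pairs, while the newer messages format passes a flat list of {"role": ..., "content": ...} dicts, so `for user_turn, bot_turn in history` raises "too many values to unpack" on the new shape. A self-contained sketch of the same normalization (normalize_history is an illustrative name), checked against both shapes:

def normalize_history(history):
    formatted = []
    for turn in history:
        if isinstance(turn, (list, tuple)) and len(turn) >= 2:  # legacy pair format
            user, bot = turn[0], turn[1]
            if isinstance(user, dict) and 'text' in user:  # multimodal payload
                user = user['text']
            formatted.append({"role": "user", "content": str(user)})
            if bot:
                formatted.append({"role": "assistant", "content": str(bot)})
        elif isinstance(turn, dict):  # already messages-style; copy through
            formatted.append(turn)
    return formatted

# Both shapes normalize to the same flat messages list:
legacy = [("hi", "hello!")]
messages = [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello!"}]
assert normalize_history(legacy) == normalize_history(messages)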
@@ -161,9 +197,9 @@ def router(message, history, model_selector):
     # Routing
     if "Gemini" in model_selector:
         tid = "gemini-1.5-flash"
-        if "3.0" in model_selector: tid = "gemini-3.0-pro-preview"
-        elif "2.5 Pro" in model_selector: tid = "gemini-2.5-pro"
-        elif "2.5 Flash" in model_selector: tid = "gemini-2.5-flash"
+        if "3.0" in model_selector: tid = "gemini-3.0-pro-preview"
+        elif "2.5 Pro" in model_selector: tid = "gemini-2.5-pro"
+        elif "2.5 Flash" in model_selector: tid = "gemini-2.5-flash"
         elif "2.0 Flash" in model_selector: tid = "gemini-2.0-flash-exp"
         return run_gemini(formatted_history, tid)

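The substring cascade restored here is order-sensitive ("3.0" before the "2.5" variants) and has to be kept in sync with the dropdown labels by hand. A table-driven equivalent, offered only as a sketch using the same fragments and model ids from the diff:

GEMINI_IDS = {
    "3.0":       "gemini-3.0-pro-preview",
    "2.5 Pro":   "gemini-2.5-pro",
    "2.5 Flash": "gemini-2.5-flash",
    "2.0 Flash": "gemini-2.0-flash-exp",
}

def gemini_id(selector, default="gemini-1.5-flash"):
    # First matching fragment wins; dict order is insertion order in Python 3.7+.
    return next((mid for frag, mid in GEMINI_IDS.items() if frag in selector), default)

assert gemini_id("✨ Google: Gemini 2.5 Flash") == "gemini-2.5-flash"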
@@ -183,30 +219,35 @@ def router(message, history, model_selector):

     return "Unknown model."

-# --- INTERFACE (NO THEME - THE REAL FIX) ---
-with gr.Blocks() as demo: # <--- THEME REMOVED HERE
-    gr.Markdown("# 🔀 APIDOST (Safe Mode)")
+# --- INTERFACE ---
+with gr.Blocks() as demo:
+    gr.Markdown("# 🔀 APIDOST (Robust Mode)")

     with gr.Row():
         model_dropdown = gr.Dropdown(
             choices=[
                 "✨ Google: Gemini 3.0 Pro (Experimental)",
+                "✨ Google: Gemini 2.5 Pro",
                 "✨ Google: Gemini 2.5 Flash",
+                "✨ Google: Gemini 2.0 Flash",
                 "☁️ Groq: Llama 3.3 70B",
                 "🇫🇷 Mistral: Pixtral Large (Vision) 🖼️",
                 "🇫🇷 Mistral: Large 2512 (Dec/25)",
-                "🔥 Local H200: Qwen 2.5 Coder"
+                "🇫🇷 Mistral: Magistral Medium",
+                "🇫🇷 Mistral: Codestral 2508",
+                "🔥 Local H200: Qwen 2.5 Coder 32B"
             ],
-            value="🇫🇷 Mistral: Pixtral Large (Vision) 🖼️",
-            label="Brain",
+            value="🔥 Local H200: Qwen 2.5 Coder 32B",
+            label="Chosen Brain",
             interactive=True
         )

     chat = gr.ChatInterface(
         fn=router,
         additional_inputs=[model_dropdown],
-        multimodal=True,
+        multimodal=True,
     )

 if __name__ == "__main__":
+    download_local_model()
     demo.launch(server_name="0.0.0.0", server_port=7860)
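One wiring detail the whole diff leans on: components listed in additional_inputs have their current values appended, in order, after (message, history) in the callback signature, which is how the dropdown reaches router's model_selector parameter; and with multimodal=True the message arrives as a dict with "text" and "files" keys. A stripped-down sketch of that contract (choices are placeholders):

import gradio as gr

def router(message, history, model_selector):
    # multimodal=True delivers message as {"text": ..., "files": [...]}
    text = message["text"] if isinstance(message, dict) else message
    return f"[{model_selector}] echo: {text}"

with gr.Blocks() as demo:
    dropdown = gr.Dropdown(choices=["model-a", "model-b"], value="model-a", label="Brain")
    gr.ChatInterface(fn=router, additional_inputs=[dropdown], multimodal=True)

if __name__ == "__main__":
    demo.launch()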
 