Madras1 committed on
Commit
33141da
·
verified ·
1 Parent(s): ec5d8d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -119
app.py CHANGED
@@ -4,64 +4,72 @@ import torch
4
  import os
5
  import time
6
  import base64
7
- from collections import defaultdict
8
- from PIL import Image
9
  from transformers import AutoModelForCausalLM, AutoTokenizer
10
  from groq import Groq
11
  from mistralai import Mistral
12
  import google.generativeai as genai
13
- from huggingface_hub import snapshot_download
14
 
15
- # --- SEGURANÇA: RATE LIMITER ---
 
16
  MAX_REQUESTS_PER_MINUTE = 15
17
  BLOCK_TIME_SECONDS = 60
18
- ip_access_log = defaultdict(list)
19
 
20
- def verify_rate_limit(request: gr.Request):
21
- if not request: return True
22
  client_ip = request.client.host
23
- current_time = time.time()
24
- ip_access_log[client_ip] = [t for t in ip_access_log[client_ip] if current_time - t < BLOCK_TIME_SECONDS]
25
- if len(ip_access_log[client_ip]) >= MAX_REQUESTS_PER_MINUTE:
26
- print(f"⛔ BLOQUEIO: IP {client_ip} barrado.")
 
 
 
 
27
  return False
28
- ip_access_log[client_ip].append(current_time)
 
 
 
29
  return True
30
 
31
- # --- LOCAL: QWEN CODER H200 ---
 
 
32
  LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
33
  local_model = None
34
  local_tokenizer = None
35
 
36
- # Clientes API
37
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) if os.environ.get("GROQ_API_KEY") else None
38
  mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) if os.environ.get("MISTRAL_API_KEY") else None
39
  if os.environ.get("GEMINI_API_KEY"):
40
  genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
41
 
42
- # --- HELPER IMAGEM ---
43
  def encode_image(image_path):
44
  try:
45
  with open(image_path, "rb") as image_file:
46
  return base64.b64encode(image_file.read()).decode('utf-8')
47
- except Exception: return None
48
 
49
- # --- DOWNLOADER ---
50
- def download_local_model():
51
- print(f"⏳ Cache: Verificando {LOCAL_MODEL_ID}...")
52
- try: snapshot_download(repo_id=LOCAL_MODEL_ID)
53
- except Exception as e: print(f"⚠️ Aviso: {e}")
54
-
55
- # --- BACKENDS ---
56
 
57
- @spaces.GPU(duration=120)
58
  def run_local_h200(messages):
59
- for m in messages:
60
- if isinstance(m['content'], list): return "⚠️ Qwen Local não lê imagens. Use Gemini/Pixtral."
61
  global local_model, local_tokenizer
 
 
 
 
 
62
  if local_model is None:
 
63
  local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
64
- local_model = AutoModelForCausalLM.from_pretrained(LOCAL_MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda")
 
 
65
 
66
  text = local_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
67
  inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
@@ -70,8 +78,10 @@ def run_local_h200(messages):
70
 
71
  def run_groq(messages, model_id):
72
  for m in messages:
73
- if isinstance(m['content'], list): return "⚠️ Groq não imagens. Use Gemini/Pixtral."
74
- if not groq_client: return "❌ Erro: API Key Groq ausente."
 
 
75
  clean_msgs = [{"role": m['role'], "content": m['content']} for m in messages]
76
  try:
77
  completion = groq_client.chat.completions.create(
@@ -81,20 +91,23 @@ def run_groq(messages, model_id):
81
  except Exception as e: return f"❌ Groq Error: {e}"
82
 
83
  def run_mistral(messages, model_id):
84
- if not mistral_client: return "❌ Erro: API Key Mistral ausente."
 
 
85
  formatted_msgs = []
86
  for m in messages:
 
87
  new_content = []
88
- if isinstance(m['content'], str): new_content = m['content']
89
- elif isinstance(m['content'], list):
90
- for item in m['content']:
91
- if item.get('type') == 'text': new_content.append({"type": "text", "text": item['text']})
92
  elif item.get('type') == 'image_url':
93
  url = item['image_url']['url']
94
  if not url.startswith("data:") and os.path.exists(url):
95
  b64 = encode_image(url)
96
  new_content.append({"type": "image_url", "image_url": f"data:image/jpeg;base64,{b64}"})
97
- else: new_content.append({"type": "image_url", "image_url": url})
98
  formatted_msgs.append({"role": m['role'], "content": new_content})
99
 
100
  try:
@@ -103,130 +116,145 @@ def run_mistral(messages, model_id):
103
  except Exception as e: return f"❌ Mistral Error: {e}"
104
 
105
  def run_gemini(messages, model_id):
106
- if not os.environ.get("GEMINI_API_KEY"): return "❌ Erro: API Key Gemini ausente."
107
  try:
108
  model = genai.GenerativeModel(model_id)
109
  chat_history = []
 
 
110
  for m in messages[:-1]:
111
  role = "user" if m['role'] == "user" else "model"
112
  parts = []
113
- if isinstance(m['content'], str): parts.append(m['content'])
114
- elif isinstance(m['content'], list):
115
- for item in m['content']:
 
116
  if item.get('type') == 'text': parts.append(item['text'])
117
  elif item.get('type') == 'image_url':
118
  path = item['image_url']['url']
119
  if os.path.exists(path): parts.append(Image.open(path))
120
  if parts: chat_history.append({"role": role, "parts": parts})
121
-
122
- last_msg = messages[-1]['content']
123
- current_parts = []
124
- if isinstance(last_msg, str): current_parts.append(last_msg)
125
- elif isinstance(last_msg, list):
126
- for item in last_msg:
127
- if item.get('type') == 'text': current_parts.append(item['text'])
 
128
  elif item.get('type') == 'image_url':
129
  path = item['image_url']['url']
130
- if os.path.exists(path): current_parts.append(Image.open(path))
131
-
132
  chat = model.start_chat(history=chat_history)
133
- response = chat.send_message(current_parts)
134
  return response.text
135
- except Exception as e: return f"❌ Gemini Error ({model_id}): {e}"
136
 
137
- # --- ROTEADOR ---
138
  def router(message, history, model_selector, request: gr.Request):
139
- if not verify_rate_limit(request):
140
- return f"⛔ LIMITADO: Aguarde para enviar mais mensagens."
 
141
 
142
- formatted_history = []
 
143
  if history:
144
  for turn in history:
145
- if isinstance(turn, dict): formatted_history.append(turn)
146
- elif isinstance(turn, (list, tuple)) and len(turn) >= 2:
147
- u = turn[0]['text'] if isinstance(turn[0], dict) and 'text' in turn[0] else str(turn[0])
148
- b = str(turn[1]) if turn[1] else ""
149
- formatted_history.append({"role": "user", "content": u})
150
- if b: formatted_history.append({"role": "assistant", "content": b})
151
-
 
 
 
 
 
 
152
  current_content = []
153
- text = message.get("text", "")
154
- files = message.get("files", [])
155
- if text: current_content.append({"type": "text", "text": text})
156
- for f in files: current_content.append({"type": "image_url", "image_url": {"url": f}})
157
-
158
- if not files: formatted_history.append({"role": "user", "content": text})
159
- else: formatted_history.append({"role": "user", "content": current_content})
 
 
 
160
 
161
- # SELEÇÃO (IDs CORRIGIDOS)
 
 
 
162
  if "Gemini" in model_selector:
163
  tid = "gemini-1.5-flash"
164
- if "3.0" in model_selector: tid = "gemini-3.0-pro-preview"
165
- elif "2.5 Pro" in model_selector: tid = "gemini-2.5-pro"
166
- elif "2.5 Flash" in model_selector: tid = "gemini-2.5-flash"
167
- elif "2.0 Flash" in model_selector: tid = "gemini-2.0-flash-exp"
168
- return run_gemini(formatted_history, tid)
169
 
 
170
  elif "Mistral" in model_selector:
171
  tid = "mistral-large-latest"
172
  if "Pixtral" in model_selector: tid = "pixtral-large-latest"
173
- elif "2509" in model_selector: tid = "magistral-medium-2509"
174
  elif "2512" in model_selector: tid = "mistral-large-2512"
175
  elif "Codestral" in model_selector: tid = "codestral-2508"
176
- return run_mistral(formatted_history, tid)
177
 
 
178
  elif "Groq" in model_selector:
179
- if "120B" in model_selector: tid = "openai/gpt-oss-120b"
180
- elif "20B" in model_selector: tid = "openai/gpt-oss-20b"
181
- else: tid = "llama-3.3-70b-versatile"
182
- return run_groq(formatted_history, tid)
183
 
 
184
  elif "H200" in model_selector:
185
- return run_local_h200(formatted_history)
186
 
187
- return "Modelo desconhecido."
188
 
189
- # --- INTERFACE ---
190
- with gr.Blocks() as demo:
191
- gr.Markdown("# 🔀 APIDOST v7 (Stable)")
192
 
193
- models_list = [
194
- "✨ Google: Gemini 3.0 Pro (Experimental)",
195
- "✨ Google: Gemini 2.5 Pro",
196
- "✨ Google: Gemini 2.5 Flash",
197
- "✨ Google: Gemini 2.0 Flash",
198
- "☁️ Groq: GPT OSS 120B (OpenAI) 🆕",
199
- "☁️ Groq: GPT OSS 20B (OpenAI) 🆕",
200
- "☁️ Groq: Llama 3.3 70B",
201
- "🇫🇷 Mistral: Magistral Medium 2509 🆕",
202
- "🇫🇷 Mistral: Pixtral Large (Vision) 🖼️",
203
- "🇫🇷 Mistral: Large 2512 (Dez/25)",
204
- "🇫🇷 Mistral: Codestral 2508",
205
- "🔥 Local H200: Qwen 2.5 Coder 32B"
206
- ]
207
-
208
  with gr.Row():
209
- model_dropdown = gr.Dropdown(choices=models_list, value=models_list[-1], label="Cérebro", interactive=True)
210
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  chat = gr.ChatInterface(
212
- fn=router,
213
  additional_inputs=[model_dropdown],
214
- multimodal=True,
215
- )
216
-
217
- # CORREÇÃO FINAL AQUI:
218
- # Substituí 'gr.State' por 'gr.JSON' para não exigir retorno de estado.
219
- api_bridge = gr.Interface(
220
- fn=router,
221
- inputs=[
222
- gr.MultimodalTextbox(label="message"),
223
- gr.JSON(value=[], label="history"), # <--- MUDANÇA: JSON não trava o output
224
- gr.Dropdown(choices=models_list, label="model_selector")
225
- ],
226
- outputs=[gr.Textbox(label="response")],
227
- api_name="chat"
228
  )
229
 
230
  if __name__ == "__main__":
231
- download_local_model()
232
- demo.queue(api_open=True).launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
4
  import os
5
  import time
6
  import base64
 
 
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
  from groq import Groq
9
  from mistralai import Mistral
10
  import google.generativeai as genai
11
+ from huggingface_hub import snapshot_download
12
 
13
+ # --- 1. SEGURANÇA (RATE LIMIT) ---
14
+ # Simples e eficiente: bloqueia spammer sem quebrar o app.
15
  MAX_REQUESTS_PER_MINUTE = 15
16
  BLOCK_TIME_SECONDS = 60
17
+ ip_tracker = {}
18
 
19
def check_spam(request: gr.Request):
    """Sliding-window rate limiter keyed by client IP.

    Args:
        request: The Gradio request for the current call. A falsy value is
            treated as a local run and is always allowed.

    Returns:
        True when the call is allowed (its timestamp is then recorded);
        False when the caller already has MAX_REQUESTS_PER_MINUTE calls
        recorded within the last BLOCK_TIME_SECONDS.
    """
    if not request:
        return True  # local run: nothing to throttle

    # A request may carry no client address; bucket those callers together
    # instead of raising AttributeError on `.host`.
    # NOTE(review): sharing one "unknown" bucket is an assumption — confirm
    # this is the desired policy for address-less requests.
    client = getattr(request, "client", None)
    client_ip = getattr(client, "host", None) or "unknown"
    now = time.time()

    # Evict IPs whose newest timestamp has left the window so the tracker
    # only ever holds recently active callers. Timestamps are appended in
    # increasing order, so the last one is the newest.
    for ip, stamps in list(ip_tracker.items()):
        if not stamps or now - stamps[-1] >= BLOCK_TIME_SECONDS:
            ip_tracker.pop(ip, None)

    # Keep only this caller's timestamps that are still inside the window.
    recent = [t for t in ip_tracker.get(client_ip, []) if now - t < BLOCK_TIME_SECONDS]

    if len(recent) >= MAX_REQUESTS_PER_MINUTE:
        ip_tracker[client_ip] = recent
        return False

    recent.append(now)
    ip_tracker[client_ip] = recent
    return True
36
 
37
+ # --- 2. CONFIGURAÇÕES GLOBAIS ---
38
+
39
+ # LOCAL (H200 - ZeroGPU)
40
  LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
41
  local_model = None
42
  local_tokenizer = None
43
 
44
+ # CLIENTES API
45
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) if os.environ.get("GROQ_API_KEY") else None
46
  mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) if os.environ.get("MISTRAL_API_KEY") else None
47
  if os.environ.get("GEMINI_API_KEY"):
48
  genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
49
 
50
# --- 3. HELPER (IMAGE) ---
def encode_image(image_path):
    """Read a file and return its contents as a base64 string.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes decoded as UTF-8 text, or
        None when the path cannot be opened or read.
    """
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
    except (OSError, TypeError, ValueError):
        # Best effort: a missing/unreadable file or a malformed path yields
        # None, but interrupts and unrelated bugs are no longer swallowed.
        return None
56
 
57
+ # --- 4. FUNÇÕES DE EXECUÇÃO ---
 
 
 
 
 
 
58
 
59
+ @spaces.GPU(duration=120)
60
  def run_local_h200(messages):
 
 
61
  global local_model, local_tokenizer
62
+
63
+ # Validação rápida de imagem
64
+ for m in messages:
65
+ if isinstance(m['content'], list): return "⚠️ Modelo Local não suporta imagens. Use Gemini/Pixtral."
66
+
67
  if local_model is None:
68
+ print(f"🐢 Carregando {LOCAL_MODEL_ID}...")
69
  local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
70
+ local_model = AutoModelForCausalLM.from_pretrained(
71
+ LOCAL_MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda"
72
+ )
73
 
74
  text = local_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
75
  inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
 
78
 
79
  def run_groq(messages, model_id):
80
  for m in messages:
81
+ if isinstance(m['content'], list): return "⚠️ Groq não suporta imagens. Use Gemini/Pixtral."
82
+ if not groq_client: return "❌ Erro: GROQ_API_KEY ausente."
83
+
84
+ # Limpa formato para Groq
85
  clean_msgs = [{"role": m['role'], "content": m['content']} for m in messages]
86
  try:
87
  completion = groq_client.chat.completions.create(
 
91
  except Exception as e: return f"❌ Groq Error: {e}"
92
 
93
  def run_mistral(messages, model_id):
94
+ if not mistral_client: return "❌ Erro: MISTRAL_API_KEY ausente."
95
+
96
+ # Formata imagens para Mistral
97
  formatted_msgs = []
98
  for m in messages:
99
+ content = m['content']
100
  new_content = []
101
+ if isinstance(content, str): new_content = content
102
+ elif isinstance(content, list):
103
+ for item in content:
104
+ if item.get('type') == 'text': new_content.append(item)
105
  elif item.get('type') == 'image_url':
106
  url = item['image_url']['url']
107
  if not url.startswith("data:") and os.path.exists(url):
108
  b64 = encode_image(url)
109
  new_content.append({"type": "image_url", "image_url": f"data:image/jpeg;base64,{b64}"})
110
+ else: new_content.append(item)
111
  formatted_msgs.append({"role": m['role'], "content": new_content})
112
 
113
  try:
 
116
  except Exception as e: return f"❌ Mistral Error: {e}"
117
 
def _gemini_parts(content):
    """Convert one message's content into a list of Gemini parts.

    A string becomes a single text part. A list is scanned for items of
    type 'text' (their text is kept) and 'image_url' (the URL is treated as
    a local path and opened as an image when it exists on disk). Anything
    else yields no parts.
    """
    if isinstance(content, str):
        return [content]

    parts = []
    if isinstance(content, list):
        for item in content:
            kind = item.get('type')
            if kind == 'text':
                parts.append(item['text'])
            elif kind == 'image_url':
                path = item['image_url']['url']
                if os.path.exists(path):
                    # `Image` is not imported at module level in the visible
                    # import list, so bring it into scope here; without this
                    # every image message failed with a NameError.
                    from PIL import Image
                    parts.append(Image.open(path))
    return parts


def run_gemini(messages, model_id):
    """Send a conversation to a Gemini model and return the reply text.

    Args:
        messages: List of {'role', 'content'} dicts; all but the last become
            chat history and the last one is sent as the new message.
        model_id: Gemini model identifier passed to GenerativeModel.

    Returns:
        The response text, or a string starting with "❌" when the API key
        is missing or any error occurs while calling the model.
    """
    if not os.environ.get("GEMINI_API_KEY"):
        return "❌ Erro: GEMINI_API_KEY ausente."
    try:
        model = genai.GenerativeModel(model_id)

        # Convert earlier turns to Gemini's history format; any role other
        # than "user" is mapped to "model". Turns with no parts are dropped.
        chat_history = []
        for m in messages[:-1]:
            role = "user" if m['role'] == "user" else "model"
            parts = _gemini_parts(m['content'])
            if parts:
                chat_history.append({"role": role, "parts": parts})

        # The last message is the one actually sent.
        last_parts = _gemini_parts(messages[-1]['content'])

        chat = model.start_chat(history=chat_history)
        response = chat.send_message(last_parts)
        return response.text
    except Exception as e:
        return f"❌ Gemini Error: {e}"
153
 
154
# --- 5. CENTRAL ROUTER ---
def router(message, history, model_selector, request: gr.Request):
    """Rate-limit the call, normalise the conversation and dispatch it.

    Args:
        message: The current user input; either a dict with "text" and
            "files" keys (multimodal) or any other value, used as plain text.
        history: Previous turns, as [user, bot] pairs and/or role dicts.
        model_selector: Label chosen in the model dropdown; substrings of it
            pick the backend and the concrete model id.
        request: Gradio request, forwarded to the rate limiter.

    Returns:
        The backend's reply string, or a warning string when the caller is
        rate-limited or the label matches no backend.
    """
    if not check_spam(request):
        return "⛔ BLOQUEADO: Limite de mensagens excedido. Aguarde."

    # Normalise history, tolerating both shapes Gradio may hand over.
    messages = []
    for turn in history or []:
        if isinstance(turn, (list, tuple)):
            # Legacy pair format: [user, bot]
            user_part = turn[0]
            if isinstance(user_part, dict) and 'text' in user_part:
                user_part = user_part['text']  # multimodal dict: keep its text
            messages.append({"role": "user", "content": str(user_part)})
            if len(turn) > 1 and turn[1]:
                messages.append({"role": "assistant", "content": str(turn[1])})
        elif isinstance(turn, dict):
            # Role-dict format is forwarded untouched.
            messages.append(turn)

    # Current message: multimodal dict or plain text.
    if isinstance(message, dict):
        text = message.get("text", "")
        files = message.get("files", [])
        text_parts = [{"type": "text", "text": text}] if text else []
        file_parts = [{"type": "image_url", "image_url": {"url": f}} for f in files]
        # Without attachments the content stays a plain string.
        user_content = text_parts + file_parts if files else text
        messages.append({"role": "user", "content": user_content})
    else:
        messages.append({"role": "user", "content": str(message)})

    print(f"🔀 Roteando para: {model_selector}")

    def pick(default, table):
        # First needle found in the label wins; order matters.
        for needle, target in table:
            if needle in model_selector:
                return target
        return default

    if "Gemini" in model_selector:
        gemini_id = pick("gemini-1.5-flash", (
            ("3.0", "gemini-3.0-pro-preview"),
            ("2.5 Pro", "gemini-2.5-pro"),
            ("2.5 Flash", "gemini-2.5-flash"),
            ("2.0", "gemini-2.0-flash-exp"),
        ))
        return run_gemini(messages, gemini_id)

    if "Mistral" in model_selector:
        mistral_id = pick("mistral-large-latest", (
            ("Pixtral", "pixtral-large-latest"),
            ("2509", "magistral-medium-2509"),
            ("2512", "mistral-large-2512"),
            ("Codestral", "codestral-2508"),
        ))
        return run_mistral(messages, mistral_id)

    if "Groq" in model_selector:
        groq_id = pick("llama-3.3-70b-versatile", (
            ("120B", "openai/gpt-oss-120b"),
            ("20B", "openai/gpt-oss-20b"),
        ))
        return run_groq(messages, groq_id)

    if "H200" in model_selector:
        return run_local_h200(messages)

    return "⚠️ Modelo não reconhecido."
222
 
223
+ # --- 6. INTERFACE ---
224
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔀 APIDOST v8: The Arsenal")

    # Labels shown in the dropdown; the router matches on substrings of
    # these labels, so they must stay in sync with it.
    _model_choices = [
        "✨ Google: Gemini 3.0 Pro (Experimental)",
        "✨ Google: Gemini 2.5 Pro",
        "✨ Google: Gemini 2.5 Flash",
        "✨ Google: Gemini 2.0 Flash",
        "☁️ Groq: GPT OSS 120B (OpenAI) 🆕",
        "☁️ Groq: GPT OSS 20B (OpenAI) 🆕",
        "☁️ Groq: Llama 3.3 70B",
        "🇫🇷 Mistral: Magistral Medium 2509 🆕",
        "🇫🇷 Mistral: Pixtral Large (Vision) 🖼️",
        "🇫🇷 Mistral: Large 2512 (Dez/25)",
        "🇫🇷 Mistral: Codestral 2508",
        "🔥 Local H200: Qwen 2.5 Coder 32B",
    ]

    with gr.Row():
        model_dropdown = gr.Dropdown(
            label="Escolha o Cérebro",
            choices=_model_choices,
            value=_model_choices[-1],  # the local H200 entry is the default
            interactive=True,
        )

    # NOTE(review): indentation is lost in this view; the chat interface is
    # assumed to sit at Blocks level, outside the Row — confirm.
    # multimodal=True is required for image uploads to work from the
    # index.html front end.
    chat = gr.ChatInterface(
        fn=router,
        multimodal=True,
        additional_inputs=[model_dropdown],
    )
254
 
if __name__ == "__main__":
    # Pre-download the local model so the first request does not stall on
    # the download. This is best effort: a failure is reported and the app
    # still starts, but interrupts are no longer silently swallowed.
    try:
        snapshot_download(repo_id=LOCAL_MODEL_ID)
    except Exception as exc:
        print(f"⚠️ Aviso: falha no pré-download de {LOCAL_MODEL_ID}: {exc}")

    demo.queue().launch()