Spaces:

Madras1
/

APIDOST

Sleeping

App Files Files Community

Madras1 committed on Dec 4, 2025

Commit

33141da

verified ·

1 Parent(s): ec5d8d0

Update app.py

Browse files

Files changed (1) hide show

app.py +147 -119

app.py CHANGED Viewed

@@ -4,64 +4,72 @@ import torch
 import os
 import time
 import base64
-from collections import defaultdict
-from PIL import Image
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from groq import Groq
 from mistralai import Mistral
 import google.generativeai as genai
-from huggingface_hub import snapshot_download
-# --- SEGURANÇA: RATE LIMITER ---
 MAX_REQUESTS_PER_MINUTE = 15
 BLOCK_TIME_SECONDS = 60
-ip_access_log = defaultdict(list)
-def verify_rate_limit(request: gr.Request):
-    if not request: return True
     client_ip = request.client.host
-    current_time = time.time()
-    ip_access_log[client_ip] = [t for t in ip_access_log[client_ip] if current_time - t < BLOCK_TIME_SECONDS]
-    if len(ip_access_log[client_ip]) >= MAX_REQUESTS_PER_MINUTE:
-        print(f"⛔ BLOQUEIO: IP {client_ip} barrado.")
         return False
-    ip_access_log[client_ip].append(current_time)
     return True
-# --- LOCAL: QWEN CODER H200 ---
 LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
 local_model = None
 local_tokenizer = None
-# Clientes API
 groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) if os.environ.get("GROQ_API_KEY") else None
 mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) if os.environ.get("MISTRAL_API_KEY") else None
 if os.environ.get("GEMINI_API_KEY"):
     genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
-# --- HELPER IMAGEM ---
 def encode_image(image_path):
     try:
         with open(image_path, "rb") as image_file:
             return base64.b64encode(image_file.read()).decode('utf-8')
-    except Exception: return None
-# --- DOWNLOADER ---
-def download_local_model():
-    print(f"⏳ Cache: Verificando {LOCAL_MODEL_ID}...")
-    try: snapshot_download(repo_id=LOCAL_MODEL_ID)
-    except Exception as e: print(f"⚠️ Aviso: {e}")
-# --- BACKENDS ---
-@spaces.GPU(duration=120)
 def run_local_h200(messages):
-    for m in messages:
-        if isinstance(m['content'], list): return "⚠️ Qwen Local não lê imagens. Use Gemini/Pixtral."
     global local_model, local_tokenizer
     if local_model is None:
         local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
-        local_model = AutoModelForCausalLM.from_pretrained(LOCAL_MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda")
     text = local_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
@@ -70,8 +78,10 @@ def run_local_h200(messages):
 def run_groq(messages, model_id):
     for m in messages:
-        if isinstance(m['content'], list): return "⚠️ Groq não lê imagens. Use Gemini/Pixtral."
-    if not groq_client: return "❌ Erro: API Key Groq ausente."
     clean_msgs = [{"role": m['role'], "content": m['content']} for m in messages]
     try:
         completion = groq_client.chat.completions.create(
@@ -81,20 +91,23 @@ def run_groq(messages, model_id):
     except Exception as e: return f"❌ Groq Error: {e}"
 def run_mistral(messages, model_id):
-    if not mistral_client: return "❌ Erro: API Key Mistral ausente."
     formatted_msgs = []
     for m in messages:
         new_content = []
-        if isinstance(m['content'], str): new_content = m['content']
-        elif isinstance(m['content'], list):
-            for item in m['content']:
-                if item.get('type') == 'text': new_content.append({"type": "text", "text": item['text']})
                 elif item.get('type') == 'image_url':
                     url = item['image_url']['url']
                     if not url.startswith("data:") and os.path.exists(url):
                         b64 = encode_image(url)
                         new_content.append({"type": "image_url", "image_url": f"data:image/jpeg;base64,{b64}"})
-                    else: new_content.append({"type": "image_url", "image_url": url})
         formatted_msgs.append({"role": m['role'], "content": new_content})
     try:
@@ -103,130 +116,145 @@ def run_mistral(messages, model_id):
     except Exception as e: return f"❌ Mistral Error: {e}"
 def run_gemini(messages, model_id):
-    if not os.environ.get("GEMINI_API_KEY"): return "❌ Erro: API Key Gemini ausente."
     try:
         model = genai.GenerativeModel(model_id)
         chat_history = []
         for m in messages[:-1]:
             role = "user" if m['role'] == "user" else "model"
             parts = []
-            if isinstance(m['content'], str): parts.append(m['content'])
-            elif isinstance(m['content'], list):
-                for item in m['content']:
                     if item.get('type') == 'text': parts.append(item['text'])
                     elif item.get('type') == 'image_url':
                         path = item['image_url']['url']
                         if os.path.exists(path): parts.append(Image.open(path))
             if parts: chat_history.append({"role": role, "parts": parts})
-        last_msg = messages[-1]['content']
-        current_parts = []
-        if isinstance(last_msg, str): current_parts.append(last_msg)
-        elif isinstance(last_msg, list):
-            for item in last_msg:
-                if item.get('type') == 'text': current_parts.append(item['text'])
                 elif item.get('type') == 'image_url':
                     path = item['image_url']['url']
-                    if os.path.exists(path): current_parts.append(Image.open(path))
         chat = model.start_chat(history=chat_history)
-        response = chat.send_message(current_parts)
         return response.text
-    except Exception as e: return f"❌ Gemini Error ({model_id}): {e}"
-# --- ROTEADOR ---
 def router(message, history, model_selector, request: gr.Request):
-    if not verify_rate_limit(request):
-        return f"⛔ LIMITADO: Aguarde para enviar mais mensagens."
-    formatted_history = []
     if history:
         for turn in history:
-            if isinstance(turn, dict): formatted_history.append(turn)
-            elif isinstance(turn, (list, tuple)) and len(turn) >= 2:
-                u = turn[0]['text'] if isinstance(turn[0], dict) and 'text' in turn[0] else str(turn[0])
-                b = str(turn[1]) if turn[1] else ""
-                formatted_history.append({"role": "user", "content": u})
-                if b: formatted_history.append({"role": "assistant", "content": b})
     current_content = []
-    text = message.get("text", "")
-    files = message.get("files", [])
-    if text: current_content.append({"type": "text", "text": text})
-    for f in files: current_content.append({"type": "image_url", "image_url": {"url": f}})
-    if not files: formatted_history.append({"role": "user", "content": text})
-    else: formatted_history.append({"role": "user", "content": current_content})
-    # SELEÇÃO (IDs CORRIGIDOS)
     if "Gemini" in model_selector:
         tid = "gemini-1.5-flash"
-        if "3.0" in model_selector: tid = "gemini-3.0-pro-preview"
-        elif "2.5 Pro" in model_selector: tid = "gemini-2.5-pro"
-        elif "2.5 Flash" in model_selector: tid = "gemini-2.5-flash"
-        elif "2.0 Flash" in model_selector: tid = "gemini-2.0-flash-exp"
-        return run_gemini(formatted_history, tid)
     elif "Mistral" in model_selector:
         tid = "mistral-large-latest"
         if "Pixtral" in model_selector: tid = "pixtral-large-latest"
-        elif "2509" in model_selector: tid = "magistral-medium-2509"
         elif "2512" in model_selector: tid = "mistral-large-2512"
         elif "Codestral" in model_selector: tid = "codestral-2508"
-        return run_mistral(formatted_history, tid)
     elif "Groq" in model_selector:
-        if "120B" in model_selector: tid = "openai/gpt-oss-120b"
-        elif "20B" in model_selector: tid = "openai/gpt-oss-20b"
-        else: tid = "llama-3.3-70b-versatile"
-        return run_groq(formatted_history, tid)
     elif "H200" in model_selector:
-        return run_local_h200(formatted_history)
-    return "Modelo desconhecido."
-# --- INTERFACE ---
-with gr.Blocks() as demo:
-    gr.Markdown("# 🔀 APIDOST v7 (Stable)")
-    models_list = [
-        "✨ Google: Gemini 3.0 Pro (Experimental)",
-        "✨ Google: Gemini 2.5 Pro",
-        "✨ Google: Gemini 2.5 Flash",
-        "✨ Google: Gemini 2.0 Flash",
-        "☁️ Groq: GPT OSS 120B (OpenAI) 🆕",
-        "☁️ Groq: GPT OSS 20B (OpenAI) 🆕",
-        "☁️ Groq: Llama 3.3 70B",
-        "🇫🇷 Mistral: Magistral Medium 2509 🆕",
-        "🇫🇷 Mistral: Pixtral Large (Vision) 🖼️",
-        "🇫🇷 Mistral: Large 2512 (Dez/25)",
-        "🇫🇷 Mistral: Codestral 2508",
-        "🔥 Local H200: Qwen 2.5 Coder 32B"
-    ]
     with gr.Row():
-        model_dropdown = gr.Dropdown(choices=models_list, value=models_list[-1], label="Cérebro", interactive=True)
     chat = gr.ChatInterface(
-        fn=router,
         additional_inputs=[model_dropdown],
-        multimodal=True,
-    )
-    # CORREÇÃO FINAL AQUI:
-    # Substituí 'gr.State' por 'gr.JSON' para não exigir retorno de estado.
-    api_bridge = gr.Interface(
-        fn=router,
-        inputs=[
-            gr.MultimodalTextbox(label="message"),
-            gr.JSON(value=[], label="history"), # <--- MUDANÇA: JSON não trava o output
-            gr.Dropdown(choices=models_list, label="model_selector")
-        ],
-        outputs=[gr.Textbox(label="response")],
-        api_name="chat"
     )
 if __name__ == "__main__":
-    download_local_model()
-    demo.queue(api_open=True).launch(server_name="0.0.0.0", server_port=7860)

 import os
 import time
 import base64
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from groq import Groq
 from mistralai import Mistral
 import google.generativeai as genai
+from huggingface_hub import snapshot_download
+# --- 1. SECURITY (RATE LIMIT) ---
+# Simple and efficient: blocks spammers without breaking the app. Each client IP may have at most MAX_REQUESTS_PER_MINUTE accepted requests inside the last BLOCK_TIME_SECONDS seconds.
 MAX_REQUESTS_PER_MINUTE = 15  # accepted requests allowed per IP inside one window
 BLOCK_TIME_SECONDS = 60  # length of the look-back window, in seconds
+ip_tracker = {}  # client IP -> list of time.time() stamps of accepted requests; keys are never deleted
+def check_spam(request: gr.Request):  # True = request may proceed, False = this IP is over the limit
+    if not request: return True # No request object (local run): never limited
     client_ip = request.client.host
+    now = time.time()
+    # Keep only this IP's timestamps newer than BLOCK_TIME_SECONDS
+    if client_ip in ip_tracker:
+        ip_tracker[client_ip] = [t for t in ip_tracker[client_ip] if now - t < BLOCK_TIME_SECONDS]
+    # Reject when the window is already full; a rejected call is not recorded
+    if client_ip in ip_tracker and len(ip_tracker[client_ip]) >= MAX_REQUESTS_PER_MINUTE:
         return False
+    # Record the accepted request
+    if client_ip not in ip_tracker: ip_tracker[client_ip] = []
+    ip_tracker[client_ip].append(now)
     return True
+# --- 2. GLOBAL SETTINGS ---
+# LOCAL (H200 - ZeroGPU)
 LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Hugging Face repo id of the local model
 local_model = None  # stays None until run_local_h200 loads the model on first use
 local_tokenizer = None  # stays None until run_local_h200 loads the tokenizer on first use
+# API CLIENTS: groq_client / mistral_client are None when their API key env var is unset or empty
 groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) if os.environ.get("GROQ_API_KEY") else None
 mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) if os.environ.get("MISTRAL_API_KEY") else None
 if os.environ.get("GEMINI_API_KEY"):  # Gemini keeps no client object here; the SDK is configured module-wide
     genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
+# --- 3. HELPER (IMAGE) ---
 def encode_image(image_path):  # read the file at image_path and return its bytes as a base64 str
     try:
         with open(image_path, "rb") as image_file:
             return base64.b64encode(image_file.read()).decode('utf-8')
+    except: return None  # NOTE(review): bare except hides every failure; run_mistral formats this result into a data URL without checking for None
+# --- 4. FUNÇÕES DE EXECUÇÃO ---
+@spaces.GPU(duration=120)
 def run_local_h200(messages):
     global local_model, local_tokenizer
+    # Validação rápida de imagem
+    for m in messages:
+        if isinstance(m['content'], list): return "⚠️ Modelo Local não suporta imagens. Use Gemini/Pixtral."
     if local_model is None:
+        print(f"🐢 Carregando {LOCAL_MODEL_ID}...")
         local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
+        local_model = AutoModelForCausalLM.from_pretrained(
+            LOCAL_MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda"
+        )
     text = local_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
 def run_groq(messages, model_id):
     for m in messages:
+        if isinstance(m['content'], list): return "⚠️ Groq não suporta imagens. Use Gemini/Pixtral."
+    if not groq_client: return "❌ Erro: GROQ_API_KEY ausente."
+    # Limpa formato para Groq
     clean_msgs = [{"role": m['role'], "content": m['content']} for m in messages]
     try:
         completion = groq_client.chat.completions.create(
     except Exception as e: return f"❌ Groq Error: {e}"
 def run_mistral(messages, model_id):
+    if not mistral_client: return "❌ Erro: MISTRAL_API_KEY ausente."
+    # Formata imagens para Mistral
     formatted_msgs = []
     for m in messages:
+        content = m['content']
         new_content = []
+        if isinstance(content, str): new_content = content
+        elif isinstance(content, list):
+            for item in content:
+                if item.get('type') == 'text': new_content.append(item)
                 elif item.get('type') == 'image_url':
                     url = item['image_url']['url']
                     if not url.startswith("data:") and os.path.exists(url):
                         b64 = encode_image(url)
                         new_content.append({"type": "image_url", "image_url": f"data:image/jpeg;base64,{b64}"})
+                    else: new_content.append(item)
         formatted_msgs.append({"role": m['role'], "content": new_content})
     try:
     except Exception as e: return f"❌ Mistral Error: {e}"
 def run_gemini(messages, model_id):  # send `messages` to Gemini model `model_id`; returns the reply text or an error string
+    if not os.environ.get("GEMINI_API_KEY"): return "❌ Erro: GEMINI_API_KEY ausente."
     try:
         model = genai.GenerativeModel(model_id)
         chat_history = []
+        # Convert every message except the last into Gemini history entries
         for m in messages[:-1]:
             role = "user" if m['role'] == "user" else "model"  # any non-"user" role is sent as "model"
             parts = []
+            c = m['content']
+            if isinstance(c, str): parts.append(c)
+            elif isinstance(c, list):
+                for item in c:
                     if item.get('type') == 'text': parts.append(item['text'])
                     elif item.get('type') == 'image_url':
                         path = item['image_url']['url']
                         if os.path.exists(path): parts.append(Image.open(path))  # NOTE(review): `Image` is not among the imports visible in this diff (the `from PIL import Image` line is removed) - confirm it is still imported
             if parts: chat_history.append({"role": role, "parts": parts})  # messages that produced no parts are left out of the history
+        # Last message: built the same way, then sent as the new turn
+        last_parts = []
+        lc = messages[-1]['content']
+        if isinstance(lc, str): last_parts.append(lc)
+        elif isinstance(lc, list):
+            for item in lc:
+                if item.get('type') == 'text': last_parts.append(item['text'])
                 elif item.get('type') == 'image_url':
                     path = item['image_url']['url']
+                    if os.path.exists(path): last_parts.append(Image.open(path))  # image entries whose path does not exist on disk are skipped silently
         chat = model.start_chat(history=chat_history)
+        response = chat.send_message(last_parts)
         return response.text
+    except Exception as e: return f"❌ Gemini Error: {e}"  # any failure above (including a NameError) comes back as this string instead of raising
+# --- 5. CENTRAL ROUTER ---
 def router(message, history, model_selector, request: gr.Request):  # chat handler: rate-limit, normalize history + message, dispatch on the dropdown label; always returns a str
+    # Spam check
+    if not check_spam(request):
+        return "⛔ BLOQUEADO: Limite de mensagens excedido. Aguarde."
+    # Build the history (hardened against the different formats Gradio may send)
+    messages = []
     if history:
         for turn in history:
+            # Old format: [user, bot] pairs
+            if isinstance(turn, (list, tuple)):
+                u_text = turn[0]  # NOTE(review): an empty list/tuple turn would raise IndexError here
+                if isinstance(u_text, dict) and 'text' in u_text: u_text = u_text['text'] # Pull the text out when it is a dict
+                messages.append({"role": "user", "content": str(u_text)})
+                if len(turn) > 1 and turn[1]:
+                    messages.append({"role": "assistant", "content": str(turn[1])})
+            # New format: {role: user...} dicts are passed through unchanged
+            elif isinstance(turn, dict):
+                messages.append(turn)
+    # Build the current message (multimodal dict or plain text)
     current_content = []
+    if isinstance(message, dict): # Multimodal
+        text = message.get("text", "")
+        files = message.get("files", [])
+        if text: current_content.append({"type": "text", "text": text})
+        for f in files: current_content.append({"type": "image_url", "image_url": {"url": f}})
+        if not files: messages.append({"role": "user", "content": text})  # no files: content stays a plain string
+        else: messages.append({"role": "user", "content": current_content})  # with files: content is the list of text/image parts
+    else: # Plain text
+        messages.append({"role": "user", "content": str(message)})
+    # Routing: backends are picked by substring match on the dropdown label
+    print(f"🔀 Roteando para: {model_selector}")
+    # GEMINI
     if "Gemini" in model_selector:
         tid = "gemini-1.5-flash"
+        if "3.0" in model_selector: tid = "gemini-3.0-pro-preview"
+        elif "2.5 Pro" in model_selector: tid = "gemini-2.5-pro"
+        elif "2.5 Flash" in model_selector: tid = "gemini-2.5-flash"
+        elif "2.0" in model_selector: tid = "gemini-2.0-flash-exp"
+        return run_gemini(messages, tid)
+    # MISTRAL
     elif "Mistral" in model_selector:
         tid = "mistral-large-latest"
         if "Pixtral" in model_selector: tid = "pixtral-large-latest"
+        elif "2509" in model_selector: tid = "magistral-medium-2509" # <--- your Magistral VIP
         elif "2512" in model_selector: tid = "mistral-large-2512"
         elif "Codestral" in model_selector: tid = "codestral-2508"
+        return run_mistral(messages, tid)
+    # GROQ
     elif "Groq" in model_selector:
+        tid = "llama-3.3-70b-versatile"
+        if "120B" in model_selector: tid = "openai/gpt-oss-120b" # <--- GPT OSS 120B; must be tested before "20B", which is a substring of "120B"
+        elif "20B" in model_selector: tid = "openai/gpt-oss-20b"   # <--- GPT OSS 20B
+        return run_groq(messages, tid)
+    # LOCAL
     elif "H200" in model_selector:
+        return run_local_h200(messages)
+    return "⚠️ Modelo não reconhecido."
+# --- 6. INTERFACE ---
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🔀 APIDOST v8: The Arsenal")
     with gr.Row():
+        model_dropdown = gr.Dropdown(
+            choices=[
+                "✨ Google: Gemini 3.0 Pro (Experimental)",
+                "✨ Google: Gemini 2.5 Pro",
+                "✨ Google: Gemini 2.5 Flash",
+                "✨ Google: Gemini 2.0 Flash",
+                "☁️ Groq: GPT OSS 120B (OpenAI) 🆕",
+                "☁️ Groq: GPT OSS 20B (OpenAI) 🆕",
+                "☁️ Groq: Llama 3.3 70B",
+                "🇫🇷 Mistral: Magistral Medium 2509 🆕",
+                "🇫🇷 Mistral: Pixtral Large (Vision) 🖼️",
+                "🇫🇷 Mistral: Large 2512 (Dez/25)",
+                "🇫🇷 Mistral: Codestral 2508",
+                "🔥 Local H200: Qwen 2.5 Coder 32B"
+            ],  # router() matches these labels by substring, so renaming one can change which backend is chosen
+            value="🔥 Local H200: Qwen 2.5 Coder 32B",
+            label="Escolha o Cérebro",
+            interactive=True
+        )
+    # Berta: multimodal=True is vital for images to work in your index.html
     chat = gr.ChatInterface(
+        fn=router,
         additional_inputs=[model_dropdown],
+        multimodal=True
     )
 if __name__ == "__main__":
+    # Pre-download the local model so the first use does not stall
+    try: snapshot_download(repo_id=LOCAL_MODEL_ID)
+    except: pass  # NOTE(review): best-effort, but the bare except discards every error without logging it
+    demo.queue().launch()