# APIDOST — app.py (Hugging Face Space entry point)
import gradio as gr
import spaces
import torch
import os
import time
import base64
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
from groq import Groq
from mistralai import Mistral
import google.generativeai as genai
from huggingface_hub import snapshot_download
# --- 1. SECURITY (ANTI-SPAM) ---
# Per-IP sliding-window rate limit: at most MAX_REQUESTS_PER_MINUTE requests
# within the last BLOCK_TIME_SECONDS.
MAX_REQUESTS_PER_MINUTE = 15
BLOCK_TIME_SECONDS = 60
# Maps client IP -> list of recent request timestamps (in-memory, per process).
ip_tracker = {}
def check_spam(request: gr.Request):
    """Sliding-window rate limiter.

    Returns True when the request may proceed, False when the client IP has
    already made MAX_REQUESTS_PER_MINUTE requests in the last
    BLOCK_TIME_SECONDS. A missing request object is allowed through.
    """
    if not request:
        return True
    ip = request.client.host
    now = time.time()
    # Drop timestamps that have aged out of the window.
    recent = [t for t in ip_tracker.get(ip, []) if now - t < BLOCK_TIME_SECONDS]
    if len(recent) >= MAX_REQUESTS_PER_MINUTE:
        ip_tracker[ip] = recent
        return False
    recent.append(now)
    ip_tracker[ip] = recent
    return True
# --- 2. GLOBAL CONFIGURATION ---
# Hugging Face repo served by the local GPU backend (run_local_h200).
LOCAL_MODEL_ID = "Madras1/Qwq-32b-Mix-Coder"
# Lazily initialized on the first GPU call.
local_model = None
local_tokenizer = None
# Remote API clients are created only when the corresponding key is present;
# the executors below fall back to an error string when a client is None.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) if os.environ.get("GROQ_API_KEY") else None
mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) if os.environ.get("MISTRAL_API_KEY") else None
if os.environ.get("GEMINI_API_KEY"):
    genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
# --- 3. IMAGE HELPER ---
def encode_image(image_path):
    """Read a file and return its contents base64-encoded as an ASCII str.

    Returns None when the file cannot be read (missing path, permission
    error, ...). Narrowed from a bare `except:` so programming errors and
    KeyboardInterrupt are no longer silently swallowed.
    """
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except OSError:
        return None
# --- 4. EXECUTORS ---
@spaces.GPU(duration=120)
def run_local_h200(messages):
    """Generate a reply with the locally hosted model on GPU (text-only)."""
    global local_model, local_tokenizer
    # The local model has no vision support: reject multimodal turns up front.
    if any(isinstance(m['content'], list) for m in messages):
        return "⚠️ Berta avisa: Modelo Local não suporta imagens. Use Gemini ou Pixtral."
    if local_model is None:
        # Lazy first-call load keeps startup fast and defers GPU allocation.
        print(f"🐢 Carregando {LOCAL_MODEL_ID}...")
        local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
        local_model = AutoModelForCausalLM.from_pretrained(
            LOCAL_MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda"
        )
    prompt = local_tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    enc = local_tokenizer([prompt], return_tensors="pt").to(local_model.device)
    gen_ids = local_model.generate(
        **enc, max_new_tokens=4096, temperature=0.6, do_sample=True
    )
    # Decode only the newly generated tail (strip the prompt tokens).
    return local_tokenizer.decode(
        gen_ids[0][enc.input_ids.shape[1]:], skip_special_tokens=True
    )
def run_groq(messages, model_id):
    """Send a text-only conversation to the Groq chat-completions API."""
    # Groq models wired here are text-only; bail out on multimodal content.
    if any(isinstance(m['content'], list) for m in messages):
        return "⚠️ Groq não suporta imagens."
    if not groq_client:
        return "❌ Erro: GROQ_API_KEY ausente."
    # Strip any extra keys Gradio may have attached to each message.
    payload = [{"role": m['role'], "content": m['content']} for m in messages]
    try:
        completion = groq_client.chat.completions.create(
            model=model_id, messages=payload, temperature=0.7, max_tokens=8192
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"❌ Groq Error: {e}"
def run_mistral(messages, model_id):
    """Send a conversation to the Mistral chat API.

    Local image file paths in `image_url` items are converted to
    base64 data URIs; items that are already data URIs (or whose file
    cannot be read) are passed through unchanged.
    """
    if not mistral_client:
        return "❌ Erro: MISTRAL_API_KEY ausente."
    formatted_msgs = []
    for m in messages:
        content = m['content']
        new_content = []
        if isinstance(content, str):
            new_content = content
        elif isinstance(content, list):
            for item in content:
                if item.get('type') == 'image_url':
                    url = item['image_url']['url']
                    if not url.startswith("data:") and os.path.exists(url):
                        b64 = encode_image(url)
                        # Fix: only rewrite when encoding succeeded; the old
                        # code could emit "data:image/jpeg;base64,None".
                        if b64 is not None:
                            item = {
                                "type": "image_url",
                                "image_url": f"data:image/jpeg;base64,{b64}",
                            }
                new_content.append(item)
        formatted_msgs.append({"role": m['role'], "content": new_content})
    try:
        res = mistral_client.chat.complete(model=model_id, messages=formatted_msgs)
        return res.choices[0].message.content
    except Exception as e:
        return f"❌ Mistral Error: {e}"
def run_gemini(messages, model_id):
if not os.environ.get("GEMINI_API_KEY"): return "❌ Erro: GEMINI_API_KEY ausente."
try:
model = genai.GenerativeModel(model_id)
chat_history = []
for m in messages[:-1]:
role = "user" if m['role'] == "user" else "model"
parts = []
c = m['content']
if isinstance(c, str): parts.append(c)
elif isinstance(c, list):
for item in c:
if item.get('type') == 'text': parts.append(item['text'])
elif item.get('type') == 'image_url':
path = item['image_url']['url']
if os.path.exists(path): parts.append(Image.open(path))
if parts: chat_history.append({"role": role, "parts": parts})
last_parts = []
lc = messages[-1]['content']
if isinstance(lc, str): last_parts.append(lc)
elif isinstance(lc, list):
for item in lc:
if item.get('type') == 'text': last_parts.append(item['text'])
elif item.get('type') == 'image_url':
path = item['image_url']['url']
if os.path.exists(path): last_parts.append(Image.open(path))
chat = model.start_chat(history=chat_history)
response = chat.send_message(last_parts)
return response.text
except Exception as e: return f"❌ Gemini Error: {e}"
# --- 5. CENTRAL ROUTER ---
def router(message, history, model_selector, request: gr.Request):
    """Route a single chat turn to the backend picked in the dropdown.

    Accepts history as ChatInterface tuples or API-style dicts, and the
    current message as a multimodal dict ({"text", "files"}) or plain str.
    Dispatches on substrings of the dropdown label. Returns the reply text.
    """
    if not check_spam(request):
        return "⛔ BLOQUEADO: Spam detectado."
    # Normalize history into OpenAI-style role/content dicts.
    messages = []
    if history:
        for turn in history:
            if isinstance(turn, (list, tuple)):
                u_text = turn[0]
                if isinstance(u_text, dict) and 'text' in u_text:
                    u_text = u_text['text']
                messages.append({"role": "user", "content": str(u_text)})
                if len(turn) > 1 and turn[1]:
                    messages.append({"role": "assistant", "content": str(turn[1])})
            elif isinstance(turn, dict):
                messages.append(turn)
    # Append the current turn: multimodal dict or plain string.
    if isinstance(message, dict):
        text = message.get("text", "")
        files = message.get("files", [])
        if files:
            current_content = []
            if text:
                current_content.append({"type": "text", "text": text})
            for f in files:
                current_content.append({"type": "image_url", "image_url": {"url": f}})
            messages.append({"role": "user", "content": current_content})
        else:
            messages.append({"role": "user", "content": text})
    else:
        messages.append({"role": "user", "content": str(message)})
    # Model selection by label substring.
    if "Gemini" in model_selector:
        tid = "gemini-1.5-flash"
        # Fix: the label reads "Gemini 2.5 Flash Lite", which never contained
        # the old "2.5 Lite" substring (and also contains "2.5 Flash"), so the
        # default dropdown choice silently routed to gemini-2.5-flash.
        # Match "Flash Lite" first.
        if "Flash Lite" in model_selector: tid = "gemini-2.5-flash-lite"
        elif "3.0" in model_selector: tid = "gemini-3.0-pro-preview"
        elif "2.5 Pro" in model_selector: tid = "gemini-2.5-pro"
        elif "2.5 Flash" in model_selector: tid = "gemini-2.5-flash"
        elif "2.0" in model_selector: tid = "gemini-2.0-flash-exp"
        return run_gemini(messages, tid)
    elif "Mistral" in model_selector:
        tid = "mistral-large-latest"
        if "Pixtral" in model_selector: tid = "pixtral-large-latest"
        elif "2509" in model_selector: tid = "magistral-medium-2509"
        elif "2512" in model_selector: tid = "mistral-large-2512"
        elif "Codestral" in model_selector: tid = "codestral-2508"
        return run_mistral(messages, tid)
    elif "Groq" in model_selector:
        tid = "llama-3.3-70b-versatile"
        if "120B" in model_selector: tid = "openai/gpt-oss-120b"
        elif "20B" in model_selector: tid = "openai/gpt-oss-20b"
        return run_groq(messages, tid)
    elif "H200" in model_selector:
        return run_local_h200(messages)
    return "⚠️ Modelo não reconhecido."
# --- 6. INTERFACE ---
with gr.Blocks() as demo:
    gr.Markdown("# 🔀 APIDOST v12 - Estável")
    # Dropdown labels double as routing keys: `router` dispatches on
    # substrings such as "Gemini", "Groq", "Mistral", "H200" and the
    # version fragments inside each label.
    models_list = [
        "✨ Google: Gemini 2.5 Flash Lite ⚡",
        "✨ Google: Gemini 3.0 Pro (Experimental)",
        "✨ Google: Gemini 2.5 Pro",
        "✨ Google: Gemini 2.5 Flash",
        "✨ Google: Gemini 2.0 Flash",
        "☁️ Groq: GPT OSS 120B (OpenAI)",
        "☁️ Groq: GPT OSS 20B (OpenAI)",
        "☁️ Groq: Llama 3.3 70B",
        "🇫🇷 Mistral: Magistral Medium 2509",
        "🇫🇷 Mistral: Pixtral Large (Vision) 🖼️",
        "🇫🇷 Mistral: Large 2512 (Dez/25)",
        "🇫🇷 Mistral: Codestral 2508",
        "🔥 Local H200: Qwen 2.5 Coder 32B"
    ]
    with gr.Row():
        model_dropdown = gr.Dropdown(choices=models_list, value=models_list[0], label="Cérebro", interactive=True)
    # Main multimodal chat UI; the dropdown value is passed to `router`
    # as the model_selector argument.
    chat = gr.ChatInterface(
        fn=router,
        additional_inputs=[model_dropdown],
        multimodal=True
    )
    # Plain Interface exposing the same router as a named API endpoint
    # (/chat) for programmatic clients.
    api_bridge = gr.Interface(
        fn=router,
        inputs=[
            gr.MultimodalTextbox(label="message"),
            gr.JSON(value=[], label="history"),
            gr.Dropdown(choices=models_list, label="model_selector", value=models_list[0])
        ],
        outputs=[gr.Textbox(label="response")],
        api_name="chat"
    )
if __name__ == "__main__":
    # Best-effort prefetch of the local model weights so the first GPU call
    # does not pay the download cost; failures are non-fatal, but are now
    # logged instead of silently swallowed by a bare `except:`.
    try:
        snapshot_download(repo_id=LOCAL_MODEL_ID)
    except Exception as e:
        print(f"⚠️ Prefetch do modelo falhou: {e}")
    demo.queue(api_open=True).launch(server_name="0.0.0.0", server_port=7860)