import gradio as gr
import spaces
import torch
import os
import time
import base64
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
from groq import Groq
from mistralai import Mistral
import google.generativeai as genai
from huggingface_hub import snapshot_download

# --- 1. SECURITY (ANTI-SPAM) ---
MAX_REQUESTS_PER_MINUTE = 15
BLOCK_TIME_SECONDS = 60
ip_tracker = {}

def check_spam(request: gr.Request):
    """Sliding-window rate limiter: allow at most MAX_REQUESTS_PER_MINUTE
    requests per client IP within the last BLOCK_TIME_SECONDS."""
    if not request:
        return True
    client_ip = request.client.host
    now = time.time()
    if client_ip in ip_tracker:
        # Drop timestamps that have fallen out of the window.
        ip_tracker[client_ip] = [t for t in ip_tracker[client_ip] if now - t < BLOCK_TIME_SECONDS]
        if len(ip_tracker[client_ip]) >= MAX_REQUESTS_PER_MINUTE:
            return False
    else:
        ip_tracker[client_ip] = []
    ip_tracker[client_ip].append(now)
    return True

# --- 2. GLOBAL CONFIGURATION ---
LOCAL_MODEL_ID = "Madras1/Qwq-32b-Mix-Coder"
local_model = None
local_tokenizer = None

groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) if os.environ.get("GROQ_API_KEY") else None
mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) if os.environ.get("MISTRAL_API_KEY") else None
if os.environ.get("GEMINI_API_KEY"):
    genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# --- 3. IMAGE HELPER ---
def encode_image(image_path):
    """Return the file at image_path as a base64 string, or None on failure."""
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
    except OSError:
        return None

# --- 4. EXECUTORS ---
@spaces.GPU(duration=120)
def run_local_h200(messages):
    global local_model, local_tokenizer
    for m in messages:
        if isinstance(m["content"], list):
            return "⚠️ Berta warns: the local model does not support images. Use Gemini or Pixtral."
    if local_model is None:
        # Lazy-load the weights on first use so startup stays fast.
        print(f"🐢 Loading {LOCAL_MODEL_ID}...")
        local_tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
        local_model = AutoModelForCausalLM.from_pretrained(
            LOCAL_MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda"
        )
    text = local_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = local_tokenizer([text], return_tensors="pt").to(local_model.device)
    outputs = local_model.generate(**inputs, max_new_tokens=4096, temperature=0.6, do_sample=True)
    # Decode only the newly generated tokens, skipping the prompt.
    return local_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

def run_groq(messages, model_id):
    for m in messages:
        if isinstance(m["content"], list):
            return "⚠️ Groq does not support images."
    if not groq_client:
        return "❌ Error: GROQ_API_KEY missing."
    clean_msgs = [{"role": m["role"], "content": m["content"]} for m in messages]
    try:
        completion = groq_client.chat.completions.create(
            model=model_id, messages=clean_msgs, temperature=0.7, max_tokens=8192
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"❌ Groq Error: {e}"
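# The two remaining executors (run_mistral, run_gemini) are the only ones that
# accept image parts. Every executor consumes the OpenAI-style message list
# built by the router in section 5: a text-only turn keeps "content" as a plain
# string, a multimodal turn carries a list of typed parts. A minimal sketch of
# the two shapes (the values are illustrative examples, not from real traffic):
#
#   text_turn = {"role": "user", "content": "Explain decorators."}
#   image_turn = {"role": "user", "content": [
#       {"type": "text", "text": "What is in this picture?"},
#       {"type": "image_url", "image_url": {"url": "/tmp/photo.jpg"}},
#   ]}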
def run_mistral(messages, model_id):
    if not mistral_client:
        return "❌ Error: MISTRAL_API_KEY missing."
    formatted_msgs = []
    for m in messages:
        content = m["content"]
        new_content = []
        if isinstance(content, str):
            new_content = content
        elif isinstance(content, list):
            for item in content:
                if item.get("type") == "text":
                    new_content.append(item)
                elif item.get("type") == "image_url":
                    url = item["image_url"]["url"]
                    if not url.startswith("data:") and os.path.exists(url):
                        # Local file: inline it as a base64 data URL for the API.
                        b64 = encode_image(url)
                        if b64:
                            new_content.append({"type": "image_url", "image_url": f"data:image/jpeg;base64,{b64}"})
                    else:
                        new_content.append(item)
        formatted_msgs.append({"role": m["role"], "content": new_content})
    try:
        res = mistral_client.chat.complete(model=model_id, messages=formatted_msgs)
        return res.choices[0].message.content
    except Exception as e:
        return f"❌ Mistral Error: {e}"

def run_gemini(messages, model_id):
    if not os.environ.get("GEMINI_API_KEY"):
        return "❌ Error: GEMINI_API_KEY missing."
    try:
        model = genai.GenerativeModel(model_id)
        # Convert every turn except the last into Gemini chat history.
        chat_history = []
        for m in messages[:-1]:
            role = "user" if m["role"] == "user" else "model"
            parts = []
            c = m["content"]
            if isinstance(c, str):
                parts.append(c)
            elif isinstance(c, list):
                for item in c:
                    if item.get("type") == "text":
                        parts.append(item["text"])
                    elif item.get("type") == "image_url":
                        path = item["image_url"]["url"]
                        if os.path.exists(path):
                            parts.append(Image.open(path))
            if parts:
                chat_history.append({"role": role, "parts": parts})
        # The last turn is sent as the new message.
        last_parts = []
        lc = messages[-1]["content"]
        if isinstance(lc, str):
            last_parts.append(lc)
        elif isinstance(lc, list):
            for item in lc:
                if item.get("type") == "text":
                    last_parts.append(item["text"])
                elif item.get("type") == "image_url":
                    path = item["image_url"]["url"]
                    if os.path.exists(path):
                        last_parts.append(Image.open(path))
        chat = model.start_chat(history=chat_history)
        response = chat.send_message(last_parts)
        return response.text
    except Exception as e:
        return f"❌ Gemini Error: {e}"
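# run_gemini above re-maps the OpenAI-style history onto Gemini's chat format,
# where the assistant role is named "model" and each turn carries a "parts"
# list that may mix strings and PIL images. A minimal sketch of one converted
# turn (values are illustrative):
#
#   openai_turn = {"role": "assistant", "content": "It is a cat."}
#   gemini_turn = {"role": "model", "parts": ["It is a cat."]}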
# --- 5. CENTRAL ROUTER ---
def router(message, history, model_selector, request: gr.Request):
    if not check_spam(request):
        return "⛔ BLOCKED: Spam detected."

    # Normalize history (handles both tuple-style and dict-style turns)
    messages = []
    if history:
        for turn in history:
            if isinstance(turn, (list, tuple)):
                u_text = turn[0]
                if isinstance(u_text, dict) and "text" in u_text:
                    u_text = u_text["text"]
                messages.append({"role": "user", "content": str(u_text)})
                if len(turn) > 1 and turn[1]:
                    messages.append({"role": "assistant", "content": str(turn[1])})
            elif isinstance(turn, dict):
                messages.append(turn)

    # Process the current message
    current_content = []
    if isinstance(message, dict):
        text = message.get("text", "")
        files = message.get("files", [])
        if text:
            current_content.append({"type": "text", "text": text})
        for f in files:
            current_content.append({"type": "image_url", "image_url": {"url": f}})
        if not files:
            messages.append({"role": "user", "content": text})
        else:
            messages.append({"role": "user", "content": current_content})
    else:
        messages.append({"role": "user", "content": str(message)})

    # Model selection
    if "Gemini" in model_selector:
        tid = "gemini-1.5-flash"  # default mapping
        # Check "Flash Lite" before "2.5 Flash", otherwise the Lite label
        # would fall through to the plain Flash model.
        if "Flash Lite" in model_selector:
            tid = "gemini-2.5-flash-lite"
        elif "3.0" in model_selector:
            tid = "gemini-3.0-pro-preview"
        elif "2.5 Pro" in model_selector:
            tid = "gemini-2.5-pro"
        elif "2.5 Flash" in model_selector:
            tid = "gemini-2.5-flash"
        elif "2.0" in model_selector:
            tid = "gemini-2.0-flash-exp"
        return run_gemini(messages, tid)
    elif "Mistral" in model_selector:
        tid = "mistral-large-latest"
        if "Pixtral" in model_selector:
            tid = "pixtral-large-latest"
        elif "2509" in model_selector:
            tid = "magistral-medium-2509"
        elif "2512" in model_selector:
            tid = "mistral-large-2512"
        elif "Codestral" in model_selector:
            tid = "codestral-2508"
        return run_mistral(messages, tid)
    elif "Groq" in model_selector:
        tid = "llama-3.3-70b-versatile"
        if "120B" in model_selector:
            tid = "openai/gpt-oss-120b"
        elif "20B" in model_selector:
            tid = "openai/gpt-oss-20b"
        return run_groq(messages, tid)
    elif "H200" in model_selector:
        return run_local_h200(messages)
    return "⚠️ Model not recognized."

# --- 6. INTERFACE ---
with gr.Blocks() as demo:
    gr.Markdown("# 🔀 APIDOST v12 - Stable")
    models_list = [
        "✨ Google: Gemini 2.5 Flash Lite ⚡",
        "✨ Google: Gemini 3.0 Pro (Experimental)",
        "✨ Google: Gemini 2.5 Pro",
        "✨ Google: Gemini 2.5 Flash",
        "✨ Google: Gemini 2.0 Flash",
        "☁️ Groq: GPT OSS 120B (OpenAI)",
        "☁️ Groq: GPT OSS 20B (OpenAI)",
        "☁️ Groq: Llama 3.3 70B",
        "🇫🇷 Mistral: Magistral Medium 2509",
        "🇫🇷 Mistral: Pixtral Large (Vision) 🖼️",
        "🇫🇷 Mistral: Large 2512 (Dec/25)",
        "🇫🇷 Mistral: Codestral 2508",
        "🔥 Local H200: QwQ 32B Mix Coder",
    ]
    with gr.Row():
        model_dropdown = gr.Dropdown(choices=models_list, value=models_list[0], label="Brain", interactive=True)
    chat = gr.ChatInterface(
        fn=router,
        additional_inputs=[model_dropdown],
        multimodal=True,
    )
    api_bridge = gr.Interface(
        fn=router,
        inputs=[
            gr.MultimodalTextbox(label="message"),
            gr.JSON(value=[], label="history"),
            gr.Dropdown(choices=models_list, label="model_selector", value=models_list[0]),
        ],
        outputs=[gr.Textbox(label="response")],
        api_name="chat",
    )

if __name__ == "__main__":
    try:
        # Pre-fetch the local model weights so the first GPU call is faster.
        snapshot_download(repo_id=LOCAL_MODEL_ID)
    except Exception:
        pass
    demo.queue(api_open=True).launch(server_name="0.0.0.0", server_port=7860)
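# A minimal client-side sketch for calling the /chat endpoint exposed by the
# gr.Interface above. It assumes the app is reachable at http://localhost:7860
# and that the gradio_client package is installed; the message, history, and
# model values are illustrative:
#
#   from gradio_client import Client
#
#   client = Client("http://localhost:7860/")
#   reply = client.predict(
#       {"text": "Hello, who are you?", "files": []},  # message
#       [],                                            # history
#       "☁️ Groq: Llama 3.3 70B",                      # model_selector
#       api_name="/chat",
#   )
#   print(reply)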