"""Gradio chat UI for a Minecraft coding assistant with multi-provider failover.

API keys are read from environment variables. Every provider that has a key
is "active"; each request goes to the active provider with the lowest
relative usage and falls through to the next one when a call fails.
"""

import os
import time
from collections import defaultdict

import gradio as gr
from openai import OpenAI

# Loads keys from a .env file when running locally.
# On HuggingFace/Colab the env vars are already set, so this does nothing.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    pass  # dotenv not installed (HF/Colab don't need it)

# ── Read keys from environment variables (never hardcode these!) ──
API_KEYS = {
    "groq": os.environ.get("GROQ_API_KEY"),
    "gemini": os.environ.get("GEMINI_API_KEY"),
    "mistral": os.environ.get("MISTRAL_API_KEY"),
    "cohere": os.environ.get("COHERE_API_KEY"),
    "huggingface": os.environ.get("HF_API_KEY"),
}

# ── Provider configs ──
# "daily_limit" is only used as the denominator of the usage ratio in
# pick_provider(); it is not enforced as a hard cap anywhere in this file.
ALL_PROVIDERS = [
    {
        "name": "groq",
        "base_url": "https://api.groq.com/openai/v1",
        "model": "llama-3.1-8b-instant",
        "daily_limit": 14400,
        "priority": 1,
    },
    {
        "name": "gemini",
        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
        "model": "gemini-2.5-flash",
        "daily_limit": 1500,
        "priority": 2,
    },
    {
        "name": "mistral",
        "base_url": "https://api.mistral.ai/v1",
        "model": "mistral-small-latest",
        "daily_limit": 1400,
        "priority": 3,
    },
    {
        "name": "cohere",
        "base_url": "https://api.cohere.com/compatibility/v1",
        "model": "command-r-plus",
        "daily_limit": 1000,
        "priority": 4,
    },
    {
        "name": "huggingface",
        "base_url": "https://api-inference.huggingface.co/v1/",
        "model": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "daily_limit": 1000,
        "priority": 5,  # used last (slowest)
    },
]

# ── Filter to only providers where a key was given ──
ACTIVE_PROVIDERS = [
    p for p in ALL_PROVIDERS
    if API_KEYS.get(p["name"]) not in (None, "")
]

# ── Usage tracker ──
RESET_INTERVAL_SECONDS = 86400  # counters are cleared once this much time has passed

usage_count = defaultdict(int)
bad_keys = set()  # providers whose key was rejected; skipped until the next reset
last_reset = time.time()


def pick_provider(exclude=None):
    """Return the least-used available provider config, or None if none is left.

    Args:
        exclude: Optional set of provider names to skip for this call.

    Returns:
        The provider dict with the smallest ``usage_count / daily_limit``
        ratio (ties broken by the lower ``priority`` number), or ``None``
        when every active provider is excluded or listed in ``bad_keys``.

    Side effects:
        When more than ``RESET_INTERVAL_SECONDS`` have elapsed since
        ``last_reset``, clears ``usage_count`` and ``bad_keys`` and restarts
        the window.
    """
    global last_reset
    if exclude is None:
        exclude = set()

    if time.time() - last_reset > RESET_INTERVAL_SECONDS:  # reset counts every 24h
        usage_count.clear()
        bad_keys.clear()
        last_reset = time.time()

    available = [
        p for p in ACTIVE_PROVIDERS
        if p["name"] not in exclude and p["name"] not in bad_keys
    ]
    if not available:
        return None

    return min(
        available,
        key=lambda p: (
            usage_count[p["name"]] / p["daily_limit"],  # % used
            p["priority"],                              # tiebreak by priority
        ),
    )


SYSTEM_PROMPT = "You are a Minecraft coding assistant. Be brief and direct."


def _history_to_messages(history, max_exchanges=3):
    """Convert recent Gradio chat history into OpenAI-style message dicts.

    Two history shapes are accepted:

    * a list of ``(user, assistant)`` pairs — the last ``max_exchanges``
      pairs are used;
    * a list of ``{"role": ..., "content": ...}`` dicts — the last
      ``2 * max_exchanges`` entries are used, keeping only user/assistant
      turns whose content is a non-empty string.

    Entries of any other shape are ignored.
    """
    history = history or []
    converted = []

    if history and isinstance(history[0], dict):
        # One dict per turn, so two entries make up one exchange.
        for turn in history[-2 * max_exchanges:]:
            if not isinstance(turn, dict):
                continue
            role = turn.get("role")
            content = turn.get("content")
            # NOTE(review): non-string content (e.g. file payloads) is skipped.
            if role in ("user", "assistant") and isinstance(content, str) and content:
                converted.append({"role": role, "content": content})
        return converted

    for exchange in history[-max_exchanges:]:
        if not isinstance(exchange, (list, tuple)):
            continue
        # Length checks avoid an IndexError on a malformed (short) pair.
        user_msg = exchange[0] if len(exchange) > 0 else ""
        asst_msg = exchange[1] if len(exchange) > 1 else ""
        if user_msg:
            converted.append({"role": "user", "content": user_msg})
        if asst_msg:
            converted.append({"role": "assistant", "content": asst_msg})
    return converted


def _classify_error(exc):
    """Classify a provider failure as ``"auth"``, ``"rate_limit"`` or ``"other"``.

    When the exception carries a ``status_code`` attribute, that value decides
    the 401/429 checks. The digits are only searched for in the message text
    when no status code is available, so unrelated numbers in an error
    message cannot mark a key as bad. Keyword matching on the text is kept
    in both cases.
    """
    text = str(exc).lower()
    status = getattr(exc, "status_code", None)

    if status is None:
        looks_401 = "401" in text
        looks_429 = "429" in text
    else:
        looks_401 = status == 401
        looks_429 = status == 429

    if looks_401 or "unauthorized" in text or "invalid api key" in text:
        return "auth"
    if looks_429 or "rate limit" in text or "quota" in text or "exceeded" in text:
        return "rate_limit"
    return "other"


def respond(message, history):
    """Stream an assistant reply, failing over between providers on errors.

    Args:
        message: The user's new message.
        history: Prior conversation as passed by ``gr.ChatInterface``
            (see ``_history_to_messages`` for the accepted shapes).

    Yields:
        The accumulated response text after each streamed chunk, or a single
        error string when no provider is configured or every provider failed.
    """
    if not ACTIVE_PROVIDERS:
        yield "❌ No API keys provided! Add them in your Space's Settings → Variables and Secrets."
        return

    # Build messages once — reused across retries
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    tried = set()  # skip these for this request only

    while True:
        provider = pick_provider(exclude=tried)
        if provider is None:
            yield "❌ All providers failed or hit limits. Try again later."
            return

        name = provider["name"]
        print(f"Trying: {name} | uses today: {usage_count[name]}")

        client = OpenAI(
            api_key=API_KEYS[name],
            base_url=provider["base_url"],
        )

        try:
            stream = client.chat.completions.create(
                model=provider["model"],
                messages=messages,
                max_tokens=1024,
                temperature=0.7,
                stream=True,
            )
            # Counted once the request was accepted, before the stream is read.
            usage_count[name] += 1

            response_text = ""
            for chunk in stream:
                # A chunk with no choices carries no text; indexing it would
                # raise and wrongly trigger failover in the middle of a reply.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                token = (delta.content if delta is not None else None) or ""
                response_text += token
                yield response_text
            return  # success — stop retrying

        except Exception as e:
            # Broad catch is deliberate: any failure moves on to the next provider.
            tried.add(name)
            failure = _classify_error(e)
            if failure == "auth":
                bad_keys.add(name)  # skip until the tracker resets
                print(f"❌ {name} bad key — skipping for session")
            elif failure == "rate_limit":
                print(f"⚠️ {name} rate limited — trying next provider")
            else:
                print(f"⚠️ {name} error: {e} — trying next provider")


# ── Startup summary ──
print("\n=== Provider Status ===")
for p in ALL_PROVIDERS:
    key = API_KEYS.get(p["name"])
    status = "✅ Active" if key not in (None, "") else "❌ No key"
    print(f" {p['name']:<14} {status}")
print(f"\nActive providers: {len(ACTIVE_PROVIDERS)}/{len(ALL_PROVIDERS)}")
if not ACTIVE_PROVIDERS:
    print("⚠️ WARNING: No API keys found in environment!")
print("=" * 23 + "\n")

# ── UI ──
demo = gr.ChatInterface(
    fn=respond,
    title="⛏️ Minecraft Coding Assistant",
    description=f"Multi-provider · {len(ACTIVE_PROVIDERS)} active · ~{sum(p['daily_limit'] for p in ACTIVE_PROVIDERS):,} req/day",
    examples=[
        "How do I create a custom recipe in a datapack?",
        "Write a Spigot plugin that teleports a player on command",
        "What's the syntax for a /execute command with conditions?",
    ],
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch()