# NOTE: scraped Hugging Face Spaces page banner ("Spaces: Sleeping") — not part
# of the application code; kept here only as provenance for this file.
import os
import logging

import gradio as gr
from huggingface_hub import InferenceClient

# Configure module-wide logging once at import time.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# Environment variables. An HF token is optional: without one the client
# falls back to anonymous (rate-limited) Inference API access.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
logger.info("HF_TOKEN configured: %s", bool(HF_TOKEN))

client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
logger.info("InferenceClient initialized")

# Models to compare (each slot overridable via MODEL_1..MODEL_3 env vars).
# Insertion order here defines the order of UI output boxes and result tuples.
MODELS = {
    "Qwen 2.5 72B": os.environ.get("MODEL_1", "Qwen/Qwen2.5-72B-Instruct"),
    "Llama 3.2 3B": os.environ.get("MODEL_2", "meta-llama/Llama-3.2-3B-Instruct"),
    "Zephyr 7B": os.environ.get("MODEL_3", "HuggingFaceH4/zephyr-7b-beta"),
}
logger.info("Loaded %d models for comparison", len(MODELS))
def query_model(model_id: str, prompt: str, max_tokens: int) -> str:
    """Query a single model through the chat-completions API.

    Args:
        model_id: Hub repo id of the model to query.
        prompt: User text, sent as a single ``user`` chat message.
        max_tokens: Cap on the number of generated tokens.

    Returns:
        The model's reply text, or an ``"❌ Error: ..."`` string on failure so
        the UI always has something to display instead of crashing the app.
    """
    try:
        logger.info("Querying %s...", model_id)
        response = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Broad catch is deliberate: any backend failure (auth, rate limit,
        # model loading, network) is rendered as text in the output box.
        logger.exception("Error with %s", model_id)
        return f"❌ Error: {e}"
def compare_models(prompt: str, max_tokens: int) -> tuple:
    """Query every model in ``MODELS`` with the same prompt.

    Args:
        prompt: The user prompt; blank/whitespace-only input short-circuits.
        max_tokens: Token cap forwarded to each model.

    Returns:
        A tuple with one response string per entry in ``MODELS``, in
        insertion order (matching the UI output textboxes).
    """
    logger.info(
        "compare_models() called | prompt_len=%d | max_tokens=%s",
        len(prompt), max_tokens,
    )
    if not prompt.strip():
        # Size the placeholder tuple from MODELS so it stays in sync with the
        # number of output boxes (MODELS is env-configurable; the original
        # hardcoded a 3-tuple and would desync if the model set changed).
        return tuple("Enter a prompt!" for _ in MODELS)
    return tuple(
        query_model(model_id, prompt, max_tokens)
        for model_id in MODELS.values()
    )
logger.info("Building Gradio interface...")

with gr.Blocks(title="Model Arena") as demo:
    gr.Markdown("""# 🏟️ Model Arena
Compare responses from multiple LLMs side-by-side!
*All models run via HuggingFace Inference API - no downloads required.*
""")

    # --- Inputs -----------------------------------------------------------
    prompt = gr.Textbox(
        label="Your prompt",
        placeholder="Explain quantum computing in simple terms...",
        lines=3,
        autofocus=True,
    )
    max_tokens = gr.Slider(
        minimum=50, maximum=500, value=200, step=50,
        label="Max tokens per response",
    )
    btn = gr.Button("⚔️ Battle!", variant="primary", size="lg")

    # --- Outputs ----------------------------------------------------------
    # One read-only box per model, in MODELS' insertion order; this order
    # must match the tuple returned by compare_models().
    with gr.Row(equal_height=True):
        outputs = [
            gr.Textbox(label=name, lines=10, interactive=False)
            for name in MODELS
        ]

    # Fire on button click or Enter in the prompt box.
    btn.click(compare_models, inputs=[prompt, max_tokens], outputs=outputs)
    prompt.submit(compare_models, inputs=[prompt, max_tokens], outputs=outputs)

    gr.Examples(
        examples=[
            ["Explain quantum computing to a 5-year-old", 200],
            ["Write a haiku about machine learning", 100],
            ["What are 3 creative startup ideas?", 300],
            ["Debate: Is AI good or bad for humanity?", 400],
        ],
        inputs=[prompt, max_tokens],
    )

# Queue requests so concurrent users are served in order instead of erroring.
demo.queue()
logger.info("Starting Gradio server...")
demo.launch()