Spaces:

eduard76
/

Multi_AI_Competition

Sleeping

App Files Files Community

eduard76 committed on Jul 25, 2025

Commit

2329c05

verified ·

1 Parent(s): b6e9c46

Create app.py

Browse files

Files changed (1) hide show

app.py +270 -0

app.py ADDED Viewed

	@@ -0,0 +1,270 @@

+import json
+import os
+import threading
+import time
+
+import anthropic
+import gradio as gr
+import openai
+# --- Hardcoded API Keys ---
+# As requested, the API keys are now part of the script.
+API_KEYS = {
+    "openai_api_key": "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",
+    "anthropic_api_key": "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",
+    "deepseek_api_key": "sk-84ff2cd7665a430d9e098f51dcc9d109",
+    "google_api_key": "AIzaSyCAcmOLv2Q8YIhb2opede9l-QQUAjzlBiY",
+    "groq_api_key": "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",
+    "ollama_api_key": "ollama" # Static key for local Ollama
+}
+# --- Model & API Configuration ---
+# This configuration is based on your reference notebook.
+COMPETITOR_MODELS = [
+    {
+        "name": "gpt-4o-mini",
+        "api_client": "openai",
+        "key_name": "openai_api_key"
+    },
+    {
+        "name": "claude-sonnet-4-20250514", # Corrected model name
+        "api_client": "anthropic",
+        "key_name": "anthropic_api_key"
+    },
+    {
+        "name": "deepseek-chat",
+        "api_client": "openai_compatible",
+        "base_url": "https://api.deepseek.com/v1",
+        "key_name": "deepseek_api_key"
+    },
+    {
+        "name": "llama3-8b-8192", # Using a smaller Llama3 model on Groq for speed
+        "api_client": "openai_compatible",
+        "base_url": "https://api.groq.com/openai/v1",
+        "key_name": "groq_api_key"
+    },
+    {
+        "name": "llama3", # Ensure you have 'llama3' pulled via 'ollama pull llama3'
+        "api_client": "ollama",
+        "base_url": "http://localhost:11434/v1",
+        "key_name": "ollama_api_key"
+    },
+    {
+        # Re-integrating Gemini with a standard OpenAI-compatible configuration
+         "name": "gemini-2.0-flash",
+        "api_client": "openai_compatible",
+        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
+        "key_name": "google_api_key"
+    }
+]
+# --- UI Configuration ---
+# FIX: This line was likely missing in your local file, causing the NameError.
+MODEL_COLORS = ["#FF6347", "#4682B4", "#32CD32", "#FFD700", "#6A5ACD", "#00CED1"]
+JUDGE_MODEL = "o3-mini" # Corrected judge model name
+# --- Helper Function to Query APIs ---
+def get_model_response(model_config, api_keys, prompt, results_list):
+    """
+    Queries an LLM API based on the provided configuration and appends the result to a list.
+    """
+    model_name = model_config["name"]
+    api_client_type = model_config["api_client"]
+    api_key = api_keys.get(model_config["key_name"])
+    response_content = f"Error: Model {model_name} did not respond."
+    try:
+        if not api_key and api_client_type != "ollama":
+            raise ValueError("API key is missing.")
+        messages = [{"role": "user", "content": prompt}]
+        if api_client_type == "openai":
+            client = openai.OpenAI(api_key=api_key)
+            response = client.chat.completions.create(model=model_name, messages=messages)
+            response_content = response.choices[0].message.content
+        elif api_client_type == "anthropic":
+            client = anthropic.Anthropic(api_key=api_key)
+            response = client.messages.create(model=model_name, max_tokens=2048, messages=messages)
+            response_content = response.content[0].text
+        elif api_client_type in ["openai_compatible", "ollama"]:
+            # For Google's endpoint, the model name is part of the path, so we construct the URL here.
+            base_url = model_config.get("base_url", "")
+            if "googleapis.com" in base_url:
+                full_url = f"{base_url}/models/{model_config['name']}:generateContent"
+                # This is a simplified example; a real implementation would use Google's own client library
+                # or handle the different API structure. For now, we'll try the OpenAI client.
+                client = openai.OpenAI(api_key=api_key, base_url=base_url)
+                # The model name for the client needs to be just the model identifier
+                response = client.chat.completions.create(model=model_config['name'], messages=messages)
+            else:
+                client = openai.OpenAI(api_key=api_key, base_url=base_url)
+                response = client.chat.completions.create(model=model_name, messages=messages)
+            response_content = response.choices[0].message.content
+    except Exception as e:
+        response_content = f"Error for {model_name}: {str(e)}"
+    results_list.append({"model": model_name, "response": response_content})
+# --- Main Logic for the Arena (as a Generator) ---
+def run_competition(question, progress=gr.Progress(track_tqdm=True)):
+    """
+    A generator function that runs the competition and yields UI updates at each stage,
+    including the state of the button.
+    """
+    # --- Stage 1: Initial UI State ---
+    # Disable button and set "Thinking..." message for all competitor boxes
+    button_update_running = gr.Button("⚙️ Running Competition...", interactive=False)
+    initial_text_outputs = ["The winning answer will be displayed here..."] + ["⏳ Thinking..."] * len(COMPETITOR_MODELS)
+    yield [button_update_running] + initial_text_outputs
+    if not question:
+        # If the question is empty, clear the UI and re-enable the button.
+        button_update_idle = gr.Button("Run Competition", interactive=True)
+        blank_outputs = [""] * (1 + len(COMPETITOR_MODELS))
+        yield [button_update_idle] + blank_outputs
+        return
+    # --- Stage 2: Get Competitor Responses Concurrently ---
+    progress(0, desc="Querying Competitor Models...")
+    threads = []
+    competitor_responses = [] # This list will be populated by the threads
+    for model_config in COMPETITOR_MODELS:
+        thread = threading.Thread(
+            target=get_model_response,
+            args=(model_config, API_KEYS, question, competitor_responses)
+        )
+        threads.append(thread)
+        thread.start()
+    # Wait for all threads to complete
+    for thread in threads:
+        thread.join()
+    # --- Stage 3: Update UI with Competitor Responses ---
+    progress(0.7, desc="All models responded. Awaiting judgment...")
+    button_update_judging = gr.Button("⚖️ Judging...", interactive=False)
+    # Prepare the text outputs for the UI boxes
+    text_outputs = ["The winning answer will be displayed here..."] # Best answer is still pending
+    response_dict = {r['model']: r['response'] for r in competitor_responses}
+    responses_text_for_judge = ""
+    # Fill the output list in the correct UI order
+    for i, model_config in enumerate(COMPETITOR_MODELS):
+        response = response_dict.get(model_config['name'], f"Error: {model_config['name']} response not found.")
+        text_outputs.append(response)
+        responses_text_for_judge += f"# Response from competitor {i+1} ({model_config['name']})\n\n{response}\n\n"
+    yield [button_update_judging] + text_outputs
+    time.sleep(1) # Small delay for better UX
+    # --- Stage 4: Get the Judge's Ranking ---
+    judge_prompt = f"""You are a fair and impartial judge in a competition between {len(competitor_responses)} LLM assistants.
+Each model was given this question:
+---
+{question}
+---
+Your task is to evaluate each response for clarity, accuracy, and depth of reasoning. Then, you must rank them in order from best to worst.
+You must respond with JSON, and only JSON, with the following format:
+{{"results": ["best competitor number", "second best competitor number", ...]}}
+Here are the responses from each competitor:
+---
+{responses_text_for_judge}
+---
+Now, provide your judgment as a JSON object with the ranked order of the competitors. Do not include any other text, markdown formatting, or code blocks."""
+    best_answer_text = "Error: Judge failed to provide a valid ranking."
+    try:
+        judge_client = openai.OpenAI(api_key=API_KEYS["openai_api_key"])
+        judge_messages = [{"role": "user", "content": judge_prompt}]
+        response = judge_client.chat.completions.create(
+            model=JUDGE_MODEL,
+            messages=judge_messages,
+            response_format={"type": "json_object"}
+        )
+        results_json = response.choices[0].message.content
+        results_dict = json.loads(results_json)
+        ranked_indices = results_dict.get("results", [])
+        if ranked_indices:
+            # Find the best answer based on the judge's ranking
+            best_competitor_num = int(ranked_indices[0]) - 1
+            # The model name and response are retrieved from the ordered `text_outputs` list
+            best_model_name = COMPETITOR_MODELS[best_competitor_num]['name']
+            best_model_color = MODEL_COLORS[best_competitor_num % len(MODEL_COLORS)]
+            best_answer = text_outputs[best_competitor_num + 1] # +1 to account for best_answer_box at index 0
+            best_answer_text = f"## 🏆 Best Answer (from <span style='color:{best_model_color}; font-weight:bold;'>{best_model_name}</span>)\n\n"
+            best_answer_text += best_answer
+    except Exception as e:
+        best_answer_text = f"## Error\n\nAn error occurred during judgment: {str(e)}"
+    # --- Stage 5: Final UI Update ---
+    progress(1, desc="Competition Complete!")
+    button_update_idle = gr.Button("Run Competition", interactive=True)
+    text_outputs[0] = best_answer_text # Add the final best answer to our output list
+    yield [button_update_idle] + text_outputs
+# --- Gradio User Interface ---
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue")) as demo:
+    gr.Markdown("# Advanced Multi-Model LLM Arena")
+    # --- Top Half of the Screen ---
+    with gr.Row():
+        with gr.Column(scale=1):
+            question_box = gr.Textbox(
+                label="Enter Your Question Here",
+                lines=6,
+                placeholder="e.g., Explain the concept of emergent properties in complex systems and provide three distinct examples."
+            )
+            run_button = gr.Button("Run Competition", variant="primary")
+            # FIX: Removed the 'label' argument from gr.Progress
+            progress_bar = gr.Progress()
+        with gr.Column(scale=2):
+            best_answer_box = gr.Markdown("The winning answer will be displayed here...")
+    gr.Markdown("---")
+    gr.Markdown("### Competitor Responses")
+    # --- Bottom Half of the Screen ---
+    response_boxes = []
+    # Create rows with 3 models each
+    for i in range(0, len(COMPETITOR_MODELS), 3):
+        with gr.Row():
+            # Create a column for each model in the row
+            for j in range(3):
+                model_index = i + j
+                if model_index < len(COMPETITOR_MODELS):
+                    with gr.Column():
+                        model_config = COMPETITOR_MODELS[model_index]
+                        model_name = model_config['name']
+                        # Assign color from the list, cycling through if necessary
+                        color = MODEL_COLORS[model_index % len(MODEL_COLORS)]
+                        # Styled Markdown for the label
+                        gr.Markdown(f"<h3 style='color:{color}; margin-bottom: -10px; text-align:center;'>{model_name}</h3>")
+                        # Textbox for the response, no label needed here
+                        box = gr.Textbox(lines=10, interactive=False)
+                        response_boxes.append(box)
+    # --- Connect the Button to the Logic ---
+    # The button itself is now an output component that gets updated.
+    all_outputs = [run_button, best_answer_box] + response_boxes
+    run_button.click(
+        fn=run_competition,
+        inputs=[question_box],
+        outputs=all_outputs
+    )
+if __name__ == "__main__":
+    demo.launch(debug=True)