eduard76 committed
Commit d052a06 · verified · 1 Parent(s): 2329c05

Update app.py

Files changed (1):
  1. app.py +58 -70
app.py CHANGED
@@ -1,23 +1,27 @@
 import gradio as gr
 import openai
 import anthropic
+import google.generativeai as genai
 import threading
 import json
 import time
+import os
 
-# --- Hardcoded API Keys ---
-# As requested, the API keys are now part of the script.
+# --- Securely Load API Keys from Environment Variables ---
+# IMPORTANT: Set these keys in your system's environment variables
+# or create a .env file and use a library like 'python-dotenv' to load them.
 API_KEYS = {
-    "openai_api_key": "sk-proj-[REDACTED]",
-    "anthropic_api_key": "sk-ant-[REDACTED]",
-    "deepseek_api_key": "sk-[REDACTED]",
-    "google_api_key": "[REDACTED]",
-    "groq_api_key": "gsk_[REDACTED]",
-    "ollama_api_key": "ollama"  # Static key for local Ollama
+    "openai_api_key": os.getenv("OPENAI_API_KEY"),
+    "anthropic_api_key": os.getenv("ANTHROPIC_API_KEY"),
+    "deepseek_api_key": os.getenv("DEEPSEEK_API_KEY"),
+    "google_api_key": os.getenv("GOOGLE_API_KEY"),
+    "groq_api_key": os.getenv("GROQ_API_KEY"),
+    "ollama_api_key": "ollama"  # Static key for local Ollama
 }
 
 # --- Model & API Configuration ---
-# This configuration is based on your reference notebook.
+# FIX: Corrected model names for Claude, Gemini, and the Judge model.
+# FIX: Reconfigured Gemini to use its own 'gemini' api_client.
 COMPETITOR_MODELS = [
     {
         "name": "gpt-4o-mini",
@@ -25,7 +29,7 @@ COMPETITOR_MODELS = [
         "key_name": "openai_api_key"
     },
     {
-        "name": "claude-sonnet-4-20250514",  # Corrected model name
+        "name": "claude-3-5-sonnet-20240620",  # CORRECTED model name
         "api_client": "anthropic",
         "key_name": "anthropic_api_key"
     },
@@ -36,29 +40,26 @@ COMPETITOR_MODELS = [
         "key_name": "deepseek_api_key"
     },
     {
-        "name": "llama3-8b-8192",  # Using a smaller Llama3 model on Groq for speed
+        "name": "llama3-8b-8192",
         "api_client": "openai_compatible",
         "base_url": "https://api.groq.com/openai/v1",
         "key_name": "groq_api_key"
     },
     {
-        "name": "llama3",  # Ensure you have 'llama3' pulled via 'ollama pull llama3'
+        "name": "llama3",  # Ensure you have 'llama3' pulled via 'ollama pull llama3'
        "api_client": "ollama",
         "base_url": "http://localhost:11434/v1",
         "key_name": "ollama_api_key"
     },
     {
-        # Re-integrating Gemini with a standard OpenAI-compatible configuration
-        "name": "gemini-2.0-flash",
-        "api_client": "openai_compatible",
-        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
+        "name": "gemini-1.5-flash-latest",  # CORRECTED model name
+        "api_client": "gemini",  # CORRECTED client type
         "key_name": "google_api_key"
     }
 ]
 # --- UI Configuration ---
-# FIX: This line was likely missing in your local file, causing the NameError.
-MODEL_COLORS = ["#FF6347", "#4682B4", "#32CD32", "#FFD700", "#6A5ACD", "#00CED1"]
-JUDGE_MODEL = "o3-mini"  # Corrected judge model name
+MODEL_COLORS = ["#FF6347", "#D2691E", "#32CD32", "#FFD700", "#6A5ACD", "#00CED1"]
+JUDGE_MODEL = "gpt-4o-mini"  # CORRECTED judge model name
 
 # --- Helper Function to Query APIs ---
 def get_model_response(model_config, api_keys, prompt, results_list):
@@ -73,7 +74,7 @@ def get_model_response(model_config, api_keys, prompt, results_list):
 
     try:
         if not api_key and api_client_type != "ollama":
-            raise ValueError("API key is missing.")
+            raise ValueError(f"API key '{model_config['key_name']}' is missing.")
 
         messages = [{"role": "user", "content": prompt}]
 
@@ -84,23 +85,20 @@ def get_model_response(model_config, api_keys, prompt, results_list):
 
        elif api_client_type == "anthropic":
             client = anthropic.Anthropic(api_key=api_key)
-            response = client.messages.create(model=model_name, max_tokens=2048, messages=messages)
+            response = client.messages.create(model=model_name, max_tokens=4096, messages=messages)
             response_content = response.content[0].text
+
+        # FIX: Added a dedicated block for the Gemini API
+        elif api_client_type == "gemini":
+            genai.configure(api_key=api_key)
+            model = genai.GenerativeModel(model_name)
+            response = model.generate_content(prompt)
+            response_content = response.text
 
         elif api_client_type in ["openai_compatible", "ollama"]:
-            # For Google's endpoint, the model name is part of the path, so we construct the URL here.
-            base_url = model_config.get("base_url", "")
-            if "googleapis.com" in base_url:
-                full_url = f"{base_url}/models/{model_config['name']}:generateContent"
-                # This is a simplified example; a real implementation would use Google's own client library
-                # or handle the different API structure. For now, we'll try the OpenAI client.
-                client = openai.OpenAI(api_key=api_key, base_url=base_url)
-                # The model name for the client needs to be just the model identifier
-                response = client.chat.completions.create(model=model_config['name'], messages=messages)
-            else:
-                client = openai.OpenAI(api_key=api_key, base_url=base_url)
-                response = client.chat.completions.create(model=model_name, messages=messages)
-
+            base_url = model_config.get("base_url")
+            client = openai.OpenAI(api_key=api_key, base_url=base_url)
+            response = client.chat.completions.create(model=model_name, messages=messages)
             response_content = response.choices[0].message.content
 
     except Exception as e:
@@ -111,26 +109,23 @@
 # --- Main Logic for the Arena (as a Generator) ---
 def run_competition(question, progress=gr.Progress(track_tqdm=True)):
     """
-    A generator function that runs the competition and yields UI updates at each stage,
-    including the state of the button.
+    A generator function that runs the competition and yields UI updates at each stage.
     """
-    # --- Stage 1: Initial UI State ---
-    # Disable button and set "Thinking..." message for all competitor boxes
+    # Stage 1: Initial UI State
     button_update_running = gr.Button("⚙️ Running Competition...", interactive=False)
     initial_text_outputs = ["The winning answer will be displayed here..."] + ["⏳ Thinking..."] * len(COMPETITOR_MODELS)
     yield [button_update_running] + initial_text_outputs
 
     if not question:
-        # If the question is empty, clear the UI and re-enable the button.
         button_update_idle = gr.Button("Run Competition", interactive=True)
         blank_outputs = [""] * (1 + len(COMPETITOR_MODELS))
         yield [button_update_idle] + blank_outputs
         return
 
-    # --- Stage 2: Get Competitor Responses Concurrently ---
+    # Stage 2: Get Competitor Responses Concurrently
     progress(0, desc="Querying Competitor Models...")
     threads = []
-    competitor_responses = []  # This list will be populated by the threads
+    competitor_responses = []
     for model_config in COMPETITOR_MODELS:
         thread = threading.Thread(
             target=get_model_response,
@@ -139,29 +134,26 @@ def run_competition(question, progress=gr.Progress(track_tqdm=True)):
         threads.append(thread)
         thread.start()
 
-    # Wait for all threads to complete
     for thread in threads:
         thread.join()
 
-    # --- Stage 3: Update UI with Competitor Responses ---
+    # Stage 3: Update UI with Competitor Responses
     progress(0.7, desc="All models responded. Awaiting judgment...")
     button_update_judging = gr.Button("⚖️ Judging...", interactive=False)
 
-    # Prepare the text outputs for the UI boxes
-    text_outputs = ["The winning answer will be displayed here..."]  # Best answer is still pending
+    text_outputs = ["The winning answer will be displayed here..."]
     response_dict = {r['model']: r['response'] for r in competitor_responses}
     responses_text_for_judge = ""
 
-    # Fill the output list in the correct UI order
     for i, model_config in enumerate(COMPETITOR_MODELS):
         response = response_dict.get(model_config['name'], f"Error: {model_config['name']} response not found.")
         text_outputs.append(response)
         responses_text_for_judge += f"# Response from competitor {i+1} ({model_config['name']})\n\n{response}\n\n"
 
     yield [button_update_judging] + text_outputs
-    time.sleep(1)  # Small delay for better UX
+    time.sleep(1)
 
-    # --- Stage 4: Get the Judge's Ranking ---
+    # Stage 4: Get the Judge's Ranking
     judge_prompt = f"""You are a fair and impartial judge in a competition between {len(competitor_responses)} LLM assistants.
 Each model was given this question:
 ---
@@ -179,6 +171,10 @@ Now, provide your judgment as a JSON object with the ranked order of the competi
 
     best_answer_text = "Error: Judge failed to provide a valid ranking."
     try:
+        # Ensure the OpenAI API key is available for the judge
+        if not API_KEYS["openai_api_key"]:
+            raise ValueError("OpenAI API key is missing for the judge model.")
+
         judge_client = openai.OpenAI(api_key=API_KEYS["openai_api_key"])
         judge_messages = [{"role": "user", "content": judge_prompt}]
 
@@ -190,25 +186,27 @@ Now, provide your judgment as a JSON object with the ranked order of the competi
 
         results_json = response.choices[0].message.content
         results_dict = json.loads(results_json)
-        ranked_indices = results_dict.get("results", [])
+        # Handle potential string or integer values from the judge model
+        ranked_indices = [str(i) for i in results_dict.get("results", [])]
 
         if ranked_indices:
-            # Find the best answer based on the judge's ranking
-            best_competitor_num = int(ranked_indices[0]) - 1
-            # The model name and response are retrieved from the ordered `text_outputs` list
-            best_model_name = COMPETITOR_MODELS[best_competitor_num]['name']
-            best_model_color = MODEL_COLORS[best_competitor_num % len(MODEL_COLORS)]
-            best_answer = text_outputs[best_competitor_num + 1]  # +1 to account for best_answer_box at index 0
+            best_competitor_num_str = ranked_indices[0]
+            best_competitor_index = int(best_competitor_num_str) - 1
+
+            best_model_name = COMPETITOR_MODELS[best_competitor_index]['name']
+            best_model_color = MODEL_COLORS[best_competitor_index % len(MODEL_COLORS)]
+            best_answer = text_outputs[best_competitor_index + 1]
+
             best_answer_text = f"## 🏆 Best Answer (from <span style='color:{best_model_color}; font-weight:bold;'>{best_model_name}</span>)\n\n"
             best_answer_text += best_answer
 
     except Exception as e:
         best_answer_text = f"## Error\n\nAn error occurred during judgment: {str(e)}"
 
-    # --- Stage 5: Final UI Update ---
+    # Stage 5: Final UI Update
     progress(1, desc="Competition Complete!")
     button_update_idle = gr.Button("Run Competition", interactive=True)
-    text_outputs[0] = best_answer_text  # Add the final best answer to our output list
+    text_outputs[0] = best_answer_text
     yield [button_update_idle] + text_outputs
 
 
@@ -216,7 +214,6 @@ Now, provide your judgment as a JSON object with the ranked order of the competi
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue")) as demo:
     gr.Markdown("# Advanced Multi-Model LLM Arena")
 
-    # --- Top Half of the Screen ---
     with gr.Row():
         with gr.Column(scale=1):
             question_box = gr.Textbox(
@@ -225,8 +222,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue"))
                 placeholder="e.g., Explain the concept of emergent properties in complex systems and provide three distinct examples."
             )
             run_button = gr.Button("Run Competition", variant="primary")
-            # FIX: Removed the 'label' argument from gr.Progress
-            progress_bar = gr.Progress()
+            progress_bar = gr.Progress()  # This component is controlled by the `gr.Progress` in the function
 
         with gr.Column(scale=2):
             best_answer_box = gr.Markdown("The winning answer will be displayed here...")
@@ -234,30 +230,22 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue"))
     gr.Markdown("---")
     gr.Markdown("### Competitor Responses")
 
-    # --- Bottom Half of the Screen ---
     response_boxes = []
-    # Create rows with 3 models each
     for i in range(0, len(COMPETITOR_MODELS), 3):
         with gr.Row():
-            # Create a column for each model in the row
             for j in range(3):
                 model_index = i + j
                 if model_index < len(COMPETITOR_MODELS):
                     with gr.Column():
                         model_config = COMPETITOR_MODELS[model_index]
                         model_name = model_config['name']
-                        # Assign color from the list, cycling through if necessary
                        color = MODEL_COLORS[model_index % len(MODEL_COLORS)]
 
-                        # Styled Markdown for the label
                         gr.Markdown(f"<h3 style='color:{color}; margin-bottom: -10px; text-align:center;'>{model_name}</h3>")
 
-                        # Textbox for the response, no label needed here
-                        box = gr.Textbox(lines=10, interactive=False)
+                        box = gr.Textbox(lines=10, interactive=False, container=False)
                         response_boxes.append(box)
 
-    # --- Connect the Button to the Logic ---
-    # The button itself is now an output component that gets updated.
     all_outputs = [run_button, best_answer_box] + response_boxes
 
    run_button.click(
@@ -267,4 +255,4 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue"))
     )
 
 if __name__ == "__main__":
-    demo.launch(debug=True)
+    demo.launch(debug=True)
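
The updated app.py reads every key with os.getenv, and its new comment suggests optionally loading a .env file with python-dotenv. A minimal sketch of that optional setup is below; the .env contents, the early exit, and where load_dotenv() is called are illustrative assumptions, not part of this commit:

# Optional: populate os.environ from a local .env file before app.py builds API_KEYS.
# Assumes `pip install python-dotenv` and a .env file next to app.py, e.g.:
#   OPENAI_API_KEY=...
#   ANTHROPIC_API_KEY=...
#   DEEPSEEK_API_KEY=...
#   GOOGLE_API_KEY=...
#   GROQ_API_KEY=...
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env; by default it does not override variables already set in the environment

# The judge always uses the OpenAI key, so failing early here is a reasonable (hypothetical) guard.
if not os.getenv("OPENAI_API_KEY"):
    raise SystemExit("OPENAI_API_KEY is not set; the judge model cannot run.")

With the variables exported (or a .env file in place), running `python app.py` launches the Gradio demo as before via demo.launch(debug=True).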