Spaces:

MrSimple01
/

RuSimulBench_arena

Sleeping

App Files Files Community

MrSimple01 committed on Mar 17, 2025

Commit

db0eaac

verified ·

1 Parent(s): 3c775cf

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -49

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import os
-import argparse
 import warnings
 import time
-from typing import Dict, Tuple, List, Optional
 from dataclasses import dataclass
 from pathlib import Path
@@ -345,6 +344,27 @@ def evaluate_single_response(gemini_api_key, prompt, response, model_name="Test
         }
 def create_gradio_interface():
     """Create Gradio interface for evaluation app"""
     with gr.Blocks(title="Model Response Evaluator") as app:
@@ -384,62 +404,54 @@ def create_gradio_interface():
             evaluate_batch_btn = gr.Button("Run Benchmark")
             benchmark_output = gr.DataFrame(label="Benchmark Results")
-            def evaluate_batch(api_key, file, prompt_column, models_text):
-                try:
-                    # Load the CSV file
-                    file_path = file.name
-                    df = pd.read_csv(file_path)
-                    # Process model names if provided
-                    models = None
-                    if models_text.strip():
-                        models = [m.strip() for m in models_text.split(',')]
-                    # Run the evaluation
-                    evaluator = BenchmarkEvaluator(api_key)
-                    results = evaluator.evaluate_all_models(df, models, prompt_column)
-                    return results
-                except Exception as e:
-                    return pd.DataFrame({'Error': [str(e)]})
             evaluate_batch_btn.click(
                 evaluate_batch,
                 inputs=[gemini_api_key_batch, csv_file, prompt_col, models_input],
                 outputs=benchmark_output
             )
     return app
 def main():
-    parser = argparse.ArgumentParser(description="Model Response Evaluator")
-    parser.add_argument("--gemini_api_key", type=str, help="Gemini API Key", default=os.environ.get("GEMINI_API_KEY"))
-    parser.add_argument("--input_file", type=str, help="Input CSV file with model responses")
-    parser.add_argument("--models", type=str, help="Comma-separated list of model names to evaluate")
-    parser.add_argument("--prompt_col", type=str, default="rus_prompt", help="Column name containing prompts")
-    parser.add_argument("--web", action="store_true", help="Launch web interface")
-    args = parser.parse_args()
-    if args.web:
-        app = create_gradio_interface()
-        app.launch(share=True)
-    elif args.input_file:
-        if not args.gemini_api_key:
-            print("Error: Gemini API key is required. Set GEMINI_API_KEY environment variable or pass --gemini_api_key")
-            return
-        df = pd.read_csv(args.input_file)
-        models = None
-        if args.models:
-            models = [m.strip() for m in args.models.split(',')]
-        evaluator = BenchmarkEvaluator(args.gemini_api_key)
-        evaluator.evaluate_all_models(df, models, args.prompt_col)
-    else:
-        print("Error: Either --input_file or --web argument is required")
-        print("Run with --help for usage information")
 if __name__ == "__main__":

 import os
 import warnings
 import time
+from typing import Dict, Tuple, List
 from dataclasses import dataclass
 from pathlib import Path
         }
+def evaluate_batch(api_key, file, prompt_column, models_text):
+    """Process batch evaluation from the UI"""
+    # Inputs, as used below:
+    #   api_key       - passed straight to the BenchmarkEvaluator constructor.
+    #   file          - uploaded file object; only its .name attribute is read.
+    #   prompt_column - forwarded unchanged to evaluate_all_models.
+    #   models_text   - comma-separated model names; blank text means "no filter".
+    # Returns the result of evaluate_all_models, or a one-row DataFrame with an
+    # 'Error' column when anything in the body raises.
+    try:
+        # Load the CSV file
+        # file.name is used as the path; a missing upload (file is None) raises
+        # AttributeError here and is reported through the except branch below.
+        file_path = file.name
+        df = pd.read_csv(file_path)
+        # Process model names if provided
+        # models stays None when the text is empty or whitespace-only.
+        models = None
+        if models_text.strip():
+            # NOTE(review): entries are stripped but not filtered, so input such
+            # as "a,,b" or a trailing comma yields empty-string model names.
+            models = [m.strip() for m in models_text.split(',')]
+        # Run the evaluation
+        evaluator = BenchmarkEvaluator(api_key)
+        results = evaluator.evaluate_all_models(df, models, prompt_column)
+        return results
+    except Exception as e:
+        # Broad catch on purpose: every failure (bad upload, unreadable CSV,
+        # evaluator error) is returned as a DataFrame instead of being raised.
+        return pd.DataFrame({'Error': [str(e)]})
 def create_gradio_interface():
     """Create Gradio interface for evaluation app"""
     with gr.Blocks(title="Model Response Evaluator") as app:
             evaluate_batch_btn = gr.Button("Run Benchmark")
             benchmark_output = gr.DataFrame(label="Benchmark Results")
             evaluate_batch_btn.click(
                 evaluate_batch,
                 inputs=[gemini_api_key_batch, csv_file, prompt_col, models_input],
                 outputs=benchmark_output
             )
+        # Add a new tab for configuration settings
+        with gr.Tab("Configuration"):
+            gr.Markdown("## Advanced Configuration")
+            gr.Markdown("Adjust evaluation parameters to customize the benchmarking process.")
+            with gr.Row():
+                batch_size = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Batch Size")
+                retry_attempts = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Retry Attempts")
+            with gr.Row():
+                min_wait = gr.Slider(minimum=1, maximum=30, value=4, step=1, label="Minimum Wait Time (seconds)")
+                max_wait = gr.Slider(minimum=10, maximum=300, value=60, step=10, label="Maximum Wait Time (seconds)")
+            with gr.Row():
+                gemini_model = gr.Dropdown(
+                    choices=["gemini-1.5-flash", "gemini-1.5-pro", "gemini-1.5-ultra"],
+                    value="gemini-1.5-flash",
+                    label="Gemini Model"
+                )
+            gr.Markdown("Note: Changes to configuration settings will apply to new evaluations.")
+            def update_config(batch_size, retry_attempts, min_wait, max_wait, gemini_model):
+                # This function doesn't actually do anything in the demo but would update global config
+                return f"Configuration updated: batch_size={batch_size}, retry_attempts={retry_attempts}, min_wait={min_wait}, max_wait={max_wait}, model={gemini_model}"
+            update_config_btn = gr.Button("Update Configuration")
+            config_status = gr.Textbox(label="Status", interactive=False)
+            update_config_btn.click(
+                update_config,
+                inputs=[batch_size, retry_attempts, min_wait, max_wait, gemini_model],
+                outputs=config_status
+            )
     return app
 def main():
+    """Main function to run the application"""
+    # The argparse-based CLI path was removed in this commit; main() now always
+    # builds the Gradio Blocks app and launches it.
+    app = create_gradio_interface()
+    # NOTE(review): share=True presumably requests a public Gradio share link;
+    # confirm this is wanted when running on a hosted Space.
+    app.launch(share=True)
 if __name__ == "__main__":