Spaces:

MrSimple01
/

RuSimulBench_arena

Sleeping

App Files Files Community

MrSimple01 committed on Mar 18, 2025

Commit

e2b92e5

verified ·

1 Parent(s): 21711d5

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -50

app.py CHANGED Viewed

@@ -417,58 +417,58 @@ def create_gradio_interface():
         model_input_method.change(toggle_model_input, model_input_method, model_config_row)
-        def evaluate_batch(api_key, file, prompt_column, input_method, models_text, answer_cols_text):
-            try:
-                if not api_key:
-                    return None, None, gr.DataFrame(), gr.File()
-                # Load the CSV file
-                file_path = file.name
-                df = pd.read_csv(file_path)
-                # Initialize evaluator
-                state['evaluator'] = BenchmarkEvaluator(api_key)
-                # Process model names and columns if provided
-                if input_method == "Specify models and columns":
-                    if not models_text.strip() or not answer_cols_text.strip():
-                        return None, None, gr.DataFrame(), gr.File()
-                    models = [m.strip() for m in models_text.split(',')]
-                    answer_cols = [c.strip() for c in answer_cols_text.split(',')]
-                    if len(models) != len(answer_cols):
-                        return None, None, gr.DataFrame(pd.DataFrame({'Error': ['Number of models and answer columns must match']})), gr.File()
-                    results_df, leaderboard_df = state['evaluator'].evaluate_all_models(
-                        df, models=models, model_columns=answer_cols, prompt_col=prompt_column
-                    )
-                else:
-                    # Auto-detect mode
-                    results_df, leaderboard_df = state['evaluator'].evaluate_all_models(
-                        df, prompt_col=prompt_column
-                    )
-                timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
-                results_path = f'results/benchmark_results_{timestamp}.csv'
-                results_df.to_csv(results_path, index=False)
-                # Update state
-                state['last_results'] = results_df
-                state['leaderboard'] = leaderboard_df
-                return results_df, leaderboard_df, results_path, leaderboard_df
-            except Exception as e:
-                error_df = pd.DataFrame({'Error': [str(e)]})
-                return error_df, state['leaderboard'], gr.DataFrame(), gr.File()
-        def download_results():
-            if state['last_results'] is not None:
-                timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
-                file_path = f'results/benchmark_download_{timestamp}.csv'
-                state['last_results'].to_csv(file_path, index=False)
-                return file_path
-            return None
         def refresh_leaderboard():
             # Reload leaderboard from file
@@ -479,7 +479,7 @@ def create_gradio_interface():
         evaluate_btn.click(
             evaluate_batch,
             inputs=[gemini_api_key, csv_file, prompt_col, model_input_method, models_input, answer_cols_input],
-            outputs=[current_results, leaderboard_table, gr.DataFrame(), current_results_file]
         )
         download_btn.click(download_results, inputs=[], outputs=[current_results_file])

         model_input_method.change(toggle_model_input, model_input_method, model_config_row)
+        # Both callbacks are defined at the same indentation level as the
+        # evaluate_btn.click / download_btn.click wiring that references them.
+        def evaluate_batch(api_key, file, prompt_column, input_method, models_text, answer_cols_text):
+            """Evaluate the uploaded CSV and return values for the three bound outputs.
+
+            Returns a ``(results, leaderboard, results_file_path)`` tuple, one
+            value per component in
+            ``outputs=[current_results, leaderboard_table, current_results_file]``.
+
+            * Missing API key, or empty model/column text in
+              "Specify models and columns" mode: ``(None, None, None)``.
+            * Model/column count mismatch or any raised exception: a one-row
+              ``Error`` DataFrame, the leaderboard currently held in ``state``,
+              and ``None`` for the file.
+            * Success: the results DataFrame, the new leaderboard DataFrame and
+              the path of the CSV written under ``results/``.
+            """
+            try:
+                if not api_key:
+                    return None, None, None
+                # Load the CSV file
+                file_path = file.name
+                df = pd.read_csv(file_path)
+                # Initialize evaluator
+                state['evaluator'] = BenchmarkEvaluator(api_key)
+                # Process model names and columns if provided
+                if input_method == "Specify models and columns":
+                    if not models_text.strip() or not answer_cols_text.strip():
+                        return None, None, None
+                    models = [m.strip() for m in models_text.split(',')]
+                    answer_cols = [c.strip() for c in answer_cols_text.split(',')]
+                    # Models and answer columns are paired positionally.
+                    if len(models) != len(answer_cols):
+                        return pd.DataFrame({'Error': ['Number of models and answer columns must match']}), state['leaderboard'], None
+                    results_df, leaderboard_df = state['evaluator'].evaluate_all_models(
+                        df, models=models, model_columns=answer_cols, prompt_col=prompt_column
+                    )
+                else:
+                    # Auto-detect mode
+                    results_df, leaderboard_df = state['evaluator'].evaluate_all_models(
+                        df, prompt_col=prompt_column
+                    )
+                # Timestamped file name so successive runs do not overwrite each other.
+                timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
+                results_path = f'results/benchmark_results_{timestamp}.csv'
+                results_df.to_csv(results_path, index=False)
+                # Update state
+                state['last_results'] = results_df
+                state['leaderboard'] = leaderboard_df
+                return results_df, leaderboard_df, results_path
+            except Exception as e:
+                # Surface the failure in the results table instead of raising.
+                error_df = pd.DataFrame({'Error': [str(e)]})
+                return error_df, state['leaderboard'], None
+        def download_results():
+            """Write ``state['last_results']`` to a timestamped CSV and return its path.
+
+            Returns ``None`` when ``state['last_results']`` is ``None``.
+            """
+            if state['last_results'] is not None:
+                timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
+                file_path = f'results/benchmark_download_{timestamp}.csv'
+                state['last_results'].to_csv(file_path, index=False)
+                return file_path
+            return None
         def refresh_leaderboard():
             # Reload leaderboard from file
         evaluate_btn.click(
             evaluate_batch,
             inputs=[gemini_api_key, csv_file, prompt_col, model_input_method, models_input, answer_cols_input],
+            outputs=[current_results, leaderboard_table, current_results_file]
         )
         download_btn.click(download_results, inputs=[], outputs=[current_results_file])