Spaces:
Sleeping
Sleeping
zhimin-z
committed on
Commit
·
996ba54
1
Parent(s):
bfeb8ad
add
Browse files
app.py
CHANGED
|
@@ -404,9 +404,9 @@ def get_leaderboard_data(vote_entry=None, use_cache=True):
|
|
| 404 |
leaderboard_data = leaderboard_data.round(
|
| 405 |
{
|
| 406 |
"Elo Score": 2,
|
|
|
|
| 407 |
"Conversation Efficiency Index": 2,
|
| 408 |
"Consistency Score": 2,
|
| 409 |
-
"Win Rate": 2,
|
| 410 |
"Bradley-Terry Coefficient": 2,
|
| 411 |
"Eigenvector Centrality Value": 2,
|
| 412 |
"Newman Modularity Score": 2,
|
|
@@ -442,9 +442,9 @@ def get_leaderboard_data(vote_entry=None, use_cache=True):
|
|
| 442 |
"Model",
|
| 443 |
"Website",
|
| 444 |
"Elo Score",
|
|
|
|
| 445 |
"Conversation Efficiency Index",
|
| 446 |
"Consistency Score",
|
| 447 |
-
"Win Rate",
|
| 448 |
"Bradley-Terry Coefficient",
|
| 449 |
"Eigenvector Centrality Value",
|
| 450 |
"Newman Modularity Score",
|
|
@@ -584,9 +584,9 @@ def get_leaderboard_data(vote_entry=None, use_cache=True):
|
|
| 584 |
"Model": elo_scores.index,
|
| 585 |
"Website": website_values,
|
| 586 |
"Elo Score": elo_scores.values,
|
|
|
|
| 587 |
"Conversation Efficiency Index": cei_result.values,
|
| 588 |
"Consistency Score": mcs_result.values,
|
| 589 |
-
"Win Rate": avr_scores.values,
|
| 590 |
"Bradley-Terry Coefficient": bt_scores.values,
|
| 591 |
"Eigenvector Centrality Value": eigen_scores.values,
|
| 592 |
"Newman Modularity Score": newman_scores.values,
|
|
@@ -727,6 +727,14 @@ with gr.Blocks(title="SWE-Model-Arena", theme=gr.themes.Soft()) as app:
|
|
| 727 |
type="slider",
|
| 728 |
label="Elo Score"
|
| 729 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
ColumnFilter(
|
| 731 |
"Conversation Efficiency Index",
|
| 732 |
min=-1.0,
|
|
@@ -743,14 +751,6 @@ with gr.Blocks(title="SWE-Model-Arena", theme=gr.themes.Soft()) as app:
|
|
| 743 |
type="slider",
|
| 744 |
label="Consistency Score"
|
| 745 |
),
|
| 746 |
-
ColumnFilter(
|
| 747 |
-
"Win Rate",
|
| 748 |
-
min=0.0,
|
| 749 |
-
max=1.0,
|
| 750 |
-
default=[0.0, 1.0],
|
| 751 |
-
type="slider",
|
| 752 |
-
label="Win Rate"
|
| 753 |
-
),
|
| 754 |
ColumnFilter(
|
| 755 |
"Bradley-Terry Coefficient",
|
| 756 |
min=-3.0,
|
|
|
|
| 404 |
leaderboard_data = leaderboard_data.round(
|
| 405 |
{
|
| 406 |
"Elo Score": 2,
|
| 407 |
+
"Win Rate": 2,
|
| 408 |
"Conversation Efficiency Index": 2,
|
| 409 |
"Consistency Score": 2,
|
|
|
|
| 410 |
"Bradley-Terry Coefficient": 2,
|
| 411 |
"Eigenvector Centrality Value": 2,
|
| 412 |
"Newman Modularity Score": 2,
|
|
|
|
| 442 |
"Model",
|
| 443 |
"Website",
|
| 444 |
"Elo Score",
|
| 445 |
+
"Win Rate",
|
| 446 |
"Conversation Efficiency Index",
|
| 447 |
"Consistency Score",
|
|
|
|
| 448 |
"Bradley-Terry Coefficient",
|
| 449 |
"Eigenvector Centrality Value",
|
| 450 |
"Newman Modularity Score",
|
|
|
|
| 584 |
"Model": elo_scores.index,
|
| 585 |
"Website": website_values,
|
| 586 |
"Elo Score": elo_scores.values,
|
| 587 |
+
"Win Rate": avr_scores.values,
|
| 588 |
"Conversation Efficiency Index": cei_result.values,
|
| 589 |
"Consistency Score": mcs_result.values,
|
|
|
|
| 590 |
"Bradley-Terry Coefficient": bt_scores.values,
|
| 591 |
"Eigenvector Centrality Value": eigen_scores.values,
|
| 592 |
"Newman Modularity Score": newman_scores.values,
|
|
|
|
| 727 |
type="slider",
|
| 728 |
label="Elo Score"
|
| 729 |
),
|
| 730 |
+
ColumnFilter(
|
| 731 |
+
"Win Rate",
|
| 732 |
+
min=0.0,
|
| 733 |
+
max=1.0,
|
| 734 |
+
default=[0.0, 1.0],
|
| 735 |
+
type="slider",
|
| 736 |
+
label="Win Rate"
|
| 737 |
+
),
|
| 738 |
ColumnFilter(
|
| 739 |
"Conversation Efficiency Index",
|
| 740 |
min=-1.0,
|
|
|
|
| 751 |
type="slider",
|
| 752 |
label="Consistency Score"
|
| 753 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 754 |
ColumnFilter(
|
| 755 |
"Bradley-Terry Coefficient",
|
| 756 |
min=-3.0,
|