Spaces:
Running
Running
Update leaderboard display
Browse files
app.py
CHANGED
|
@@ -153,143 +153,90 @@ def filter_and_process_results(results, n_versions, only_actual_versions):
|
|
| 153 |
"""Filter results by version and process them for display."""
|
| 154 |
if not results or "items" not in results:
|
| 155 |
return pd.DataFrame(), [], [], []
|
| 156 |
-
|
| 157 |
-
all_items = results["items"]
|
| 158 |
-
last_version_str = results.get("last_version", "1.0")
|
| 159 |
-
last_version = version.parse(last_version_str)
|
| 160 |
|
| 161 |
-
|
| 162 |
|
| 163 |
-
#
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
|
|
|
| 166 |
for version_str, version_items in all_items.items():
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
model_name = item.get("model_name", "Unknown")
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
# Filter versions based on selection
|
| 185 |
-
filtered_items = []
|
| 186 |
-
|
| 187 |
-
if only_actual_versions:
|
| 188 |
-
# Get the n most recent actual dataset versions
|
| 189 |
-
all_versions = sorted([version.parse(v_str) for v_str in all_items.keys()], reverse=True)
|
| 190 |
-
# Take at most n_versions
|
| 191 |
-
versions_to_consider = all_versions[:n_versions] if all_versions else []
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
# Create row with basic info
|
| 205 |
-
row = {
|
| 206 |
-
'Model': model_name,
|
| 207 |
-
'Embeddings': config.get('embedding_model', 'N/A'),
|
| 208 |
-
'Retriever': config.get('retriever_type', 'N/A'),
|
| 209 |
-
'Top-K': config.get('retrieval_config', {}).get('top_k', 'N/A'),
|
| 210 |
-
'Versions': ", ".join([item["version_str"] for item in filtered_items]),
|
| 211 |
-
'Last Updated': filtered_items[0].get("timestamp", "")
|
| 212 |
-
}
|
| 213 |
-
|
| 214 |
-
# Format timestamp if available
|
| 215 |
-
if row['Last Updated']:
|
| 216 |
-
try:
|
| 217 |
-
dt = datetime.fromisoformat(row['Last Updated'].replace('Z', '+00:00'))
|
| 218 |
-
row['Last Updated'] = dt.strftime("%Y-%m-%d")
|
| 219 |
-
except:
|
| 220 |
-
pass
|
| 221 |
-
|
| 222 |
-
# Process metrics based on categories
|
| 223 |
-
category_metrics = {
|
| 224 |
-
category: {
|
| 225 |
-
metric_type: {
|
| 226 |
-
"avg": 0.0,
|
| 227 |
-
"count": 0
|
| 228 |
-
} for metric_type in METRIC_TYPES
|
| 229 |
-
} for category in QUESTION_CATEGORIES
|
| 230 |
-
}
|
| 231 |
-
|
| 232 |
-
# Collect metrics by category
|
| 233 |
-
for item in filtered_items:
|
| 234 |
-
metrics = item.get("metrics", {})
|
| 235 |
for category in QUESTION_CATEGORIES:
|
| 236 |
if category in metrics:
|
| 237 |
for metric_type in METRIC_TYPES:
|
| 238 |
if metric_type in metrics[category]:
|
| 239 |
metric_values = metrics[category][metric_type]
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
# Calculate averages and add to row
|
| 247 |
-
for category in QUESTION_CATEGORIES:
|
| 248 |
-
for metric_type in METRIC_TYPES:
|
| 249 |
-
metric_data = category_metrics[category][metric_type]
|
| 250 |
-
if metric_data["count"] > 0:
|
| 251 |
-
avg_value = metric_data["avg"] / metric_data["count"]
|
| 252 |
-
# Add to row with appropriate column name
|
| 253 |
-
col_name = f"{category}_{metric_type}"
|
| 254 |
-
row[col_name] = round(avg_value, 4)
|
| 255 |
-
|
| 256 |
-
# Calculate overall averages for each metric type
|
| 257 |
-
for metric_type in METRIC_TYPES:
|
| 258 |
-
total_sum = 0
|
| 259 |
-
total_count = 0
|
| 260 |
|
| 261 |
-
for
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
total_count += metric_data["count"]
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
rows.append(row)
|
| 271 |
-
|
| 272 |
-
# Create DataFrame
|
| 273 |
df = pd.DataFrame(rows)
|
| 274 |
|
| 275 |
# Get lists of metrics for each category
|
| 276 |
category_metrics = []
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
|
|
|
| 286 |
# Define retrieval and generation columns for radar charts
|
| 287 |
-
retrieval_metrics = [
|
| 288 |
-
generation_metrics = [
|
|
|
|
|
|
|
|
|
|
| 289 |
|
| 290 |
return df, retrieval_metrics, generation_metrics, category_metrics
|
| 291 |
|
| 292 |
-
def create_radar_chart(df, selected_models, metrics, title):
|
| 293 |
"""Create a radar chart for the selected models and metrics."""
|
| 294 |
if not metrics or len(selected_models) == 0:
|
| 295 |
# Return empty figure if no metrics or models selected
|
|
@@ -335,7 +282,7 @@ def create_radar_chart(df, selected_models, metrics, title):
|
|
| 335 |
categories_loop = categories + [categories[0]]
|
| 336 |
|
| 337 |
fig.add_trace(go.Scatterpolar(
|
| 338 |
-
name=row[
|
| 339 |
r=values,
|
| 340 |
theta=categories_loop,
|
| 341 |
showlegend=True,
|
|
@@ -398,21 +345,23 @@ def create_summary_df(df, retrieval_metrics, generation_metrics):
|
|
| 398 |
generation_avg = summary_df[generation_metrics].mean(axis=1).round(4)
|
| 399 |
summary_df['Generation (avg)'] = generation_avg
|
| 400 |
|
| 401 |
-
# Add total score if
|
| 402 |
-
if 'Retrieval (avg)' in summary_df.columns and 'Generation (avg)' in summary_df.columns:
|
| 403 |
-
summary_df['Total Score'] = summary_df['Retrieval (avg)'
|
| 404 |
summary_df = summary_df.sort_values('Total Score', ascending=False)
|
| 405 |
|
| 406 |
# Select columns for display
|
| 407 |
-
summary_cols = ['Model', 'Embeddings', '
|
|
|
|
|
|
|
| 408 |
if 'Retrieval (avg)' in summary_df.columns:
|
| 409 |
summary_cols.append('Retrieval (avg)')
|
| 410 |
if 'Generation (avg)' in summary_df.columns:
|
| 411 |
summary_cols.append('Generation (avg)')
|
| 412 |
if 'Total Score' in summary_df.columns:
|
| 413 |
summary_cols.append('Total Score')
|
| 414 |
-
if '
|
| 415 |
-
summary_cols.append('
|
| 416 |
if 'Last Updated' in summary_df.columns:
|
| 417 |
summary_cols.append('Last Updated')
|
| 418 |
|
|
@@ -432,7 +381,7 @@ def create_category_df(df, category, retrieval_col, generation_col):
|
|
| 432 |
category_df = category_df.sort_values(f'Score', ascending=False)
|
| 433 |
|
| 434 |
# Select columns for display
|
| 435 |
-
category_cols = ['Model', 'Embeddings',
|
| 436 |
|
| 437 |
# Rename columns for display
|
| 438 |
category_df = category_df[category_cols].rename(columns={
|
|
@@ -458,7 +407,7 @@ default_models = df['Model'].head(5).tolist() if not df.empty else []
|
|
| 458 |
initial_gen_chart_title = LANGUAGES[DEFAULT_LANG]["radar_gen_title"]
|
| 459 |
initial_ret_chart_title = LANGUAGES[DEFAULT_LANG]["radar_ret_title"]
|
| 460 |
initial_gen_chart = create_radar_chart(df, default_models, generation_metrics, initial_gen_chart_title)
|
| 461 |
-
initial_ret_chart = create_radar_chart(df, default_models, retrieval_metrics, initial_ret_chart_title)
|
| 462 |
|
| 463 |
# Create summary dataframe
|
| 464 |
summary_df = create_summary_df(df, retrieval_metrics, generation_metrics)
|
|
@@ -526,7 +475,7 @@ with gr.Blocks(css="""
|
|
| 526 |
}
|
| 527 |
.info-text {
|
| 528 |
font-size: 0.9em;
|
| 529 |
-
font-style: italic;
|
| 530 |
color: #666;
|
| 531 |
margin-top: 5px;
|
| 532 |
}
|
|
@@ -771,7 +720,7 @@ with gr.Blocks(css="""
|
|
| 771 |
print(f"IndexError: row_idx {row_idx} out of bounds for the component's data.")
|
| 772 |
# Potentially return current state without changes
|
| 773 |
gen_chart = create_radar_chart(current_df, selected_models, current_gen_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 774 |
-
ret_chart = create_radar_chart(current_df, selected_models, current_ret_metrics, LANGUAGES[current_lang]["radar_ret_title"])
|
| 775 |
return selected_models, gen_chart, ret_chart
|
| 776 |
except Exception as e:
|
| 777 |
print(f"Error extracting model name: {e}")
|
|
@@ -802,7 +751,7 @@ with gr.Blocks(css="""
|
|
| 802 |
|
| 803 |
# Create radar charts using the current dataframe and metrics
|
| 804 |
gen_chart = create_radar_chart(current_df, selected_models, current_gen_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 805 |
-
ret_chart = create_radar_chart(current_df, selected_models, current_ret_metrics, LANGUAGES[current_lang]["radar_ret_title"])
|
| 806 |
|
| 807 |
return selected_models, gen_chart, ret_chart
|
| 808 |
except Exception as e:
|
|
@@ -810,7 +759,7 @@ with gr.Blocks(css="""
|
|
| 810 |
print(traceback.format_exc())
|
| 811 |
# Return potentially existing chart values if error occurs
|
| 812 |
current_gen_chart = create_radar_chart(df, selected_models, generation_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 813 |
-
current_ret_chart = create_radar_chart(df, selected_models, retrieval_metrics, LANGUAGES[current_lang]["radar_ret_title"])
|
| 814 |
return selected_models, current_gen_chart, current_ret_chart
|
| 815 |
|
| 816 |
|
|
@@ -852,7 +801,7 @@ with gr.Blocks(css="""
|
|
| 852 |
|
| 853 |
# Create charts with localized titles
|
| 854 |
gen_chart_val = create_radar_chart(df, filtered_selected_models, generation_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 855 |
-
ret_chart_val = create_radar_chart(df, filtered_selected_models, retrieval_metrics, LANGUAGES[current_lang]["radar_ret_title"])
|
| 856 |
|
| 857 |
# Create summary dataframe
|
| 858 |
summary_df_val = create_summary_df(df, retrieval_metrics, generation_metrics)
|
|
@@ -916,7 +865,7 @@ with gr.Blocks(css="""
|
|
| 916 |
empty_models = []
|
| 917 |
# Create empty charts with localized titles
|
| 918 |
empty_gen_chart = create_radar_chart(df, empty_models, generation_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 919 |
-
empty_ret_chart = create_radar_chart(df, empty_models, retrieval_metrics, LANGUAGES[current_lang]["radar_ret_title"])
|
| 920 |
return empty_models, empty_gen_chart, empty_ret_chart
|
| 921 |
|
| 922 |
# Connect clear charts button
|
|
@@ -960,7 +909,7 @@ with gr.Blocks(css="""
|
|
| 960 |
for category, title_md in category_title_mds.items()},
|
| 961 |
# Update chart titles dynamically by re-plotting (needed if chart titles change)
|
| 962 |
generation_chart: create_radar_chart(df, selected_models.value, generation_metrics, lang_dict["radar_gen_title"]),
|
| 963 |
-
retrieval_chart: create_radar_chart(df, selected_models.value, retrieval_metrics, lang_dict["radar_ret_title"])
|
| 964 |
}
|
| 965 |
|
| 966 |
# Return updates in the correct order based on outputs list below
|
|
|
|
| 153 |
"""Filter results by version and process them for display."""
|
| 154 |
if not results or "items" not in results:
|
| 155 |
return pd.DataFrame(), [], [], []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
+
all_items = results["items"]
|
| 158 |
|
| 159 |
+
# Get all versions and sort them
|
| 160 |
+
all_versions_sorted = sorted([version.parse(v_str) for v_str in all_items.keys()], reverse=True)
|
| 161 |
+
|
| 162 |
+
# Filter versions to consider based on n_versions slider
|
| 163 |
+
versions_to_consider = all_versions_sorted[:n_versions]
|
| 164 |
+
versions_to_consider_str = {str(v) for v in versions_to_consider}
|
| 165 |
|
| 166 |
+
rows = []
|
| 167 |
for version_str, version_items in all_items.items():
|
| 168 |
+
if version_str not in versions_to_consider_str:
|
| 169 |
+
continue
|
|
|
|
| 170 |
|
| 171 |
+
for guid, item in version_items.items():
|
| 172 |
+
config = item.get("config", {})
|
| 173 |
+
model_name = item.get("model_name", "N/A")
|
| 174 |
+
metrics = item.get("metrics", {})
|
| 175 |
+
judge_metrics = metrics.get("judge", {})
|
| 176 |
|
| 177 |
+
row = {
|
| 178 |
+
'Model': f"{model_name} ({guid[:6]})",
|
| 179 |
+
'Embeddings': config.get('embedding_model', 'N/A'),
|
| 180 |
+
'Top-K': config.get('retrieval_config', {}).get('top_k', 'N/A'),
|
| 181 |
+
'Judge': round(judge_metrics.get("judge_total_score", 0.0) / 2, 4),
|
| 182 |
+
'Version': version_str,
|
| 183 |
+
'Last Updated': item.get("timestamp", ""),
|
| 184 |
+
'guid': guid
|
| 185 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
+
if row['Last Updated']:
|
| 188 |
+
try:
|
| 189 |
+
dt = datetime.fromisoformat(row['Last Updated'].replace('Z', '+00:00'))
|
| 190 |
+
row['Last Updated'] = dt.strftime("%Y-%m-%d")
|
| 191 |
+
except (ValueError, TypeError):
|
| 192 |
+
pass
|
| 193 |
+
|
| 194 |
+
category_sums = {mtype: 0.0 for mtype in METRIC_TYPES}
|
| 195 |
+
category_counts = {mtype: 0 for mtype in METRIC_TYPES}
|
| 196 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
for category in QUESTION_CATEGORIES:
|
| 198 |
if category in metrics:
|
| 199 |
for metric_type in METRIC_TYPES:
|
| 200 |
if metric_type in metrics[category]:
|
| 201 |
metric_values = metrics[category][metric_type]
|
| 202 |
+
if metric_values and len(metric_values) > 0:
|
| 203 |
+
avg_value = sum(metric_values.values()) / len(metric_values)
|
| 204 |
+
col_name = f"{category}_{metric_type}"
|
| 205 |
+
row[col_name] = round(avg_value, 4)
|
| 206 |
+
category_sums[metric_type] += avg_value
|
| 207 |
+
category_counts[metric_type] += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
+
for metric_type in METRIC_TYPES:
|
| 210 |
+
if category_counts[metric_type] > 0:
|
| 211 |
+
avg = category_sums[metric_type] / category_counts[metric_type]
|
| 212 |
+
row[f"{metric_type}_avg"] = round(avg, 4)
|
|
|
|
| 213 |
|
| 214 |
+
rows.append(row)
|
| 215 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
df = pd.DataFrame(rows)
|
| 217 |
|
| 218 |
# Get lists of metrics for each category
|
| 219 |
category_metrics = []
|
| 220 |
+
if not df.empty:
|
| 221 |
+
for category in QUESTION_CATEGORIES:
|
| 222 |
+
metrics_list = []
|
| 223 |
+
for metric_type in METRIC_TYPES:
|
| 224 |
+
col_name = f"{category}_{metric_type}"
|
| 225 |
+
if col_name in df.columns:
|
| 226 |
+
metrics_list.append(col_name)
|
| 227 |
+
if metrics_list:
|
| 228 |
+
category_metrics.append((category, metrics_list))
|
| 229 |
+
|
| 230 |
# Define retrieval and generation columns for radar charts
|
| 231 |
+
retrieval_metrics = []
|
| 232 |
+
generation_metrics = []
|
| 233 |
+
if not df.empty:
|
| 234 |
+
retrieval_metrics = [f"{category}_retrieval" for category, _ in category_metrics if f"{category}_retrieval" in df.columns]
|
| 235 |
+
generation_metrics = [f"{category}_generation" for category, _ in category_metrics if f"{category}_generation" in df.columns]
|
| 236 |
|
| 237 |
return df, retrieval_metrics, generation_metrics, category_metrics
|
| 238 |
|
| 239 |
+
def create_radar_chart(df, selected_models, metrics, title, name_col="Model"):
|
| 240 |
"""Create a radar chart for the selected models and metrics."""
|
| 241 |
if not metrics or len(selected_models) == 0:
|
| 242 |
# Return empty figure if no metrics or models selected
|
|
|
|
| 282 |
categories_loop = categories + [categories[0]]
|
| 283 |
|
| 284 |
fig.add_trace(go.Scatterpolar(
|
| 285 |
+
name=row[name_col],
|
| 286 |
r=values,
|
| 287 |
theta=categories_loop,
|
| 288 |
showlegend=True,
|
|
|
|
| 345 |
generation_avg = summary_df[generation_metrics].mean(axis=1).round(4)
|
| 346 |
summary_df['Generation (avg)'] = generation_avg
|
| 347 |
|
| 348 |
+
# Add total score if all three columns exist
|
| 349 |
+
if 'Retrieval (avg)' in summary_df.columns and 'Generation (avg)' in summary_df.columns and 'Judge' in summary_df.columns:
|
| 350 |
+
summary_df['Total Score'] = summary_df[['Retrieval (avg)', 'Generation (avg)', 'Judge']].mean(axis=1).round(4)
|
| 351 |
summary_df = summary_df.sort_values('Total Score', ascending=False)
|
| 352 |
|
| 353 |
# Select columns for display
|
| 354 |
+
summary_cols = ['Model', 'Embeddings', 'Top-K']
|
| 355 |
+
if 'Judge' in summary_df.columns:
|
| 356 |
+
summary_cols.append('Judge')
|
| 357 |
if 'Retrieval (avg)' in summary_df.columns:
|
| 358 |
summary_cols.append('Retrieval (avg)')
|
| 359 |
if 'Generation (avg)' in summary_df.columns:
|
| 360 |
summary_cols.append('Generation (avg)')
|
| 361 |
if 'Total Score' in summary_df.columns:
|
| 362 |
summary_cols.append('Total Score')
|
| 363 |
+
if 'Version' in summary_df.columns:
|
| 364 |
+
summary_cols.append('Version')
|
| 365 |
if 'Last Updated' in summary_df.columns:
|
| 366 |
summary_cols.append('Last Updated')
|
| 367 |
|
|
|
|
| 381 |
category_df = category_df.sort_values(f'Score', ascending=False)
|
| 382 |
|
| 383 |
# Select columns for display
|
| 384 |
+
category_cols = ['Model', 'Embeddings', retrieval_col, generation_col, f'Score']
|
| 385 |
|
| 386 |
# Rename columns for display
|
| 387 |
category_df = category_df[category_cols].rename(columns={
|
|
|
|
| 407 |
initial_gen_chart_title = LANGUAGES[DEFAULT_LANG]["radar_gen_title"]
|
| 408 |
initial_ret_chart_title = LANGUAGES[DEFAULT_LANG]["radar_ret_title"]
|
| 409 |
initial_gen_chart = create_radar_chart(df, default_models, generation_metrics, initial_gen_chart_title)
|
| 410 |
+
initial_ret_chart = create_radar_chart(df, default_models, retrieval_metrics, initial_ret_chart_title, name_col='Embeddings')
|
| 411 |
|
| 412 |
# Create summary dataframe
|
| 413 |
summary_df = create_summary_df(df, retrieval_metrics, generation_metrics)
|
|
|
|
| 475 |
}
|
| 476 |
.info-text {
|
| 477 |
font-size: 0.9em;
|
| 478 |
+
font-style: italic;
|
| 479 |
color: #666;
|
| 480 |
margin-top: 5px;
|
| 481 |
}
|
|
|
|
| 720 |
print(f"IndexError: row_idx {row_idx} out of bounds for the component's data.")
|
| 721 |
# Potentially return current state without changes
|
| 722 |
gen_chart = create_radar_chart(current_df, selected_models, current_gen_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 723 |
+
ret_chart = create_radar_chart(current_df, selected_models, current_ret_metrics, LANGUAGES[current_lang]["radar_ret_title"], name_col='Embeddings')
|
| 724 |
return selected_models, gen_chart, ret_chart
|
| 725 |
except Exception as e:
|
| 726 |
print(f"Error extracting model name: {e}")
|
|
|
|
| 751 |
|
| 752 |
# Create radar charts using the current dataframe and metrics
|
| 753 |
gen_chart = create_radar_chart(current_df, selected_models, current_gen_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 754 |
+
ret_chart = create_radar_chart(current_df, selected_models, current_ret_metrics, LANGUAGES[current_lang]["radar_ret_title"], name_col='Embeddings')
|
| 755 |
|
| 756 |
return selected_models, gen_chart, ret_chart
|
| 757 |
except Exception as e:
|
|
|
|
| 759 |
print(traceback.format_exc())
|
| 760 |
# Return potentially existing chart values if error occurs
|
| 761 |
current_gen_chart = create_radar_chart(df, selected_models, generation_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 762 |
+
current_ret_chart = create_radar_chart(df, selected_models, retrieval_metrics, LANGUAGES[current_lang]["radar_ret_title"], name_col='Embeddings')
|
| 763 |
return selected_models, current_gen_chart, current_ret_chart
|
| 764 |
|
| 765 |
|
|
|
|
| 801 |
|
| 802 |
# Create charts with localized titles
|
| 803 |
gen_chart_val = create_radar_chart(df, filtered_selected_models, generation_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 804 |
+
ret_chart_val = create_radar_chart(df, filtered_selected_models, retrieval_metrics, LANGUAGES[current_lang]["radar_ret_title"], name_col='Embeddings')
|
| 805 |
|
| 806 |
# Create summary dataframe
|
| 807 |
summary_df_val = create_summary_df(df, retrieval_metrics, generation_metrics)
|
|
|
|
| 865 |
empty_models = []
|
| 866 |
# Create empty charts with localized titles
|
| 867 |
empty_gen_chart = create_radar_chart(df, empty_models, generation_metrics, LANGUAGES[current_lang]["radar_gen_title"])
|
| 868 |
+
empty_ret_chart = create_radar_chart(df, empty_models, retrieval_metrics, LANGUAGES[current_lang]["radar_ret_title"], name_col='Embeddings')
|
| 869 |
return empty_models, empty_gen_chart, empty_ret_chart
|
| 870 |
|
| 871 |
# Connect clear charts button
|
|
|
|
| 909 |
for category, title_md in category_title_mds.items()},
|
| 910 |
# Update chart titles dynamically by re-plotting (needed if chart titles change)
|
| 911 |
generation_chart: create_radar_chart(df, selected_models.value, generation_metrics, lang_dict["radar_gen_title"]),
|
| 912 |
+
retrieval_chart: create_radar_chart(df, selected_models.value, retrieval_metrics, lang_dict["radar_ret_title"], name_col='Embeddings')
|
| 913 |
}
|
| 914 |
|
| 915 |
# Return updates in the correct order based on outputs list below
|