Update app.py
app.py CHANGED
@@ -1,4 +1,4 @@
-# app.py
+# app.py - Fixed version with better formatting and display
 import subprocess
 import sys
 import os
@@ -57,10 +57,6 @@ if not setup_salt():
     print("💡 Please check that git is available and GitHub is accessible")
     sys.exit(1)
 
-
-
-
-
 import gradio as gr
 import pandas as pd
 import json
@@ -74,7 +70,7 @@ from src.validation import validate_submission_complete
 from src.evaluation import evaluate_predictions, generate_evaluation_report, get_google_translate_baseline
 from src.leaderboard import (
     load_leaderboard, add_model_to_leaderboard, get_leaderboard_stats,
-    filter_leaderboard, export_leaderboard, get_model_comparison
+    filter_leaderboard, export_leaderboard, get_model_comparison, prepare_leaderboard_display
 )
 from src.plotting import (
     create_leaderboard_ranking_plot, create_metrics_comparison_plot,
@@ -131,26 +127,26 @@ def download_test_set() -> Tuple[str, str]:
 
         # Create info message
         info_msg = f"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+## 📥 SALT Test Set Downloaded Successfully!
+
+### Dataset Statistics:
+- **Total Samples**: {stats['total_samples']:,}
+- **Language Pairs**: {stats['language_pairs']}
+- **Google Comparable**: {stats['google_comparable_samples']:,} samples
+- **Languages**: {', '.join(stats['languages'])}
+
+### File Format:
+- `sample_id`: Unique identifier for each sample
+- `source_text`: Text to be translated
+- `source_language`: Source language code
+- `target_language`: Target language code
+- `domain`: Content domain (if available)
+- `google_comparable`: Whether this pair can be compared with Google Translate
+
+### Next Steps:
+1. Run your model on the source texts
+2. Create a CSV/JSON file with columns: `sample_id`, `prediction`
+3. Upload your predictions using the "Submit Predictions" tab
 """
 
         return download_path, info_msg
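
The new info message documents the expected submission format: a CSV/JSON file with `sample_id` and `prediction` columns. As a rough illustration of producing such a file (the file name and the model call are placeholders, not part of the app), a submission could be built like this:

```python
import pandas as pd

# Illustrative path: the real file comes from the "Download Test Set" tab.
test_df = pd.read_csv("salt_test_set.csv")

def my_model_translate(text: str, src: str, tgt: str) -> str:
    """Placeholder for a real translation model."""
    return text  # identity "translation", stand-in only

submission = pd.DataFrame({
    "sample_id": test_df["sample_id"],
    "prediction": [
        my_model_translate(r.source_text, r.source_language, r.target_language)
        for r in test_df.itertuples()
    ],
})
submission.to_csv("predictions.csv", index=False)
```
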
@@ -159,7 +155,6 @@ def download_test_set() -> Tuple[str, str]:
         error_msg = f"❌ Error creating test set download: {str(e)}"
         return None, error_msg
 
-
 def validate_submission(file, model_name: str, author: str, description: str) -> Tuple[str, Optional[pd.DataFrame]]:
     """Validate uploaded prediction file, supporting str paths, bytes, and Gradio wrappers."""
     try:
@@ -213,8 +208,6 @@ def validate_submission(file, model_name: str, author: str, description: str) -> Tuple[str, Optional[pd.DataFrame]]:
             None,
         )
 
-
-
 def evaluate_submission(
     predictions_df: pd.DataFrame,
     model_name: str,
@@ -268,24 +261,24 @@ def evaluate_submission(
         total_models = len(updated_leaderboard)
 
         success_msg = f"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+## π Evaluation Complete!
+
+### Your Results:
+- **Model**: {model_name}
+- **Rank**: #{rank} out of {total_models} models
+- **Quality Score**: {evaluation_results['averages'].get('quality_score', 0):.4f}
+- **BLEU**: {evaluation_results['averages'].get('bleu', 0):.2f}
+- **ChrF**: {evaluation_results['averages'].get('chrf', 0):.4f}
+
+### Coverage:
+- **Samples Evaluated**: {evaluation_results['evaluated_samples']:,}
+- **Language Pairs**: {evaluation_results['summary']['language_pairs_covered']}
+- **Google Comparable**: {evaluation_results['summary']['google_comparable_pairs']} pairs
+
+{report}
 """
 
-        return success_msg, updated_leaderboard, summary_plot, ranking_plot
+        return success_msg, prepare_leaderboard_display(updated_leaderboard), summary_plot, ranking_plot
 
     except Exception as e:
         error_msg = f"❌ Evaluation failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
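
The success message interpolates a `rank` variable that is computed outside this hunk. One plausible way to derive it from the fields visible in the diff (`model_name`, a `quality_score` column) is sketched below; this is an assumption for illustration, not the app's actual code:

```python
import pandas as pd

def model_rank(leaderboard: pd.DataFrame, model_name: str) -> int:
    """Hypothetical helper: 1-based rank of a model by quality_score."""
    ordered = leaderboard.sort_values("quality_score", ascending=False)
    ordered = ordered.reset_index(drop=True)
    return int(ordered.index[ordered["model_name"] == model_name][0]) + 1
```
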
@@ -313,6 +306,9 @@ def refresh_leaderboard_display(
             google_comparable_only=google_only
         )
 
+        # Prepare for display (removes detailed_metrics column)
+        display_df = prepare_leaderboard_display(filtered_df)
+
         # Create plots
         ranking_plot = create_leaderboard_ranking_plot(filtered_df)
         comparison_plot = create_metrics_comparison_plot(filtered_df)
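
`prepare_leaderboard_display` is newly imported from `src.leaderboard`, but its body is not part of this diff. Going by the comment in the hunk above ("removes detailed_metrics column"), a minimal sketch of such a helper might look like this; the real implementation lives in `src/leaderboard.py` and may differ:

```python
import pandas as pd

def prepare_leaderboard_display(df: pd.DataFrame) -> pd.DataFrame:
    """Sketch: drop internal columns such as detailed_metrics before rendering."""
    return df.drop(columns=["detailed_metrics"], errors="ignore")
```
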
@@ -320,17 +316,17 @@ def refresh_leaderboard_display(
         # Get stats
         stats = get_leaderboard_stats(filtered_df)
         stats_text = f"""
-
-
-
-
-
-
-
-
+### π Leaderboard Statistics
+
+- **Total Models**: {stats['total_models']}
+- **Average Quality Score**: {stats['avg_quality_score']:.4f}
+- **Google Comparable Models**: {stats['google_comparable_models']}
+
+**Best Model**: {stats['best_model']['name'] if stats['best_model'] else 'None'}
+**Latest Submission**: {stats['latest_submission'][:10] if stats['latest_submission'] else 'None'}
 """
 
-        return
+        return display_df, ranking_plot, comparison_plot, stats_text
 
     except Exception as e:
         error_msg = f"Error loading leaderboard: {str(e)}"
@@ -364,31 +360,31 @@ def get_model_details(model_name: str) -> Tuple[str, object]:
 
         # Format model details
         details_text = f"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+## π Model Details: {model_name}
+
+### Basic Information:
+- **Author**: {model_info['author']}
+- **Submission Date**: {model_info['submission_date'][:10]}
+- **Model Type**: {model_info['model_type']}
+- **Description**: {model_info['description'] or 'No description provided'}
+
+### Performance Metrics:
+- **Quality Score**: {model_info['quality_score']:.4f}
+- **BLEU**: {model_info['bleu']:.2f}
+- **ChrF**: {model_info['chrf']:.4f}
+- **ROUGE-1**: {model_info['rouge1']:.4f}
+- **ROUGE-L**: {model_info['rougeL']:.4f}
+
+### Coverage Information:
+- **Total Samples**: {model_info['total_samples']:,}
+- **Language Pairs Covered**: {model_info['language_pairs_covered']}
+- **Google Comparable Pairs**: {model_info['google_pairs_covered']}
+- **Coverage Rate**: {model_info['coverage_rate']:.1%}
+
+### Google Translate Comparison:
+- **Google Quality Score**: {model_info['google_quality_score']:.4f}
+- **Google BLEU**: {model_info['google_bleu']:.2f}
+- **Google ChrF**: {model_info['google_chrf']:.4f}
 """
 
         return details_text, detail_plot
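
The details text reads fields from a `model_info` record fetched before this hunk. A plausible lookup, assuming the leaderboard is a DataFrame keyed by `model_name` (hypothetical, for illustration only):

```python
import pandas as pd

def lookup_model_info(leaderboard: pd.DataFrame, model_name: str) -> dict:
    """Hypothetical: fetch the leaderboard row for one model as a dict."""
    row = leaderboard.loc[leaderboard["model_name"] == model_name]
    if row.empty:
        raise KeyError(f"Model not found: {model_name}")
    return row.iloc[0].to_dict()
```
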
@@ -443,15 +439,11 @@ with gr.Blocks(
 ) as demo:
 
     # Header
-    gr.
+    gr.HTML(f"""
     <div class="main-header">
-
-
-
-        {DESCRIPTION}
-
-        **Supported Languages**: {len(ALL_UG40_LANGUAGES)} Ugandan languages | **Google Comparable**: {len(GOOGLE_SUPPORTED_LANGUAGES)} languages
-
+        <h1>{TITLE}</h1>
+        <p>{DESCRIPTION}</p>
+        <p><strong>Supported Languages</strong>: {len(ALL_UG40_LANGUAGES)} Ugandan languages | <strong>Google Comparable</strong>: {len(GOOGLE_SUPPORTED_LANGUAGES)} languages</p>
     </div>
     """)
 
@@ -541,7 +533,6 @@
             gr.Markdown("### 📤 Upload Predictions")
             gr.Markdown("Upload a CSV/TSV/JSON file with your model's predictions")
 
-
             predictions_file = gr.File(
                 label="π Predictions File",
                 file_types=[".csv", ".tsv", ".json"]
@@ -645,10 +636,10 @@
 
 ## 🗣️ Supported Languages
 
-**All UG40 Languages ({len(ALL_UG40_LANGUAGES)} total):**
+**All UG40 Languages ({len(ALL_UG40_LANGUAGES)} total):**
 {', '.join([f"{code} ({LANGUAGE_NAMES.get(code, code)})" for code in ALL_UG40_LANGUAGES])}
 
-**Google Translate Comparable ({len(GOOGLE_SUPPORTED_LANGUAGES)} languages):**
+**Google Translate Comparable ({len(GOOGLE_SUPPORTED_LANGUAGES)} languages):**
 {', '.join([f"{code} ({LANGUAGE_NAMES.get(code, code)})" for code in GOOGLE_SUPPORTED_LANGUAGES])}
 
 ## π Evaluation Metrics
@@ -720,7 +711,7 @@
 
 This leaderboard is maintained by [Sunbird AI](https://sunbird.ai).
 
-**Contact**: [research@sunbird.ai](mailto:research@sunbird.ai)
+**Contact**: [research@sunbird.ai](mailto:research@sunbird.ai)
 **GitHub**: [Sunbird AI GitHub](https://github.com/sunbirdai)
 
 ## π Citation
@@ -753,21 +744,12 @@
         outputs=[download_file, download_info]
     )
 
-    #
-    # def handle_validation(file, model_name, author, description):
-    #     report, predictions = validate_submission(file, model_name, author, description)
-    #     is_valid = predictions is not None
-    #     return report, predictions, predictions, is_valid
-
+    # Validate predictions
     def handle_validation(file, model_name, author, description):
        report, predictions = validate_submission(file, model_name, author, description)
        valid = predictions is not None
 
        # Build the four returns:
-        # 1) report Markdown
-        # 2) store predictions in state
-        # 3) store validation info in state
-        # 4) enable or disable the submit button
        if valid:
            return (
                report,
@@ -782,16 +764,12 @@
                 None,
                 gr.update(interactive=False)  # <— this *disables* the button
             )
-
 
     validate_btn.click(
         fn=handle_validation,
         inputs=[predictions_file, model_name_input, author_input, description_input],
         outputs=[validation_output, predictions_validated, validation_info_state, submit_btn]
     )
-
-
-
 
     # Submit for evaluation
     def handle_submission(predictions, model_name, author, description, validation_info):
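
For orientation, the `outputs` list above implies two `gr.State` holders and a submit button that starts disabled until validation succeeds. A minimal sketch of that wiring (only the component names come from the diff; the constructors and labels are assumptions):

```python
import gradio as gr

with gr.Blocks() as demo:
    validation_output = gr.Markdown()        # validation report shown to the user
    predictions_validated = gr.State(None)   # holds the validated DataFrame
    validation_info_state = gr.State(None)   # holds validation metadata
    submit_btn = gr.Button("Submit for Evaluation", interactive=False)
```
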
@@ -817,7 +795,10 @@
         table, plot1, plot2, stats = refresh_leaderboard_display(*args)
 
         # Update model dropdown choices
-
+        if current_leaderboard is not None and not current_leaderboard.empty:
+            model_choices = current_leaderboard['model_name'].tolist()
+        else:
+            model_choices = []
 
         return table, plot1, plot2, stats, gr.Dropdown(choices=model_choices)
 
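
Returning `gr.Dropdown(choices=...)` from a callback is the idiom recent Gradio versions (4.x) use to update a component's options in place; older 3.x code used `gr.Dropdown.update(choices=...)` instead. A self-contained example of the same pattern, with illustrative names:

```python
import gradio as gr

def refresh_choices():
    # In app.py the choices come from the leaderboard's model_name column.
    return gr.Dropdown(choices=["model-a", "model-b"])

with gr.Blocks() as demo:
    model_dropdown = gr.Dropdown(choices=[], label="Model")
    refresh_btn = gr.Button("Refresh")
    refresh_btn.click(fn=refresh_choices, outputs=[model_dropdown])

# demo.launch()  # uncomment to run locally
```
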