akera committed
Commit 3739a4f · verified · 1 Parent(s): ce626d3

Update app.py

Files changed (1):
  1. app.py +87 -106
app.py CHANGED
@@ -1,4 +1,4 @@
-# app.py
+# app.py - Fixed version with better formatting and display
 import subprocess
 import sys
 import os
@@ -57,10 +57,6 @@ if not setup_salt():
     print("💡 Please check that git is available and GitHub is accessible")
     sys.exit(1)
 
-
-
-
-
 import gradio as gr
 import pandas as pd
 import json
@@ -74,7 +70,7 @@ from src.validation import validate_submission_complete
 from src.evaluation import evaluate_predictions, generate_evaluation_report, get_google_translate_baseline
 from src.leaderboard import (
     load_leaderboard, add_model_to_leaderboard, get_leaderboard_stats,
-    filter_leaderboard, export_leaderboard, get_model_comparison
+    filter_leaderboard, export_leaderboard, get_model_comparison, prepare_leaderboard_display
 )
 from src.plotting import (
     create_leaderboard_ranking_plot, create_metrics_comparison_plot,
@@ -131,26 +127,26 @@ def download_test_set() -> Tuple[str, str]:
 
         # Create info message
         info_msg = f"""
-📥 **SALT Test Set Downloaded Successfully!**
-
-**Dataset Statistics:**
-- **Total Samples**: {stats['total_samples']:,}
-- **Language Pairs**: {stats['language_pairs']}
-- **Google Comparable**: {stats['google_comparable_samples']:,} samples
-- **Languages**: {', '.join(stats['languages'])}
-
-**File Format:**
-- `sample_id`: Unique identifier for each sample
-- `source_text`: Text to be translated
-- `source_language`: Source language code
-- `target_language`: Target language code
-- `domain`: Content domain (if available)
-- `google_comparable`: Whether this pair can be compared with Google Translate
-
-**Next Steps:**
-1. Run your model on the source texts
-2. Create a CSV/JSON file with columns: `sample_id`, `prediction`
-3. Upload your predictions using the "Submit Predictions" tab
+## 📥 SALT Test Set Downloaded Successfully!
+
+### Dataset Statistics:
+- **Total Samples**: {stats['total_samples']:,}
+- **Language Pairs**: {stats['language_pairs']}
+- **Google Comparable**: {stats['google_comparable_samples']:,} samples
+- **Languages**: {', '.join(stats['languages'])}
+
+### File Format:
+- `sample_id`: Unique identifier for each sample
+- `source_text`: Text to be translated
+- `source_language`: Source language code
+- `target_language`: Target language code
+- `domain`: Content domain (if available)
+- `google_comparable`: Whether this pair can be compared with Google Translate
+
+### Next Steps:
+1. Run your model on the source texts
+2. Create a CSV/JSON file with columns: `sample_id`, `prediction`
+3. Upload your predictions using the "Submit Predictions" tab
         """
 
         return download_path, info_msg
@@ -159,7 +155,6 @@ def download_test_set() -> Tuple[str, str]:
         error_msg = f"❌ Error creating test set download: {str(e)}"
         return None, error_msg
 
-
 def validate_submission(file, model_name: str, author: str, description: str) -> Tuple[str, Optional[pd.DataFrame]]:
     """Validate uploaded prediction file, supporting str paths, bytes, and Gradio wrappers."""
     try:
@@ -213,8 +208,6 @@ def validate_submission(file, model_name: str, author: str, description: str) ->
             None,
         )
 
-
-
 def evaluate_submission(
     predictions_df: pd.DataFrame,
     model_name: str,
@@ -268,24 +261,24 @@ def evaluate_submission(
         total_models = len(updated_leaderboard)
 
         success_msg = f"""
-🎉 **Evaluation Complete!**
-
-**Your Results:**
-- **Model**: {model_name}
-- **Rank**: #{rank} out of {total_models} models
-- **Quality Score**: {evaluation_results['averages'].get('quality_score', 0):.4f}
-- **BLEU**: {evaluation_results['averages'].get('bleu', 0):.2f}
-- **ChrF**: {evaluation_results['averages'].get('chrf', 0):.4f}
-
-**Coverage:**
-- **Samples Evaluated**: {evaluation_results['evaluated_samples']:,}
-- **Language Pairs**: {evaluation_results['summary']['language_pairs_covered']}
-- **Google Comparable**: {evaluation_results['summary']['google_comparable_pairs']} pairs
-
-{report}
+## 🎉 Evaluation Complete!
+
+### Your Results:
+- **Model**: {model_name}
+- **Rank**: #{rank} out of {total_models} models
+- **Quality Score**: {evaluation_results['averages'].get('quality_score', 0):.4f}
+- **BLEU**: {evaluation_results['averages'].get('bleu', 0):.2f}
+- **ChrF**: {evaluation_results['averages'].get('chrf', 0):.4f}
+
+### Coverage:
+- **Samples Evaluated**: {evaluation_results['evaluated_samples']:,}
+- **Language Pairs**: {evaluation_results['summary']['language_pairs_covered']}
+- **Google Comparable**: {evaluation_results['summary']['google_comparable_pairs']} pairs
+
+{report}
         """
 
-        return success_msg, updated_leaderboard, summary_plot, ranking_plot
+        return success_msg, prepare_leaderboard_display(updated_leaderboard), summary_plot, ranking_plot
 
     except Exception as e:
         error_msg = f"❌ Evaluation failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
@@ -313,6 +306,9 @@ def refresh_leaderboard_display(
             google_comparable_only=google_only
         )
 
+        # Prepare for display (removes detailed_metrics column)
+        display_df = prepare_leaderboard_display(filtered_df)
+
         # Create plots
         ranking_plot = create_leaderboard_ranking_plot(filtered_df)
         comparison_plot = create_metrics_comparison_plot(filtered_df)
@@ -320,17 +316,17 @@
         # Get stats
         stats = get_leaderboard_stats(filtered_df)
         stats_text = f"""
-📊 **Leaderboard Statistics**
-
-- **Total Models**: {stats['total_models']}
-- **Average Quality Score**: {stats['avg_quality_score']:.4f}
-- **Google Comparable Models**: {stats['google_comparable_models']}
-
-**Best Model**: {stats['best_model']['name'] if stats['best_model'] else 'None'}
-**Latest Submission**: {stats['latest_submission'][:10] if stats['latest_submission'] else 'None'}
+### 📊 Leaderboard Statistics
+
+- **Total Models**: {stats['total_models']}
+- **Average Quality Score**: {stats['avg_quality_score']:.4f}
+- **Google Comparable Models**: {stats['google_comparable_models']}
+
+**Best Model**: {stats['best_model']['name'] if stats['best_model'] else 'None'}
+**Latest Submission**: {stats['latest_submission'][:10] if stats['latest_submission'] else 'None'}
         """
 
-        return filtered_df, ranking_plot, comparison_plot, stats_text
+        return display_df, ranking_plot, comparison_plot, stats_text
 
     except Exception as e:
         error_msg = f"Error loading leaderboard: {str(e)}"
@@ -364,31 +360,31 @@ def get_model_details(model_name: str) -> Tuple[str, object]:
 
         # Format model details
         details_text = f"""
-# 🔍 Model Details: {model_name}
-
-**Basic Information:**
-- **Author**: {model_info['author']}
-- **Submission Date**: {model_info['submission_date'][:10]}
-- **Model Type**: {model_info['model_type']}
-- **Description**: {model_info['description'] or 'No description provided'}
-
-**Performance Metrics:**
-- **Quality Score**: {model_info['quality_score']:.4f}
-- **BLEU**: {model_info['bleu']:.2f}
-- **ChrF**: {model_info['chrf']:.4f}
-- **ROUGE-1**: {model_info['rouge1']:.4f}
-- **ROUGE-L**: {model_info['rougeL']:.4f}
-
-**Coverage Information:**
-- **Total Samples**: {model_info['total_samples']:,}
-- **Language Pairs Covered**: {model_info['language_pairs_covered']}
-- **Google Comparable Pairs**: {model_info['google_pairs_covered']}
-- **Coverage Rate**: {model_info['coverage_rate']:.1%}
-
-**Google Translate Comparison:**
-- **Google Quality Score**: {model_info['google_quality_score']:.4f}
-- **Google BLEU**: {model_info['google_bleu']:.2f}
-- **Google ChrF**: {model_info['google_chrf']:.4f}
+## 🔍 Model Details: {model_name}
+
+### Basic Information:
+- **Author**: {model_info['author']}
+- **Submission Date**: {model_info['submission_date'][:10]}
+- **Model Type**: {model_info['model_type']}
+- **Description**: {model_info['description'] or 'No description provided'}
+
+### Performance Metrics:
+- **Quality Score**: {model_info['quality_score']:.4f}
+- **BLEU**: {model_info['bleu']:.2f}
+- **ChrF**: {model_info['chrf']:.4f}
+- **ROUGE-1**: {model_info['rouge1']:.4f}
+- **ROUGE-L**: {model_info['rougeL']:.4f}
+
+### Coverage Information:
+- **Total Samples**: {model_info['total_samples']:,}
+- **Language Pairs Covered**: {model_info['language_pairs_covered']}
+- **Google Comparable Pairs**: {model_info['google_pairs_covered']}
+- **Coverage Rate**: {model_info['coverage_rate']:.1%}
+
+### Google Translate Comparison:
+- **Google Quality Score**: {model_info['google_quality_score']:.4f}
+- **Google BLEU**: {model_info['google_bleu']:.2f}
+- **Google ChrF**: {model_info['google_chrf']:.4f}
         """
 
        return details_text, detail_plot
@@ -443,15 +439,11 @@ with gr.Blocks(
 ) as demo:
 
     # Header
-    gr.Markdown(f"""
+    gr.HTML(f"""
     <div class="main-header">
-
-    # {TITLE}
-
-    {DESCRIPTION}
-
-    **Supported Languages**: {len(ALL_UG40_LANGUAGES)} Ugandan languages | **Google Comparable**: {len(GOOGLE_SUPPORTED_LANGUAGES)} languages
-
+        <h1>{TITLE}</h1>
+        <p>{DESCRIPTION}</p>
+        <p><strong>Supported Languages</strong>: {len(ALL_UG40_LANGUAGES)} Ugandan languages | <strong>Google Comparable</strong>: {len(GOOGLE_SUPPORTED_LANGUAGES)} languages</p>
     </div>
     """)
 
@@ -541,7 +533,6 @@
             gr.Markdown("### 📤 Upload Predictions")
             gr.Markdown("Upload a CSV/TSV/JSON file with your model's predictions")
 
-
             predictions_file = gr.File(
                 label="📂 Predictions File",
                 file_types=[".csv", ".tsv", ".json"]
@@ -645,10 +636,10 @@
 
     ## 🗣️ Supported Languages
 
-    **All UG40 Languages ({len(ALL_UG40_LANGUAGES)} total):**
+    **All UG40 Languages ({len(ALL_UG40_LANGUAGES)} total):**
     {', '.join([f"{code} ({LANGUAGE_NAMES.get(code, code)})" for code in ALL_UG40_LANGUAGES])}
 
-    **Google Translate Comparable ({len(GOOGLE_SUPPORTED_LANGUAGES)} languages):**
+    **Google Translate Comparable ({len(GOOGLE_SUPPORTED_LANGUAGES)} languages):**
     {', '.join([f"{code} ({LANGUAGE_NAMES.get(code, code)})" for code in GOOGLE_SUPPORTED_LANGUAGES])}
 
     ## 📊 Evaluation Metrics
@@ -720,7 +711,7 @@
 
     This leaderboard is maintained by [Sunbird AI](https://sunbird.ai).
 
-    **Contact**: [research@sunbird.ai](mailto:research@sunbird.ai)
+    **Contact**: [research@sunbird.ai](mailto:research@sunbird.ai)
     **GitHub**: [Sunbird AI GitHub](https://github.com/sunbirdai)
 
     ## 📄 Citation
@@ -753,21 +744,12 @@
         outputs=[download_file, download_info]
     )
 
-    # # Validate predictions
-    # def handle_validation(file, model_name, author, description):
-    #     report, predictions = validate_submission(file, model_name, author, description)
-    #     is_valid = predictions is not None
-    #     return report, predictions, predictions, is_valid
-
+    # Validate predictions
    def handle_validation(file, model_name, author, description):
        report, predictions = validate_submission(file, model_name, author, description)
        valid = predictions is not None

        # Build the four returns:
-        # 1) report Markdown
-        # 2) store predictions in state
-        # 3) store validation info in state
-        # 4) enable or disable the submit button
        if valid:
            return (
                report,
@@ -782,16 +764,12 @@
                None,
                gr.update(interactive=False)  # <— this *disables* the button
            )
-
 
    validate_btn.click(
        fn=handle_validation,
        inputs=[predictions_file, model_name_input, author_input, description_input],
        outputs=[validation_output, predictions_validated, validation_info_state, submit_btn]
    )
-
-
-
 
    # Submit for evaluation
    def handle_submission(predictions, model_name, author, description, validation_info):
@@ -817,7 +795,10 @@
        table, plot1, plot2, stats = refresh_leaderboard_display(*args)

        # Update model dropdown choices
-        model_choices = table['model_name'].tolist() if not table.empty else []
+        if current_leaderboard is not None and not current_leaderboard.empty:
+            model_choices = current_leaderboard['model_name'].tolist()
+        else:
+            model_choices = []

        return table, plot1, plot2, stats, gr.Dropdown(choices=model_choices)

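The pivotal addition in this commit is `prepare_leaderboard_display`, imported from `src.leaderboard` but defined outside `app.py`, so its body is not visible in this diff. Going only by the inline comment ("removes detailed_metrics column"), a minimal sketch might look like the following; the signature and column list are assumptions, not the repository's actual implementation:

```python
import pandas as pd

# Hypothetical sketch of src/leaderboard.py's prepare_leaderboard_display;
# the real implementation is not shown in this diff.
def prepare_leaderboard_display(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the leaderboard that a flat table widget can render."""
    if df is None or df.empty:
        return pd.DataFrame()
    # Assumed behaviour: drop nested/verbose columns such as detailed_metrics.
    drop_cols = [c for c in ["detailed_metrics"] if c in df.columns]
    return df.drop(columns=drop_cols).reset_index(drop=True)
```

Note the pattern the commit establishes: only values routed to the leaderboard table pass through the helper (in `evaluate_submission` and `refresh_leaderboard_display`), while the plots and stats keep consuming the unfiltered `filtered_df`.

Separately, the download message asks submitters for a file with `sample_id` and `prediction` columns. A sketch of producing one with pandas, where the test-set file name and the identity `translate` stub are placeholders for your own data and model:

```python
import pandas as pd

def translate(text: str) -> str:
    """Placeholder for your model's inference call."""
    return text  # identity output keeps the sketch runnable

test_df = pd.read_csv("salt_test_set.csv")  # file name assumed
submission = pd.DataFrame({
    "sample_id": test_df["sample_id"],
    "prediction": [translate(t) for t in test_df["source_text"]],
})
submission.to_csv("my_model_predictions.csv", index=False)
```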