""" Display utilities for the CodeReview Leaderboard """ from typing import List, Dict, Any, Optional, Tuple import json from datetime import datetime, timezone from src.envs import PROGRAMMING_LANGUAGES, COMMENT_LANGUAGES, TAXONOMY_CATEGORIES, QUALITY_METRICS from src.display.formatting import format_table_cell, format_timestamp def filter_leaderboard_data( data: List[Dict], programming_language: str = "All", comment_language: str = "All", taxonomy_category: str = "All", sort_by: str = "llm_pass_1", sort_order: str = "desc" ) -> List[Dict]: """Filter and sort leaderboard data based on criteria""" if not data: return [] # Apply filters filtered_data = data.copy() if programming_language != "All": filtered_data = [ entry for entry in filtered_data if entry.get("programming_language", "").lower() == programming_language.lower() ] if comment_language != "All": filtered_data = [ entry for entry in filtered_data if entry.get("comment_language", "").lower() == comment_language.lower() ] if taxonomy_category != "All": filtered_data = [ entry for entry in filtered_data if entry.get("taxonomy_category", "").lower() == taxonomy_category.lower() ] # Sort data reverse = sort_order.lower() == "desc" try: if sort_by in ["bleu", "llm_pass_1", "llm_pass_5", "llm_pass_10"]: filtered_data.sort(key=lambda x: x.get(sort_by, 0), reverse=reverse) elif sort_by in QUALITY_METRICS: filtered_data.sort(key=lambda x: x.get("metrics", {}).get(sort_by, 0), reverse=reverse) else: filtered_data.sort(key=lambda x: str(x.get(sort_by, "")), reverse=reverse) except Exception as e: print(f"Error sorting data: {e}") # Default sort by pass@1 filtered_data.sort(key=lambda x: x.get("llm_pass_1", 0), reverse=True) return filtered_data def get_main_leaderboard_data( data: List[Dict], programming_language: str = "All", comment_language: str = "All", taxonomy_category: str = "All", sort_by: str = "llm_pass_1" ) -> List[List[str]]: """Get formatted main leaderboard table data""" filtered_data = filter_leaderboard_data( data, programming_language, comment_language, taxonomy_category, sort_by ) table_rows = [] for entry in filtered_data: row = [ format_table_cell(entry.get("model_name", ""), "model"), format_table_cell(entry.get("programming_language", ""), "programming language"), format_table_cell(entry.get("comment_language", ""), "comment language"), format_table_cell(entry.get("taxonomy_category", ""), "taxonomy"), format_table_cell(entry.get("bleu", 0), "bleu"), format_table_cell(entry.get("llm_pass_1", 0), "pass@1"), format_table_cell(entry.get("llm_pass_5", 0), "pass@5"), format_table_cell(entry.get("llm_pass_10", 0), "pass@10"), ] table_rows.append(row) return table_rows def get_quality_metrics_data( data: List[Dict], programming_language: str = "All", comment_language: str = "All", taxonomy_category: str = "All", sort_by: str = "llm_pass_1" ) -> List[List[str]]: """Get formatted quality metrics table data""" filtered_data = filter_leaderboard_data( data, programming_language, comment_language, taxonomy_category, sort_by ) table_rows = [] for entry in filtered_data: metrics = entry.get("metrics", {}) row = [format_table_cell(entry.get("model_name", ""), "model")] for metric in QUALITY_METRICS: formatted_value = format_table_cell(metrics.get(metric, 0), metric.replace("_", " ")) row.append(formatted_value) table_rows.append(row) return table_rows def get_submission_history_data( data: List[Dict], programming_language: str = "All", comment_language: str = "All", taxonomy_category: str = "All", limit: int = 50 ) -> List[List[str]]: """Get formatted submission history data""" filtered_data = filter_leaderboard_data( data, programming_language, comment_language, taxonomy_category, "submission_date", "desc" ) # Limit results filtered_data = filtered_data[:limit] table_rows = [] for entry in filtered_data: row = [ format_table_cell(entry.get("model_name", ""), "model"), format_table_cell(entry.get("programming_language", ""), "programming language"), format_table_cell(entry.get("comment_language", ""), "comment language"), format_table_cell(entry.get("taxonomy_category", ""), "taxonomy"), format_table_cell(entry.get("llm_pass_1", 0), "pass@1"), format_timestamp(entry.get("submission_date", "")), entry.get("submission_ip", "").split(".")[0] + ".xxx.xxx.xxx" if entry.get("submission_ip") else "Unknown" ] table_rows.append(row) return table_rows def get_statistics_summary(data: List[Dict]) -> Dict[str, Any]: """Get summary statistics for the leaderboard""" if not data: return { "total_models": 0, "total_submissions": 0, "avg_pass_1": 0, "best_model": "None", "languages_covered": 0, "categories_covered": 0 } # Calculate statistics total_models = len(set(entry.get("model_name", "") for entry in data)) total_submissions = len(data) pass_1_scores = [entry.get("llm_pass_1", 0) for entry in data if entry.get("llm_pass_1") is not None] avg_pass_1 = sum(pass_1_scores) / len(pass_1_scores) if pass_1_scores else 0 best_entry = max(data, key=lambda x: x.get("llm_pass_1", 0)) if data else None best_model = best_entry.get("model_name", "None") if best_entry else "None" languages_covered = len(set(entry.get("programming_language", "") for entry in data if entry.get("programming_language"))) categories_covered = len(set(entry.get("taxonomy_category", "") for entry in data if entry.get("taxonomy_category"))) return { "total_models": total_models, "total_submissions": total_submissions, "avg_pass_1": avg_pass_1, "best_model": best_model, "languages_covered": languages_covered, "categories_covered": categories_covered } def validate_submission_data(data: Dict[str, Any]) -> Tuple[bool, str]: """Validate submission data""" required_fields = ["model_name", "programming_language", "comment_language", "taxonomy_category"] # Check required fields for field in required_fields: if not data.get(field): return False, f"Missing required field: {field}" # Validate scores score_fields = ["bleu", "llm_pass_1", "llm_pass_5", "llm_pass_10"] for field in score_fields: value = data.get(field) if value is None: return False, f"Missing score: {field}" if not isinstance(value, (int, float)): return False, f"Invalid score format: {field}" if not 0 <= value <= 1: return False, f"Score out of range (0-1): {field}" # Validate metrics metrics = data.get("metrics", {}) for metric in QUALITY_METRICS: value = metrics.get(metric) if value is None: return False, f"Missing metric: {metric}" if not isinstance(value, (int, float)): return False, f"Invalid metric format: {metric}" if not 0 <= value <= 10: return False, f"Metric out of range (0-10): {metric}" # Validate language and category choices if data.get("programming_language") not in PROGRAMMING_LANGUAGES: return False, "Invalid programming language" if data.get("comment_language") not in COMMENT_LANGUAGES: return False, "Invalid comment language" if data.get("taxonomy_category") not in TAXONOMY_CATEGORIES: return False, "Invalid taxonomy category" return True, "Valid submission" def get_leaderboard_insights(data: List[Dict]) -> Dict[str, Any]: """Get insights and trends from leaderboard data""" if not data: return {} # Language performance analysis lang_performance = {} for lang in PROGRAMMING_LANGUAGES[1:]: # Skip "All" lang_data = [entry for entry in data if entry.get("programming_language") == lang] if lang_data: avg_score = sum(entry.get("llm_pass_1", 0) for entry in lang_data) / len(lang_data) lang_performance[lang] = { "avg_score": avg_score, "model_count": len(lang_data), "best_model": max(lang_data, key=lambda x: x.get("llm_pass_1", 0)).get("model_name", "") } # Category performance analysis category_performance = {} for category in TAXONOMY_CATEGORIES[1:]: # Skip "All" cat_data = [entry for entry in data if entry.get("taxonomy_category") == category] if cat_data: avg_score = sum(entry.get("llm_pass_1", 0) for entry in cat_data) / len(cat_data) category_performance[category] = { "avg_score": avg_score, "model_count": len(cat_data), "best_model": max(cat_data, key=lambda x: x.get("llm_pass_1", 0)).get("model_name", "") } return { "language_performance": lang_performance, "category_performance": category_performance, "top_performers": sorted(data, key=lambda x: x.get("llm_pass_1", 0), reverse=True)[:5] } def export_leaderboard_data(data: List[Dict], format_type: str = "json") -> str: """Export leaderboard data in specified format""" if format_type.lower() == "json": return json.dumps(data, indent=2, ensure_ascii=False) elif format_type.lower() == "csv": # Simple CSV export if not data: return "" # Get headers headers = ["model_name", "programming_language", "comment_language", "taxonomy_category", "bleu", "llm_pass_1", "llm_pass_5", "llm_pass_10"] headers.extend(QUALITY_METRICS) lines = [",".join(headers)] for entry in data: row = [] for header in headers: if header in QUALITY_METRICS: value = entry.get("metrics", {}).get(header, "") else: value = entry.get(header, "") row.append(str(value)) lines.append(",".join(row)) return "\n".join(lines) else: return "Unsupported format"