Spaces:
Sleeping
Sleeping
| """ | |
| Display utilities for the CodeReview Leaderboard | |
| """ | |
| from typing import List, Dict, Any, Optional, Tuple | |
| import json | |
| from datetime import datetime, timezone | |
| from src.envs import PROGRAMMING_LANGUAGES, COMMENT_LANGUAGES, TAXONOMY_CATEGORIES, QUALITY_METRICS | |
| from src.display.formatting import format_table_cell, format_timestamp | |
def filter_leaderboard_data(
    data: List[Dict],
    programming_language: str = "All",
    comment_language: str = "All",
    taxonomy_category: str = "All",
    sort_by: str = "llm_pass_1",
    sort_order: str = "desc"
) -> List[Dict]:
    """Filter and sort leaderboard entries.

    Args:
        data: Leaderboard entries (dicts). The input list is not modified.
        programming_language: Keep only entries whose "programming_language"
            equals this value, ignoring case. "All" disables the filter.
        comment_language: Same, applied to the "comment_language" field.
        taxonomy_category: Same, applied to the "taxonomy_category" field.
        sort_by: "bleu", "llm_pass_1", "llm_pass_5" or "llm_pass_10" sort on
            that top-level field; a name in QUALITY_METRICS sorts on
            entry["metrics"][name]; any other value sorts on the string form
            of the top-level field with that name.
        sort_order: "desc" (case-insensitive) sorts descending; any other
            value sorts ascending.

    Returns:
        A new list holding the matching entries in sorted order, or [] when
        ``data`` is empty. A score that is absent or stored as None is
        ranked as 0.
    """
    if not data:
        return []

    def _or_zero(value):
        # A stored None must rank like an absent key (0): comparing None with
        # a number raises TypeError and would abort the sort.
        return 0 if value is None else value

    filtered_data = list(data)
    criteria = (
        ("programming_language", programming_language),
        ("comment_language", comment_language),
        ("taxonomy_category", taxonomy_category),
    )
    for field, wanted in criteria:
        if wanted == "All":
            continue
        wanted_lower = wanted.lower()
        filtered_data = [
            entry for entry in filtered_data
            # `or ""` tolerates a field that is present but None.
            if str(entry.get(field) or "").lower() == wanted_lower
        ]

    reverse = sort_order.lower() == "desc"
    try:
        if sort_by in ("bleu", "llm_pass_1", "llm_pass_5", "llm_pass_10"):
            filtered_data.sort(
                key=lambda entry: _or_zero(entry.get(sort_by)),
                reverse=reverse,
            )
        elif sort_by in QUALITY_METRICS:
            filtered_data.sort(
                key=lambda entry: _or_zero((entry.get("metrics") or {}).get(sort_by)),
                reverse=reverse,
            )
        else:
            filtered_data.sort(
                key=lambda entry: str(entry.get(sort_by, "")),
                reverse=reverse,
            )
    except Exception as e:
        # Deliberate best effort: if the requested ordering cannot be
        # computed, report it and fall back to pass@1, best first.
        print(f"Error sorting data: {e}")
        filtered_data.sort(
            key=lambda entry: _or_zero(entry.get("llm_pass_1")),
            reverse=True,
        )
    return filtered_data
def get_main_leaderboard_data(
    data: List[Dict],
    programming_language: str = "All",
    comment_language: str = "All",
    taxonomy_category: str = "All",
    sort_by: str = "llm_pass_1"
) -> List[List[str]]:
    """Build the rows of the main leaderboard table.

    The entries are filtered and sorted by ``filter_leaderboard_data`` (with
    its default sort order), then each one becomes a row of eight cells:
    model, programming language, comment language, taxonomy, BLEU, pass@1,
    pass@5 and pass@10. Every cell is produced by ``format_table_cell``.

    Args:
        data: Leaderboard entries (dicts).
        programming_language: Forwarded to ``filter_leaderboard_data``.
        comment_language: Forwarded to ``filter_leaderboard_data``.
        taxonomy_category: Forwarded to ``filter_leaderboard_data``.
        sort_by: Forwarded to ``filter_leaderboard_data``.

    Returns:
        One list of formatted cells per surviving entry.
    """
    # (entry key, value used when the key is absent, second argument handed
    # to format_table_cell)
    columns = (
        ("model_name", "", "model"),
        ("programming_language", "", "programming language"),
        ("comment_language", "", "comment language"),
        ("taxonomy_category", "", "taxonomy"),
        ("bleu", 0, "bleu"),
        ("llm_pass_1", 0, "pass@1"),
        ("llm_pass_5", 0, "pass@5"),
        ("llm_pass_10", 0, "pass@10"),
    )
    entries = filter_leaderboard_data(
        data,
        programming_language=programming_language,
        comment_language=comment_language,
        taxonomy_category=taxonomy_category,
        sort_by=sort_by,
    )
    return [
        [format_table_cell(entry.get(key, fallback), label) for key, fallback, label in columns]
        for entry in entries
    ]
def get_quality_metrics_data(
    data: List[Dict],
    programming_language: str = "All",
    comment_language: str = "All",
    taxonomy_category: str = "All",
    sort_by: str = "llm_pass_1"
) -> List[List[str]]:
    """Build the rows of the quality-metrics table.

    The entries are filtered and sorted by ``filter_leaderboard_data``. Each
    row starts with the formatted model name, followed by one formatted cell
    per name in QUALITY_METRICS, in that order. A metric missing from the
    entry's "metrics" dict is passed to ``format_table_cell`` as 0, and the
    second argument for a metric cell is the metric name with underscores
    replaced by spaces.

    Args:
        data: Leaderboard entries (dicts).
        programming_language: Forwarded to ``filter_leaderboard_data``.
        comment_language: Forwarded to ``filter_leaderboard_data``.
        taxonomy_category: Forwarded to ``filter_leaderboard_data``.
        sort_by: Forwarded to ``filter_leaderboard_data``.

    Returns:
        One list of formatted cells per surviving entry.
    """
    entries = filter_leaderboard_data(
        data,
        programming_language=programming_language,
        comment_language=comment_language,
        taxonomy_category=taxonomy_category,
        sort_by=sort_by,
    )
    rows = []
    for entry in entries:
        scores = entry.get("metrics", {})
        cells = [format_table_cell(entry.get("model_name", ""), "model")]
        cells.extend(
            format_table_cell(scores.get(name, 0), name.replace("_", " "))
            for name in QUALITY_METRICS
        )
        rows.append(cells)
    return rows
def get_submission_history_data(
    data: List[Dict],
    programming_language: str = "All",
    comment_language: str = "All",
    taxonomy_category: str = "All",
    limit: int = 50
) -> List[List[str]]:
    """Build the rows of the submission-history table.

    Entries are filtered by ``filter_leaderboard_data`` and sorted on
    "submission_date" in descending order, then truncated to the first
    ``limit`` entries.

    Args:
        data: Leaderboard entries (dicts).
        programming_language: Forwarded to ``filter_leaderboard_data``.
        comment_language: Forwarded to ``filter_leaderboard_data``.
        taxonomy_category: Forwarded to ``filter_leaderboard_data``.
        limit: Slice bound applied to the sorted entries.

    Returns:
        One row per entry: formatted model, programming language, comment
        language, taxonomy and pass@1 cells, the formatted submission
        timestamp, and the masked submitter IP ("Unknown" when the entry has
        no "submission_ip").
    """
    filtered_data = filter_leaderboard_data(
        data, programming_language, comment_language, taxonomy_category, "submission_date", "desc"
    )
    table_rows = []
    for entry in filtered_data[:limit]:
        table_rows.append([
            format_table_cell(entry.get("model_name", ""), "model"),
            format_table_cell(entry.get("programming_language", ""), "programming language"),
            format_table_cell(entry.get("comment_language", ""), "comment language"),
            format_table_cell(entry.get("taxonomy_category", ""), "taxonomy"),
            format_table_cell(entry.get("llm_pass_1", 0), "pass@1"),
            format_timestamp(entry.get("submission_date", "")),
            _mask_submission_ip(entry.get("submission_ip")),
        ])
    return table_rows


def _mask_submission_ip(ip: Any) -> str:
    """Return a display form of *ip* that exposes only its leading group."""
    if not ip:
        return "Unknown"
    text = str(ip)
    if ":" in text:
        # IPv6 text form is colon-separated. Splitting on "." would leave the
        # whole address intact and publish it, so mask everything after the
        # first group instead.
        return text.split(":")[0] + ":xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx"
    return text.split(".")[0] + ".xxx.xxx.xxx"
def get_statistics_summary(data: List[Dict]) -> Dict[str, Any]:
    """Compute headline statistics for the leaderboard.

    Args:
        data: Leaderboard entries (dicts).

    Returns:
        A dict with:
          - "total_models": number of distinct "model_name" values (entries
            without a name all count as the one name "").
          - "total_submissions": number of entries.
          - "avg_pass_1": mean of the "llm_pass_1" values that are not None,
            or 0 when there are none.
          - "best_model": "model_name" of the entry with the highest
            "llm_pass_1" (first one wins on ties; a missing or None score
            ranks as 0), or "None" if that entry has no name.
          - "languages_covered": number of distinct non-empty
            "programming_language" values.
          - "categories_covered": number of distinct non-empty
            "taxonomy_category" values.
        For empty ``data`` every count is 0 and "best_model" is "None".
    """
    if not data:
        return {
            "total_models": 0,
            "total_submissions": 0,
            "avg_pass_1": 0,
            "best_model": "None",
            "languages_covered": 0,
            "categories_covered": 0
        }

    def _pass_1_or_zero(entry):
        # The average below skips None scores, so the ranking must tolerate
        # them too; max() would otherwise raise TypeError comparing None
        # with a float.
        score = entry.get("llm_pass_1")
        return 0 if score is None else score

    model_names = {entry.get("model_name", "") for entry in data}
    pass_1_scores = [
        entry["llm_pass_1"] for entry in data
        if entry.get("llm_pass_1") is not None
    ]
    avg_pass_1 = sum(pass_1_scores) / len(pass_1_scores) if pass_1_scores else 0

    best_entry = max(data, key=_pass_1_or_zero)
    languages = {
        entry["programming_language"] for entry in data
        if entry.get("programming_language")
    }
    categories = {
        entry["taxonomy_category"] for entry in data
        if entry.get("taxonomy_category")
    }
    return {
        "total_models": len(model_names),
        "total_submissions": len(data),
        "avg_pass_1": avg_pass_1,
        "best_model": best_entry.get("model_name", "None"),
        "languages_covered": len(languages),
        "categories_covered": len(categories)
    }
def validate_submission_data(data: Dict[str, Any]) -> Tuple[bool, str]:
    """Validate a submission dict.

    Checks run in this order and stop at the first failure:
      1. "model_name", "programming_language", "comment_language" and
         "taxonomy_category" are present and truthy.
      2. "bleu", "llm_pass_1", "llm_pass_5" and "llm_pass_10" are int/float
         (not bool) within [0, 1].
      3. "metrics" is a dict holding, for every name in QUALITY_METRICS, an
         int/float (not bool) within [0, 10].
      4. The language and category values are members of
         PROGRAMMING_LANGUAGES, COMMENT_LANGUAGES and TAXONOMY_CATEGORIES.

    Args:
        data: The submission to check.

    Returns:
        ``(True, "Valid submission")`` on success, otherwise ``(False, reason)``
        where ``reason`` names the first failing field.
    """
    def _is_number(value):
        # bool is a subclass of int; without this exclusion True/False would
        # be accepted as the scores 1/0.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    for field in ("model_name", "programming_language", "comment_language", "taxonomy_category"):
        if not data.get(field):
            return False, f"Missing required field: {field}"

    for field in ("bleu", "llm_pass_1", "llm_pass_5", "llm_pass_10"):
        value = data.get(field)
        if value is None:
            return False, f"Missing score: {field}"
        if not _is_number(value):
            return False, f"Invalid score format: {field}"
        if not 0 <= value <= 1:
            return False, f"Score out of range (0-1): {field}"

    metrics = data.get("metrics", {})
    if not isinstance(metrics, dict):
        # A null or non-mapping "metrics" must be a validation failure, not
        # an AttributeError raised out of the validator.
        return False, "Invalid metrics format"
    for metric in QUALITY_METRICS:
        value = metrics.get(metric)
        if value is None:
            return False, f"Missing metric: {metric}"
        if not _is_number(value):
            return False, f"Invalid metric format: {metric}"
        if not 0 <= value <= 10:
            return False, f"Metric out of range (0-10): {metric}"

    if data.get("programming_language") not in PROGRAMMING_LANGUAGES:
        return False, "Invalid programming language"
    if data.get("comment_language") not in COMMENT_LANGUAGES:
        return False, "Invalid comment language"
    if data.get("taxonomy_category") not in TAXONOMY_CATEGORIES:
        return False, "Invalid taxonomy category"
    return True, "Valid submission"
def get_leaderboard_insights(data: List[Dict]) -> Dict[str, Any]:
    """Summarise pass@1 performance per language and per category.

    Args:
        data: Leaderboard entries (dicts).

    Returns:
        {} for empty ``data``. Otherwise a dict with:
          - "language_performance": for each value in
            PROGRAMMING_LANGUAGES[1:] that has at least one entry, a dict
            with "avg_score" (mean "llm_pass_1", a missing score counting as
            0), "model_count" (number of entries) and "best_model" (name of
            the highest-scoring entry, "" if it has none).
          - "category_performance": the same summary keyed by the values of
            TAXONOMY_CATEGORIES[1:].
          - "top_performers": the five entries with the highest
            "llm_pass_1", best first.
    """
    if not data:
        return {}

    def pass_1(entry):
        return entry.get("llm_pass_1", 0)

    def summarize(field, choices):
        summary = {}
        for choice in choices:
            members = [entry for entry in data if entry.get(field) == choice]
            if not members:
                continue
            summary[choice] = {
                "avg_score": sum(map(pass_1, members)) / len(members),
                "model_count": len(members),
                "best_model": max(members, key=pass_1).get("model_name", ""),
            }
        return summary

    # Index 0 of both constant lists is skipped (the "All" option).
    by_language = summarize("programming_language", PROGRAMMING_LANGUAGES[1:])
    by_category = summarize("taxonomy_category", TAXONOMY_CATEGORIES[1:])
    ranked = sorted(data, key=pass_1, reverse=True)
    return {
        "language_performance": by_language,
        "category_performance": by_category,
        "top_performers": ranked[:5]
    }
def export_leaderboard_data(data: List[Dict], format_type: str = "json") -> str:
    """Serialise leaderboard entries as JSON or CSV text.

    Args:
        data: Leaderboard entries (dicts).
        format_type: "json" or "csv", case-insensitive.

    Returns:
        - "json": ``data`` dumped with a two-space indent and non-ASCII
          characters kept as-is.
        - "csv": a header line followed by one line per entry, joined with
          "\\n" and without a trailing newline; "" when ``data`` is empty.
          The columns are the four descriptive fields, the four score fields
          and then every name in QUALITY_METRICS (read from the entry's
          "metrics" dict). Missing values are written as empty fields.
          Fields containing commas, quotes or line breaks are quoted per the
          ``csv`` module's default dialect rules.
        - anything else: the string "Unsupported format".
    """
    import csv
    import io

    fmt = format_type.lower()
    if fmt == "json":
        return json.dumps(data, indent=2, ensure_ascii=False)
    if fmt != "csv":
        return "Unsupported format"
    if not data:
        return ""

    headers = ["model_name", "programming_language", "comment_language", "taxonomy_category",
               "bleu", "llm_pass_1", "llm_pass_5", "llm_pass_10"]
    headers.extend(QUALITY_METRICS)

    buffer = io.StringIO()
    # csv.writer quotes and escapes fields as needed; a plain ",".join would
    # split a value such as "acme, inc" into two columns.
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(headers)
    for entry in data:
        metrics = entry.get("metrics") or {}
        writer.writerow([
            str(metrics.get(header, "") if header in QUALITY_METRICS else entry.get(header, ""))
            for header in headers
        ])
    # Every row ends with exactly one "\n"; drop the final one so the result
    # has no trailing newline.
    return buffer.getvalue()[:-1]