Spaces:
Build error
Build error
| """ | |
| Leaderboard processing. | |
| """ | |
| import datetime | |
| import os | |
| from src.agents.parser_agent import process_leaderboard | |
| from src.file_utils import create_category_slug, split_combined_id | |
def normalize_category(category_name):
    """
    Turn a human-readable category name into its slug form.

    This is a thin wrapper: all the work is delegated to
    ``create_category_slug`` from ``src.file_utils``.
    NOTE(review): the slug rules themselves are defined in file_utils; they
    were previously described here as lowercasing and replacing spaces and
    underscores with hyphens -- confirm against that module.

    Args:
        category_name: The category name to normalize

    Returns:
        Whatever ``create_category_slug`` returns for ``category_name``
    """
    slug = create_category_slug(category_name)
    return slug
def _read_max_retries(default=3):
    """Return the attempt budget from LEADERBOARD_MAX_RETRIES, never below 1."""
    raw_value = os.getenv("LEADERBOARD_MAX_RETRIES", str(default))
    try:
        configured = int(raw_value)
    except (TypeError, ValueError):
        print(f"Invalid LEADERBOARD_MAX_RETRIES value {raw_value!r}, falling back to {default}")
        return default
    if configured < 1:
        # A budget of 0 (or less) would skip processing entirely; "no retries"
        # still means one attempt.
        print(f"LEADERBOARD_MAX_RETRIES must be at least 1 (got {configured}), using 1")
        return 1
    return configured


def _has_valid_results(result):
    """Return True when the agent response has usable top models and evaluation criteria."""
    results = result.get("results") if isinstance(result, dict) else None
    if not results:
        print("Missing or empty results in agent response")
        return False
    if not isinstance(results, dict):
        print(f"Invalid results format: {type(results).__name__}, expected dict")
        return False

    top_models = results.get("top_models")
    if not top_models or not isinstance(top_models, (list, tuple)):
        return False

    # Each model must be a mapping with at least a rank and a name
    for model_info in top_models:
        if not isinstance(model_info, dict):
            return False
        if not model_info.get("rank") or not model_info.get("name"):
            return False

    return bool(results.get("evaluation_criteria"))


def _extract_top_models(top_models):
    """Return the top models reduced to their rank, name and url fields."""
    return [
        {
            "rank": model_info.get("rank"),
            "name": model_info.get("name"),
            "url": model_info.get("url", None),
        }
        for model_info in top_models
    ]


def process_single_leaderboard(uid, host, model, index, all_results, additional_rules=None, category=None):
    """
    Process a single leaderboard and update the results.

    The leaderboard is parsed with ``process_leaderboard``, retrying until the
    response reports ``parsing_status == "success"`` or the attempt budget
    (environment variable ``LEADERBOARD_MAX_RETRIES``, default 3, minimum 1)
    is used up. The outcome is stored in ``all_results`` as "approved" when the
    response contains valid top models and evaluation criteria, and as
    "rejected" otherwise.

    Args:
        uid: The UID of the leaderboard to process
        host: The URL of the leaderboard
        model: The model to use
        index: The index of the leaderboard
        all_results: The list of all results (modified in place)
        additional_rules: Additional specific rules for this leaderboard
        category: The category of the leaderboard (for combined identifier)

    Returns:
        The updated list of results (the same list object as ``all_results``)
    """
    print(f"\n\nProcessing leaderboard: {uid} - {host}")
    if additional_rules:
        print(f"Additional rules for this leaderboard: {additional_rules}")

    normalized_category = None
    if category:
        normalized_category = normalize_category(category)
        print(f"Category: {category} (normalized: {normalized_category})")

    # Get the maximum number of attempts from environment variables
    max_retries = _read_max_retries()
    print(f"Maximum number of retries configured: {max_retries}")

    # Bound up front so the code after the loop never sees an undefined name
    result = None
    for attempt in range(1, max_retries + 1):
        if attempt > 1:
            print(f"Retry attempt {attempt}/{max_retries} for leaderboard {uid} - {host}")

        result = process_leaderboard(host, model, index, uid, additional_rules)

        # A missing or malformed response counts as a failed attempt
        status = result.get("parsing_status") if isinstance(result, dict) else None
        if status == "success":
            break

        # Report the error for every failed attempt, including the last one
        if status == "error":
            last_error = result.get("parsing_message", "Unknown error")
            print(f"Error during attempt {attempt}: {last_error}")

    # Get parsing date from result or generate a new one if not available
    if isinstance(result, dict) and "parsed_at" in result:
        parsed_at = result["parsed_at"]
    else:
        # Fallback to current time if not provided by process_leaderboard
        parsed_at = datetime.datetime.now().isoformat()

    # Format of the combined UID: category_uid
    # The category is already normalized (slugified) by normalize_category
    # The underscore "_" is the ONLY separator between the category and the UID
    result_uid = f"{normalized_category}_{uid}" if normalized_category else uid

    # Create base result object; status stays "rejected" unless validation passes
    leaderboard_result = {
        "uid": result_uid,
        "original_uid": uid,
        "category": normalized_category,
        "host": host,
        "parsing_status": "rejected",
        "parsed_at": parsed_at,
    }

    # If we have valid results, extract the data
    if _has_valid_results(result):
        leaderboard_result["parsing_status"] = "approved"
        leaderboard_result["top_models"] = _extract_top_models(result["results"]["top_models"])
        leaderboard_result["evaluation_criteria"] = result["results"]["evaluation_criteria"]
    else:
        print(f"Leaderboard rejected: {uid} - Incomplete or invalid information")

    # Check if this UID already exists in the results
    # (.get tolerates stored entries that lack some of the keys)
    for i, existing_result in enumerate(all_results):
        if existing_result.get("uid") == result_uid:
            # Replace the existing result
            all_results[i] = leaderboard_result
            print(f"Result updated for UID: {result_uid}")
            return all_results

    # ADDITIONAL CHECK: Make sure there's no confusion with other categories
    # for the same original_uid
    for existing_result in all_results:
        same_leaderboard = existing_result.get("original_uid") == uid
        if same_leaderboard and existing_result.get("category") != normalized_category:
            print(f"WARNING: A result already exists for original_uid {uid} but with a different category:")
            print(f" - Existing category: {existing_result.get('category')}, UID: {existing_result.get('uid')}")
            print(f" - New category: {normalized_category}, UID: {result_uid}")
            # We continue anyway, as it's a valid case to have the same leaderboard in different categories

    # If we get here, this is a new result
    all_results.append(leaderboard_result)
    print(f"New result added for UID: {result_uid}")
    return all_results