Spaces:

Vikhrmodels
/

DOoM-lb

Running

App Files Files Community

Anonumous committed on Nov 19, 2025

Commit

5f998b6

1 Parent(s): 9bb7096

Add auto generation for model files

Browse files

Files changed (1) hide show

src/leaderboard/build_leaderboard.py +132 -2

src/leaderboard/build_leaderboard.py CHANGED Viewed

@@ -2,12 +2,13 @@ import json
 import logging
 import os
 import time
 from typing import Any
 import pandas as pd
-from huggingface_hub import snapshot_download
-from src.envs import H4_TOKEN, RESULTS_PATH, RESULTS_REPO
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -68,11 +69,140 @@ def download_dataset(
     logging.error("Failed to download %s after %s attempts", repo_id, max_attempts)
 def download_results() -> None:
     """Download model evaluation results from HuggingFace RESULTS_REPO."""
     try:
         download_dataset(RESULTS_REPO, RESULTS_PATH)
         logging.info("Successfully downloaded model evaluation results")
     except Exception as e:
         logging.error(f"Failed to download model evaluation results: {e}")

 import logging
 import os
 import time
+from io import BytesIO
 from typing import Any
 import pandas as pd
+from huggingface_hub import hf_hub_download, snapshot_download
+from src.envs import API, H4_TOKEN, RESULTS_PATH, RESULTS_REPO
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
     logging.error("Failed to download %s after %s attempts", repo_id, max_attempts)
def create_safe_filename(model_name: str) -> str:
    """
    Build the JSON filename used to store a model's individual results.

    Args:
        model_name: Full model name (e.g., "username/model-name")

    Returns:
        Filename with "/" and spaces replaced by "_" and a ".json" suffix
        (e.g., "username_model-name.json"). A name without any "/" gets the
        owner prefix "unknown".
    """
    owner, slash, remainder = model_name.partition("/")
    if slash:
        # Everything after the first "/" is the model part; flatten nested paths.
        stem = f"{owner}_{remainder.replace('/', '_')}"
    else:
        stem = f"unknown_{model_name}"
    return stem.replace(" ", "_") + ".json"
def _number_or_default(entry, key, cast, default):
    """Return ``cast(entry[key])``, using *default* when the key is absent or null."""
    value = entry.get(key)
    return cast(default if value is None else value)


def generate_individual_files_from_leaderboard() -> None:
    """
    Generate individual model files from leaderboard.json backup.

    Downloads ``leaderboard.json`` from RESULTS_REPO and, for every entry whose
    per-model file is not yet present in the local ``external`` directory,
    writes that file locally and uploads it to RESULTS_REPO under
    ``model_data/external/``. Existing files are never overwritten.

    Failures are logged rather than raised: a bad entry is counted as an error
    and the remaining entries are still processed; a failure to fetch or parse
    ``leaderboard.json`` aborts the generation with a log message.
    """
    try:
        # Download leaderboard.json from RESULTS_REPO
        logging.info("Checking for leaderboard.json in RESULTS_REPO")
        leaderboard_path = hf_hub_download(
            repo_id=RESULTS_REPO,
            filename="leaderboard.json",
            repo_type="dataset",
            token=H4_TOKEN,
        )
        with open(leaderboard_path, encoding="utf-8") as f:
            leaderboard_data = json.load(f)

        if not leaderboard_data:
            logging.info("leaderboard.json is empty, skipping generation")
            return
        logging.info(f"Found leaderboard.json with {len(leaderboard_data)} models")

        # Snapshot the filenames already present so they are not regenerated
        external_dir = "./m_data/model_data/external/"
        os.makedirs(external_dir, exist_ok=True)
        existing_files = set(os.listdir(external_dir))
        logging.info(f"Existing files in external/: {len(existing_files)}")

        created_count = 0
        skipped_count = 0
        error_count = 0

        for entry in leaderboard_data:
            # Kept outside the try so the error handler can label the entry
            # without touching `entry` again (it may not even be a dict).
            model_name = None
            try:
                if not isinstance(entry, dict):
                    logging.warning(f"Skipping entry without model_name: {entry}")
                    error_count += 1
                    continue

                model_name = entry.get("model_name") or entry.get("model")
                if not model_name:
                    logging.warning(f"Skipping entry without model_name: {entry}")
                    error_count += 1
                    continue

                safe_filename = create_safe_filename(model_name)

                # Skip if file already exists (on disk or created earlier in this run)
                if safe_filename in existing_files:
                    skipped_count += 1
                    continue

                # Missing or null numeric fields fall back to zero
                model_data = {
                    "model_name": model_name,
                    "score": _number_or_default(entry, "score", float, 0.0),
                    "math_score": _number_or_default(entry, "math_score", float, 0.0),
                    "physics_score": _number_or_default(entry, "physics_score", float, 0.0),
                    "total_tokens": _number_or_default(entry, "total_tokens", int, 0),
                    "evaluation_time": _number_or_default(entry, "evaluation_time", float, 0.0),
                    "system_prompt": entry.get(
                        "system_prompt",
                        "Вы - полезный помощник по математике и физике. Ответьте на русском языке.",
                    ),
                }

                # Save locally
                local_path = os.path.join(external_dir, safe_filename)
                with open(local_path, "w", encoding="utf-8") as f:
                    json.dump(model_data, f, ensure_ascii=False, indent=2)
                # Record the new file so a duplicate leaderboard entry is skipped
                existing_files.add(safe_filename)

                # Upload to RESULTS_REPO; raw bytes are passed directly
                payload = json.dumps(model_data, ensure_ascii=False).encode("utf-8")
                API.upload_file(
                    path_or_fileobj=payload,
                    path_in_repo=f"model_data/external/{safe_filename}",
                    repo_id=RESULTS_REPO,
                    repo_type="dataset",
                )
                logging.info(f"Created: {safe_filename}")
                created_count += 1
            except Exception as e:
                logging.error(f"Failed to process entry {model_name or 'unknown'}: {e}")
                error_count += 1
                continue

        logging.info(
            f"Generation complete: {created_count} files created, {skipped_count} skipped, {error_count} errors"
        )
    except FileNotFoundError:
        logging.warning("leaderboard.json not found in RESULTS_REPO, skipping generation")
    except Exception as e:
        logging.error(f"Failed to generate files from leaderboard.json: {e}")
 def download_results() -> None:
     """Download model evaluation results from HuggingFace RESULTS_REPO."""
     try:
         download_dataset(RESULTS_REPO, RESULTS_PATH)
         logging.info("Successfully downloaded model evaluation results")
+        # Generate individual files from leaderboard.json if needed
+        generate_individual_files_from_leaderboard()
     except Exception as e:
         logging.error(f"Failed to download model evaluation results: {e}")