File size: 6,678 Bytes
88d91f4
383512a
059d8f0
80d82f8
 
a1f3b5b
383512a
 
88d91f4
 
 
e653f9c
b774671
 
383512a
fb67b80
80d82f8
 
88d91f4
e9f37ce
383512a
88d91f4
383512a
80d82f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383512a
 
80d82f8
 
 
 
 
 
 
99e39f3
3ed1bd5
99e39f3
 
80d82f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383512a
80d82f8
 
 
 
383512a
80d82f8
 
 
 
 
 
 
 
 
 
 
383512a
391b960
80d82f8
 
 
e9f37ce
80d82f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88d91f4
383512a
 
d625373
383512a
 
 
 
 
e9f37ce
80d82f8
 
 
7fa09dc
80d82f8
 
 
 
0496749
80d82f8
0496749
059d8f0
80d82f8
 
059d8f0
80d82f8
383512a
80d82f8
 
383512a
80d82f8
 
 
 
 
 
 
 
 
 
 
 
383512a
80d82f8
 
fc87e93
80d82f8
 
 
383512a
80d82f8
 
 
 
 
 
 
 
 
5dac6c6
80d82f8
 
 
f6889a3
80d82f8
 
5dac6c6
80d82f8
0fd6c86
80d82f8
f6889a3
 
80d82f8
 
 
 
 
f6889a3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import os
import json
import requests
import glob
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download, snapshot_download
from huggingface_hub.repocard import metadata_load
from apscheduler.schedulers.background import BackgroundScheduler

from tqdm.contrib.concurrent import thread_map

from utils import *

# Configuration for retrieval task leaderboard
SUBMISSION_FOLDER = "submission"  # directory scanned for *.json submission files
HF_TOKEN = os.environ.get("HF_TOKEN")  # may be None; HfApi then runs unauthenticated

block = gr.Blocks()  # root Gradio container; populated in the `with block:` section below
api = HfApi(token=HF_TOKEN)  # Hub client used only by restart()

# Retrieval task metrics configuration
retrieval_metrics = [
    {
        "metric_name": "Hit Rate Click@50",
        "metric_key": "hit_rate_click@50",
        "description": "Hit rate for click predictions at top 50"
    },
    {
        "metric_name": "Hit Rate A2C@50", 
        "metric_key": "hit_rate_A2C@50",
        "description": "Hit rate for A2C predictions at top 50"
    },
    {
        "metric_name": "Hit Rate Purchase@50",
        "metric_key": "hit_rate_purchase@50", 
        "description": "Hit rate for purchase predictions at top 50"
    }
]

# Main leaderboard configuration
leaderboard_config = {
    "title": "πŸ† Retrieval Task Leaderboard πŸ†",
    "description": "Leaderboard for retrieval task performance",
    "metrics": retrieval_metrics
}

def restart():
    """Restart the Hugging Face Space so the app rebuilds and re-reads submissions.

    NOTE(review): the repo_id points at the Deep-RL leaderboard Space even
    though this file implements the retrieval-task leaderboard — confirm this
    is the intended restart target.
    """
    print("RESTART")
    api.restart_space(repo_id="huggingface-projects/Deep-Reinforcement-Learning-Leaderboard")

def load_submission_files(folder=None):
    """Load and validate all JSON submission files from *folder*.

    Args:
        folder: Directory to scan for ``*.json`` files. Defaults to the
            module-level ``SUBMISSION_FOLDER`` (backward compatible with the
            original zero-argument call).

    Returns:
        list[dict]: Parsed submissions containing every required field.
        Unreadable, malformed, or incomplete files are skipped with a
        message rather than aborting the whole load.
    """
    if folder is None:
        folder = SUBMISSION_FOLDER
    submission_files = glob.glob(os.path.join(folder, "*.json"))
    submissions = []

    # Hoisted out of the loop; these keys are what the UI columns rely on.
    required_fields = ("user_id", "model_id", "hit_rate_click@50", "hit_rate_A2C@50", "hit_rate_purchase@50")

    for file_path in submission_files:
        try:
            with open(file_path, 'r') as f:
                submission_data = json.load(f)
        # OSError (not just FileNotFoundError) so permission/IO errors on a
        # single file cannot take down the whole leaderboard build.
        except (json.JSONDecodeError, OSError) as e:
            print(f"Error reading {file_path}: {e}")
            continue
        if all(field in submission_data for field in required_fields):
            submissions.append(submission_data)
        else:
            print(f"Warning: Invalid submission format in {file_path}")

    return submissions

def parse_submission_data(submission):
    """Format one raw submission dict into a leaderboard display row.

    Metric values (which may arrive as strings) are coerced to float;
    missing metrics default to 0. Returns the row dict, or None when a
    metric value cannot be interpreted as a number.
    """
    try:
        metric_columns = {
            "Hit Rate Click@50": float(submission.get("hit_rate_click@50", 0)),
            "Hit Rate A2C@50": float(submission.get("hit_rate_A2C@50", 0)),
            "Hit Rate Purchase@50": float(submission.get("hit_rate_purchase@50", 0)),
        }
    except (ValueError, TypeError) as e:
        print(f"Error parsing submission data: {e}")
        return None

    row = {
        "User": submission.get("user_id", "Unknown"),
        "Model": submission.get("model_id", "Unknown"),
        "Dataset": submission.get("dataset_id", "Unknown"),
    }
    row.update(metric_columns)
    row["Comment"] = submission.get("comment", "")
    return row

def update_leaderboard_from_submissions():
    """Build the leaderboard DataFrame from all on-disk JSON submissions.

    Returns a DataFrame sorted by "Hit Rate Click@50" (descending) with a
    1-based "Ranking" column prepended, or an empty frame with the expected
    columns when no valid submissions exist.
    """
    parsed_rows = [parse_submission_data(s) for s in load_submission_files()]
    parsed_rows = [row for row in parsed_rows if row]

    if not parsed_rows:
        # Keep the UI stable with an empty, correctly-shaped table.
        empty_columns = ["User", "Model", "Dataset", "Hit Rate Click@50", "Hit Rate A2C@50", "Hit Rate Purchase@50", "Comment"]
        return pd.DataFrame(columns=empty_columns)

    board = (
        pd.DataFrame(parsed_rows)
        .sort_values(by='Hit Rate Click@50', ascending=False)
        .reset_index(drop=True)
    )
    board.insert(0, 'Ranking', range(1, len(board) + 1))
    return board

    
def rank_dataframe(dataframe):
    """Sort *dataframe* by Results/User/Model (all descending) and write a
    1-based "Ranking" column as its first column.

    NOTE(review): this helper expects a "Results" column that the
    submission-based leaderboard above never produces — it appears to be
    vestigial; confirm before removing.

    Args:
        dataframe: pandas DataFrame with "Results", "User", "Model" columns.

    Returns:
        The sorted DataFrame (a new object; the caller's frame is not
        re-ranked in place).
    """
    dataframe = dataframe.sort_values(by=['Results', 'User', 'Model'], ascending=False)
    ranks = list(range(1, len(dataframe) + 1))
    if 'Ranking' not in dataframe.columns:
        dataframe.insert(0, 'Ranking', ranks)
    else:
        dataframe['Ranking'] = ranks
    return dataframe

def get_leaderboard_data():
    """Return the current leaderboard DataFrame, re-reading submission files from disk."""
    return update_leaderboard_from_submissions()

def refresh_leaderboard():
    """Simple function to refresh the leaderboard display"""
    print("πŸ”„ Refreshing leaderboard...")
    return get_leaderboard_data()

# run_update_dataset()

with block:
    gr.Markdown("""
    # πŸ† Retrieval Task Leaderboard πŸ† 
    
    This leaderboard tracks the performance of different models on retrieval tasks.
    
    ### How to Submit
    Submit your results as a JSON file in the `submission` folder via pull request.
    
    ### Required JSON Format
    ```json
    {
        "user_id": "your_username",
        "model_id": "your_model_name", 
        "hit_rate_click@50": "0.75",
        "hit_rate_A2C@50": "0.68",
        "hit_rate_purchase@50": "0.82",
        "dataset_id": "your_dataset",
        "comment": "Optional comment about your submission"
    }
    ```
    
    ### How to Update After PR
    **Currently, PR detection is NOT automated.** After a PR is merged:
    Wait until the APP is rebuilt. 
    
    ### Rankings
    Currently ranked by "Hit Rate Click@50" (you can modify the sorting in the code)
    """)
    
    # Simple refresh button
    refresh_button = gr.Button("πŸ”„ Refresh Leaderboard")
    
    # Display leaderboard without Overall Score column
    leaderboard_df = gr.Dataframe(
        value=get_leaderboard_data(),
        headers=["Ranking", "User", "Model", "Dataset", "Hit Rate Click@50", "Hit Rate A2C@50", "Hit Rate Purchase@50", "Comment"],
        label="Current Leaderboard"
    )
    
    # Simple refresh functionality
    refresh_button.click(refresh_leaderboard, outputs=leaderboard_df)

# Initialize the system
print("πŸš€ Starting Retrieval Task Leaderboard...")

# Setup background scheduler (optional, mainly for restart)
scheduler = BackgroundScheduler()
scheduler.add_job(restart, 'interval', seconds=21600)  # Restart every 6 hours
scheduler.start()

print("βœ… System initialized successfully!")
print("πŸ“Š Leaderboard accessible at: http://127.0.0.1:7860")
print("⚠️  PR detection is NOT automated - restart manually after PR merges")
print("πŸ”„ Use the refresh button in the UI to update the leaderboard")

block.launch()