File size: 6,678 Bytes
88d91f4
383512a
059d8f0
80d82f8
 
a1f3b5b
383512a
 
88d91f4
 
 
e653f9c
b774671
 
383512a
fb67b80
80d82f8
 
88d91f4
e9f37ce
383512a
88d91f4
383512a
80d82f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383512a
 
80d82f8
 
 
 
 
 
 
99e39f3
3ed1bd5
99e39f3
 
80d82f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383512a
80d82f8
 
 
 
383512a
80d82f8
 
 
 
 
 
 
 
 
 
 
383512a
391b960
80d82f8
 
 
e9f37ce
80d82f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88d91f4
383512a
 
d625373
383512a
 
 
 
 
e9f37ce
80d82f8
 
 
7fa09dc
80d82f8
 
 
 
0496749
80d82f8
0496749
059d8f0
80d82f8
 
059d8f0
80d82f8
383512a
80d82f8
 
383512a
80d82f8
 
 
 
 
 
 
 
 
 
 
 
383512a
80d82f8
 
fc87e93
80d82f8
 
 
383512a
80d82f8
 
 
 
 
 
 
 
 
5dac6c6
80d82f8
 
 
f6889a3
80d82f8
 
5dac6c6
80d82f8
0fd6c86
80d82f8
f6889a3
 
80d82f8
 
 
 
 
f6889a3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import os
import json
import requests
import glob
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download, snapshot_download
from huggingface_hub.repocard import metadata_load
from apscheduler.schedulers.background import BackgroundScheduler

from tqdm.contrib.concurrent import thread_map

from utils import *

# Configuration for retrieval task leaderboard
SUBMISSION_FOLDER = "submission"  # directory scanned for *.json submission files
HF_TOKEN = os.environ.get("HF_TOKEN")  # may be None; HfApi then runs unauthenticated

block = gr.Blocks()  # root Gradio container; populated in the `with block:` section below
api = HfApi(token=HF_TOKEN)  # Hub client used only by restart()

# Retrieval task metrics configuration
retrieval_metrics = [
    {
        "metric_name": "Hit Rate Click@50",
        "metric_key": "hit_rate_click@50",
        "description": "Hit rate for click predictions at top 50"
    },
    {
        "metric_name": "Hit Rate A2C@50", 
        "metric_key": "hit_rate_A2C@50",
        "description": "Hit rate for A2C predictions at top 50"
    },
    {
        "metric_name": "Hit Rate Purchase@50",
        "metric_key": "hit_rate_purchase@50", 
        "description": "Hit rate for purchase predictions at top 50"
    }
]

# Main leaderboard configuration
leaderboard_config = {
    "title": "πŸ† Retrieval Task Leaderboard πŸ†",
    "description": "Leaderboard for retrieval task performance",
    "metrics": retrieval_metrics
}

def restart():
    """Restart the Hugging Face Space so the app rebuilds and re-reads submissions.

    NOTE(review): the repo_id points at the Deep-RL leaderboard Space even
    though this file implements the retrieval-task leaderboard — confirm this
    is the intended restart target.
    """
    print("RESTART")
    api.restart_space(repo_id="huggingface-projects/Deep-Reinforcement-Learning-Leaderboard")

def load_submission_files(folder=None):
    """Load and validate all JSON submission files from *folder*.

    Args:
        folder: Directory to scan for ``*.json`` files. Defaults to the
            module-level ``SUBMISSION_FOLDER`` (backward compatible with the
            original zero-argument call).

    Returns:
        list[dict]: Parsed submissions containing every required field.
        Unreadable, malformed, or incomplete files are skipped with a
        message rather than aborting the whole load.
    """
    if folder is None:
        folder = SUBMISSION_FOLDER
    submission_files = glob.glob(os.path.join(folder, "*.json"))
    submissions = []

    # Hoisted out of the loop; these keys are what the UI columns rely on.
    required_fields = ("user_id", "model_id", "hit_rate_click@50", "hit_rate_A2C@50", "hit_rate_purchase@50")

    for file_path in submission_files:
        try:
            with open(file_path, 'r') as f:
                submission_data = json.load(f)
        # OSError (not just FileNotFoundError) so permission/IO errors on a
        # single file cannot take down the whole leaderboard build.
        except (json.JSONDecodeError, OSError) as e:
            print(f"Error reading {file_path}: {e}")
            continue
        if all(field in submission_data for field in required_fields):
            submissions.append(submission_data)
        else:
            print(f"Warning: Invalid submission format in {file_path}")

    return submissions

def parse_submission_data(submission):
    """Format one raw submission dict into a leaderboard display row.

    Metric values (which may arrive as strings) are coerced to float;
    missing metrics default to 0. Returns the row dict, or None when a
    metric value cannot be interpreted as a number.
    """
    try:
        metric_columns = {
            "Hit Rate Click@50": float(submission.get("hit_rate_click@50", 0)),
            "Hit Rate A2C@50": float(submission.get("hit_rate_A2C@50", 0)),
            "Hit Rate Purchase@50": float(submission.get("hit_rate_purchase@50", 0)),
        }
    except (ValueError, TypeError) as e:
        print(f"Error parsing submission data: {e}")
        return None

    row = {
        "User": submission.get("user_id", "Unknown"),
        "Model": submission.get("model_id", "Unknown"),
        "Dataset": submission.get("dataset_id", "Unknown"),
    }
    row.update(metric_columns)
    row["Comment"] = submission.get("comment", "")
    return row

def update_leaderboard_from_submissions():
    """Build the leaderboard DataFrame from all on-disk JSON submissions.

    Returns a DataFrame sorted by "Hit Rate Click@50" (descending) with a
    1-based "Ranking" column prepended, or an empty frame with the expected
    columns when no valid submissions exist.
    """
    parsed_rows = [parse_submission_data(s) for s in load_submission_files()]
    parsed_rows = [row for row in parsed_rows if row]

    if not parsed_rows:
        # Keep the UI stable with an empty, correctly-shaped table.
        empty_columns = ["User", "Model", "Dataset", "Hit Rate Click@50", "Hit Rate A2C@50", "Hit Rate Purchase@50", "Comment"]
        return pd.DataFrame(columns=empty_columns)

    board = (
        pd.DataFrame(parsed_rows)
        .sort_values(by='Hit Rate Click@50', ascending=False)
        .reset_index(drop=True)
    )
    board.insert(0, 'Ranking', range(1, len(board) + 1))
    return board

    
def rank_dataframe(dataframe):
    """Sort *dataframe* by Results/User/Model (all descending) and write a
    1-based "Ranking" column as its first column.

    NOTE(review): this helper expects a "Results" column that the
    submission-based leaderboard above never produces — it appears to be
    vestigial; confirm before removing.

    Args:
        dataframe: pandas DataFrame with "Results", "User", "Model" columns.

    Returns:
        The sorted DataFrame (a new object; the caller's frame is not
        re-ranked in place).
    """
    dataframe = dataframe.sort_values(by=['Results', 'User', 'Model'], ascending=False)
    ranks = list(range(1, len(dataframe) + 1))
    if 'Ranking' not in dataframe.columns:
        dataframe.insert(0, 'Ranking', ranks)
    else:
        dataframe['Ranking'] = ranks
    return dataframe

def get_leaderboard_data():
    """Return the current leaderboard DataFrame, re-reading submission files from disk."""
    return update_leaderboard_from_submissions()

def refresh_leaderboard():
    """Simple function to refresh the leaderboard display"""
    print("πŸ”„ Refreshing leaderboard...")
    return get_leaderboard_data()

# run_update_dataset()

with block:
    gr.Markdown("""
    # πŸ† Retrieval Task Leaderboard πŸ† 
    
    This leaderboard tracks the performance of different models on retrieval tasks.
    
    ### How to Submit
    Submit your results as a JSON file in the `submission` folder via pull request.
    
    ### Required JSON Format
    ```json
    {
        "user_id": "your_username",
        "model_id": "your_model_name", 
        "hit_rate_click@50": "0.75",
        "hit_rate_A2C@50": "0.68",
        "hit_rate_purchase@50": "0.82",
        "dataset_id": "your_dataset",
        "comment": "Optional comment about your submission"
    }
    ```
    
    ### How to Update After PR
    **Currently, PR detection is NOT automated.** After a PR is merged:
    Wait until the APP is rebuilt. 
    
    ### Rankings
    Currently ranked by "Hit Rate Click@50" (you can modify the sorting in the code)
    """)
    
    # Simple refresh button
    refresh_button = gr.Button("πŸ”„ Refresh Leaderboard")
    
    # Display leaderboard without Overall Score column
    leaderboard_df = gr.Dataframe(
        value=get_leaderboard_data(),
        headers=["Ranking", "User", "Model", "Dataset", "Hit Rate Click@50", "Hit Rate A2C@50", "Hit Rate Purchase@50", "Comment"],
        label="Current Leaderboard"
    )
    
    # Simple refresh functionality
    refresh_button.click(refresh_leaderboard, outputs=leaderboard_df)

# Initialize the system
print("πŸš€ Starting Retrieval Task Leaderboard...")

# Setup background scheduler (optional, mainly for restart)
scheduler = BackgroundScheduler()
scheduler.add_job(restart, 'interval', seconds=21600)  # Restart every 6 hours
scheduler.start()

print("βœ… System initialized successfully!")
print("πŸ“Š Leaderboard accessible at: http://127.0.0.1:7860")
print("⚠️  PR detection is NOT automated - restart manually after PR merges")
print("πŸ”„ Use the refresh button in the UI to update the leaderboard")

block.launch()