Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import wandb | |
def get_wandb_data(
    entity: str, project: str, api_key: str, job_type: str
) -> pd.DataFrame:
    """Collect the runs of a W&B project into a single DataFrame.

    Args:
        entity: Entity part of the ``<entity>/<project>`` path.
        project: Project-name part of the ``<entity>/<project>`` path.
        api_key: API key handed to ``wandb.Api``.
        job_type: Value used for the ``jobType`` run filter.

    Returns:
        A DataFrame with one row per run, indexed by run name. The
        flattened summary columns come first, followed by the flattened
        config columns.
    """
    client = wandb.Api(api_key=api_key)
    # Projects are addressed as "<entity>/<project-name>".
    matching_runs = client.runs(
        f"{entity}/{project}", filters={"jobType": job_type}
    )

    # Walk the runs exactly once and keep, per run:
    #   * summary._json_dict - the output keys/values for metrics such as
    #     accuracy (._json_dict is used to omit large files),
    #   * config - the hyperparameters, taken as returned (no keys are
    #     filtered out here),
    #   * name - the human-readable name of the run.
    records = [
        (run.summary._json_dict, run.config, run.name) for run in matching_runs
    ]
    summaries = [record[0] for record in records]
    configs = [record[1] for record in records]
    run_names = [record[2] for record in records]

    # Nested dicts are flattened into dotted column names, one level deep
    # for summaries and two levels deep for configs.
    flat_summaries = pd.json_normalize(summaries, max_level=1)
    flat_configs = pd.json_normalize(configs, max_level=2)

    combined = pd.concat([flat_summaries, flat_configs], axis=1)
    combined.index = run_names
    return combined
def get_leaderboard(runs_df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Count how often each model achieves the lowest value of each metric.

    Rows of ``runs_df`` are grouped by their ``unique_id`` column. Within
    every group, and for every metric, the model of the row holding the
    smallest metric value gets one win (the first such row on ties).

    Args:
        runs_df: Run table with a ``model`` column, a ``unique_id`` column
            and one column per entry of ``metrics``. Its index may contain
            duplicate labels.
        metrics: Names of the metric columns to evaluate; lower is treated
            as better.

    Returns:
        An integer DataFrame indexed by model with one column per metric
        holding the win counts, sorted descending by the metric columns in
        the order given.
    """
    # Build the zero table directly as integers instead of filling an
    # object-dtype NaN frame, which relies on deprecated fillna downcasting.
    leaderboard = pd.DataFrame(
        0, index=runs_df["model"].unique(), columns=metrics
    )
    for _, building_df in runs_df.groupby("unique_id"):
        for column in leaderboard.columns:
            scores = building_df[column]
            # A group without any valid score has no winner for this metric.
            if not scores.notna().any():
                continue
            # Pick the winner by position: a label lookup would return
            # several rows (and credit several models) whenever the index
            # contains duplicate labels.
            best_model = building_df["model"].iloc[scores.argmin()]
            leaderboard.loc[best_model, column] += 1
    return leaderboard.sort_values(by=list(leaderboard.columns), ascending=False)
def get_model_ranks(runs_df: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Order models by the median of ``metric`` across their runs.

    Args:
        runs_df: Run table with a ``model`` column and a numeric ``metric``
            column.
        metric: Name of the column whose per-model median defines the order.

    Returns:
        A two-column DataFrame (``rank``, ``model``) sorted ascending by the
        median metric, where ``rank`` counts up from 0.
    """
    # Per-model median over the numeric columns only.
    per_model_median = runs_df.groupby(["model"]).median(numeric_only=True)

    # Smallest median first; moving "model" out of the index leaves a fresh
    # 0..n-1 index that reflects the sorted position.
    ordered = per_model_median.sort_values(by=metric).reset_index()

    # Name that positional index "rank" and turn it into a regular column.
    with_rank = ordered.rename_axis("rank").reset_index()
    return with_rank[["rank", "model"]]