Spaces:
Running
Running
| """ | |
| Copyright 2025 Balacoon | |
| Utils to get data to populate leaderboard. | |
| Communicates with `balacoon/speech_gen_baselines` dataset on Hugging Face, | |
| that contains evaluation results of different speech generation systems. | |
| """ | |
| import yaml | |
| import logging | |
| import requests | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_url | |
| from huggingface_hub.hf_api import RepoFolder | |
| from api import api | |
def make_pretty(df: pd.DataFrame) -> "tuple[pd.DataFrame, str]":
    """
    Prepare a raw metrics table for display in the leaderboard.

    Scales the `whisperv3_cer` column (when present) by 100 to express it
    in percent, renames known metric columns to their display names and
    composes a markdown description of the metrics found in the table.
    The input frame is not modified; columns that are missing are skipped.

    Args:
        df: table whose columns are raw metric names; may be empty.

    Returns:
        A tuple `(table, description)`: a copy of `df` with display column
        names, and a markdown bullet list (one bullet per line, empty string
        when no known metric is present).
    """
    # NOTE(review): the trailing "β" in the display names looks like a
    # mis-encoded arrow character - confirm against the upstream file.
    rename_map = {
        "whisperv3_cer": "WhisperV3 CER, %β",
        "utmos_mos": "UTMOS MOS, β",
        "aesthetics_enjoyment": "Enjoyment, β",
        "aesthetics_usefullness": "Usefulness, β",
        "aesthetics_complexity": "Complexity, β",
        "aesthetics_quality": "Quality, β",
        "ecapa_secs": "ECAPA SECS, β",
        "redimnet_secs": "ReDimNet SECS β",
    }
    description_map = {
        "whisperv3_cer": "Character Error Rate in %, measured with [WhisperV3](https://huggingface.co/openai/whisper-large-v3-turbo)",
        "utmos_mos": "Mean Opinion Score, showing how natural the speech is, measured with [UTMOS](https://huggingface.co/balacoon/utmos).",
        "ecapa_secs": "Speaker Embedding Cosine Similarity between reference audio and generated speech, measured with [ECAPA](https://huggingface.co/balacoon/ecapa).",
        "redimnet_secs": "Speaker Embedding Cosine Similarity between reference audio and generated speech, measured with [ReDimNet](https://github.com/IDRnD/redimnet).",
    }

    # Work on a copy so that the scaling below never leaks into the caller's frame.
    df = df.copy()

    # The CER column is optional: a table without it (or an empty table) is valid.
    if "whisperv3_cer" in df.columns:
        df["whisperv3_cer"] = df["whisperv3_cer"] * 100

    # One bullet per described metric, in the order the columns appear.
    bullets = [f"* {description_map[k]}\n" for k in df.columns if k in description_map]
    # All aesthetics metrics share a single bullet.
    if any("aesthetics_" in k for k in df.columns):
        bullets.append(
            "* Enjoyment / Usefulness / Complexity / Quality are Aesthetics metrics, measured with [audiobox-aesthetics](https://github.com/facebookresearch/audiobox-aesthetics).\n"
        )
    description = "".join(bullets)

    # Only rename columns that exist in the dataframe
    existing_columns = {k: v for k, v in rename_map.items() if k in df.columns}
    return df.rename(columns=existing_columns), description
def get_leaderboard_data(
    system_type: str, dataset: str, timeout: float = 10.0
) -> "tuple[pd.DataFrame, list, str]":
    """
    Fetches metrics.yaml for all systems of given type if they have evaluation for the given dataset.

    Lists the folders under `system_type` in the
    `balacoon/speech_gen_baselines` dataset repo and, for each of them,
    downloads `<folder>/<dataset>/metrics.yaml`. Folders without such a file,
    or with a file that cannot be fetched or parsed, are skipped.

    Args:
        system_type: path inside the repo whose sub-folders are listed.
        dataset: name of the sub-folder holding `metrics.yaml` for each model.
        timeout: seconds to wait for each HTTP request before giving up.

    Returns:
        A tuple `(table, datatypes, description)`:
        the table has a `Model` column (markdown link when the YAML provides
        `link`) followed by metric columns as returned by `make_pretty`;
        `datatypes` is `"markdown"` for the first column and `"number"` for
        the rest; `description` is the markdown text from `make_pretty`.
        When no model has usable metrics the table is empty with only the
        `Model` column and the description is an empty string.
    """
    repo_id = "balacoon/speech_gen_baselines"

    # Get all models under the system type
    models_tree = api.list_repo_tree(
        repo_id,
        repo_type="dataset",
        path_in_repo=system_type,
        recursive=False,
    )
    model_dirs = [item.path for item in models_tree if isinstance(item, RepoFolder)]

    # Collect metrics for each model that has the dataset
    metrics_data = []
    for model_dir in model_dirs:
        model_name = model_dir.split("/")[-1]
        metrics_path = f"{model_dir}/{dataset}/metrics.yaml"
        url = hf_hub_url(repo_id=repo_id, filename=metrics_path, repo_type="dataset")

        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as err:
            logging.warning("Failed to fetch %s: %s", metrics_path, err)
            continue
        if response.status_code == 404:
            # Skip if metrics.yaml doesn't exist for this model/dataset
            continue
        if not response.ok:
            logging.warning(
                "Unexpected HTTP status %s for %s", response.status_code, metrics_path
            )
            continue

        try:
            metrics_content = yaml.safe_load(response.text)
        except yaml.YAMLError as err:
            logging.error("Failed to parse metrics.yaml (%s): %s", metrics_path, err)
            continue

        # The file must be a mapping; on a plain string `in` would silently
        # degrade to a substring test.
        if not isinstance(metrics_content, dict) or "metrics" not in metrics_content:
            logging.error("`metrics` are missing from metrics.yaml (%s)", metrics_path)
            continue
        metrics = metrics_content["metrics"]
        if not isinstance(metrics, dict):
            logging.error("`metrics` is not a mapping in metrics.yaml (%s)", metrics_path)
            continue

        # Round all metric values to 4 decimal places
        try:
            rounded_metrics = {k: round(float(v), 4) for k, v in metrics.items()}
        except (TypeError, ValueError) as err:
            logging.error("Non-numeric metric in metrics.yaml (%s): %s", metrics_path, err)
            continue

        # prepare a row for the table
        if "model_name" in metrics_content:
            # overwrite model name
            model_name = metrics_content["model_name"]
        # add a link to a model if it is provided
        if "link" in metrics_content:
            model_name = f"[{model_name}]({metrics_content['link']})"
        row = {"Model": model_name}
        row.update(rounded_metrics)
        metrics_data.append(row)

    if not metrics_data:
        # Nothing to show: return a well-formed empty table instead of failing downstream.
        return pd.DataFrame(columns=["Model"]), ["markdown"], ""

    # Rename metric columns for display and build their description
    df, description = make_pretty(pd.DataFrame(metrics_data))
    # compose datatypes for the table: markdown for model name, and number for all other columns
    datatypes = ["markdown"] + ["number"] * (len(df.columns) - 1)
    return df, datatypes, description