Spaces:

balacoon
/

TTSLeaderboard

Running

App Files Files Community

TTSLeaderboard / leaderboard_data.py

clementruhm

leaderboard_data: add legend for ReDimNet SECS evaluation

4599dce about 1 year ago

raw

history blame contribute delete

4.54 kB

	"""
	Copyright 2025 Balacoon

	Utils to get data to populate leaderboard.
	Communicates with `balacoon/speech_gen_baselines` dataset on Hugging Face,
	that contains evaluation results of different speech generation systems.
	"""

	import yaml
	import logging
	import requests

	import pandas as pd
	from huggingface_hub import hf_hub_url
	from huggingface_hub.hf_api import RepoFolder

	from api import api


	def make_pretty(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
	    """
	    Prepare a raw metrics table for display.

	    Converts `whisperv3_cer` to percent (multiplies by 100), renames known
	    metric columns to their display headers and composes a markdown legend
	    describing the metric columns that are present.

	    Every metric column is optional: columns missing from `df` are simply
	    skipped, so an empty frame is returned unchanged with an empty legend.
	    The input frame is not modified; a new frame is returned.

	    Args:
	        df: table with raw metric names as column names.

	    Returns:
	        Tuple of (frame with display column names, markdown legend string).
	    """
	    rename_map = {
	        "whisperv3_cer": "WhisperV3 CER, %↓",
	        "utmos_mos": "UTMOS MOS, ↑",
	        "aesthetics_enjoyment": "Enjoyment, ↑",
	        "aesthetics_usefullness": "Usefulness, ↑",
	        "aesthetics_complexity": "Complexity, ↑",
	        "aesthetics_quality": "Quality, ↑",
	        "ecapa_secs": "ECAPA SECS, ↑",
	        "redimnet_secs": "ReDimNet SECS ↑",
	    }
	    description_map = {
	        "whisperv3_cer": "Character Error Rate in %, measured with [WhisperV3](https://huggingface.co/openai/whisper-large-v3-turbo)",
	        "utmos_mos": "Mean Opinion Score, showing how natural the speech is, measured with [UTMOS](https://huggingface.co/balacoon/utmos).",
	        "ecapa_secs": "Speaker Embedding Cosine Similarity between reference audio and generated speech, measured with [ECAPA](https://huggingface.co/balacoon/ecapa).",
	        "redimnet_secs": "Speaker Embedding Cosine Similarity between reference audio and generated speech, measured with [ReDimNet](https://github.com/IDRnD/redimnet).",
	    }

	    # CER is scaled to percent to match its display header. `assign` returns
	    # a new frame, so the caller's DataFrame is left untouched, and the guard
	    # avoids a KeyError when the metric (or every column) is absent.
	    if "whisperv3_cer" in df.columns:
	        df = df.assign(whisperv3_cer=df["whisperv3_cer"] * 100)

	    # compose a description for columns, following the column order of the table
	    legend = [f"* {description_map[col]}\n" for col in df.columns if col in description_map]
	    # all aesthetics metrics share a single legend entry
	    if any("aesthetics_" in col for col in df.columns):
	        legend.append(
	            "* Enjoyment / Usefulness / Complexity / Quality are Aesthetics metrics, measured with [audiobox-aesthetics](https://github.com/facebookresearch/audiobox-aesthetics).\n"
	        )
	    description = "".join(legend)

	    # Only rename columns that exist in the dataframe
	    existing_columns = {raw: pretty for raw, pretty in rename_map.items() if raw in df.columns}
	    return df.rename(columns=existing_columns), description


	def _metrics_to_row(metrics_content: dict, default_name: str) -> dict:
	    """
	    Build one leaderboard row from the parsed content of a metrics.yaml.

	    Raises AttributeError / TypeError / ValueError if `metrics` is not a
	    mapping or holds a value that cannot be converted to float.
	    """
	    # `model_name` from the yaml overrides the directory name
	    model_name = metrics_content.get("model_name", default_name)
	    # add a link to a model if it is provided
	    if "link" in metrics_content:
	        model_name = f"[{model_name}]({metrics_content['link']})"
	    row = {"Model": model_name}
	    # Round all metric values to 4 decimal places
	    row.update({k: float(f"{float(v):.4f}") for k, v in metrics_content["metrics"].items()})
	    return row


	def get_leaderboard_data(system_type: str, dataset: str) -> tuple[pd.DataFrame, list[str], str]:
	    """
	    Fetches metrics.yaml for all systems of given type if they have evaluation for the given dataset.

	    Systems are skipped (best effort) when their metrics.yaml cannot be
	    fetched, is missing, or is malformed; only the "file does not exist"
	    case is skipped silently, everything else is logged.

	    Args:
	        system_type: directory in the `balacoon/speech_gen_baselines` dataset
	            repo whose sub-directories are the systems to list.
	        dataset: sub-directory of each system holding `metrics.yaml`.

	    Returns:
	        Tuple of (DataFrame with metrics per system, list of column datatypes
	        for the table, markdown legend for the metric columns). When no
	        system has metrics, the frame is empty with a single `Model` column.
	    """
	    repo_id = "balacoon/speech_gen_baselines"
	    # without a timeout a stalled connection would block the call forever
	    request_timeout_s = 30

	    # Get all models under the system type
	    models_tree = api.list_repo_tree(
	        repo_id,
	        repo_type="dataset",
	        path_in_repo=system_type,
	        recursive=False,
	    )
	    model_dirs = [item.path for item in models_tree if isinstance(item, RepoFolder)]

	    # Collect metrics for each model that has the dataset
	    metrics_data = []
	    for model_dir in model_dirs:
	        metrics_path = f"{model_dir}/{dataset}/metrics.yaml"
	        url = hf_hub_url(
	            repo_id=repo_id,
	            filename=metrics_path,
	            repo_type="dataset",
	        )
	        try:
	            response = requests.get(url, timeout=request_timeout_s)
	        except requests.RequestException as err:
	            logging.warning(f"failed to fetch {metrics_path}: {err}")
	            continue
	        if response.status_code == 404:
	            # Skip if metrics.yaml doesn't exist for this model/dataset
	            continue
	        if not response.ok:
	            logging.warning(f"failed to fetch {metrics_path}: HTTP {response.status_code}")
	            continue

	        try:
	            metrics_content = yaml.safe_load(response.text)
	        except yaml.YAMLError as err:
	            logging.error(f"failed to parse metrics.yaml ({metrics_path}): {err}")
	            continue
	        if not isinstance(metrics_content, dict) or "metrics" not in metrics_content:
	            logging.error(f"`metrics` are missing from metrics.yaml ({metrics_path})")
	            continue

	        # prepare a row for the table
	        try:
	            row = _metrics_to_row(metrics_content, model_dir.split("/")[-1])
	        except (AttributeError, TypeError, ValueError) as err:
	            logging.error(f"malformed `metrics` in metrics.yaml ({metrics_path}): {err}")
	            continue
	        metrics_data.append(row)

	    if not metrics_data:
	        # nothing to show: return an empty table that still has the model column
	        return pd.DataFrame(columns=["Model"]), ["markdown"], ""

	    df = pd.DataFrame(metrics_data)
	    # rename metric columns to display headers and get the legend for them
	    df, description = make_pretty(df)
	    # compose datatypes for the table: markdown for model name, and number for all other columns
	    datatypes = ["markdown"] + ["number"] * (len(df.columns) - 1)
	    return df, datatypes, description