Spaces:

rikhoffbauer2
/

drum-sample-extractor

Sleeping

File size: 4,461 Bytes
"""
Config persistence: save/load optimized pipeline configs to HuggingFace Hub.

Each config is a JSON file stored in a dataset repo with:
  - Pipeline parameters
  - Evaluation scores (overall, SI-SDR, sample quality, etc.)
  - Metadata (author, timestamp, description)

The leaderboard ranks configs by their overall evaluation score (highest first), as measured across test songs.
"""

import json, time, io, os
from dataclasses import dataclass, asdict
from typing import Optional


# Hub repository ID that the save/load/list functions in this module target.
CONFIGS_REPO = "rikhoffbauer2/sample-extractor-configs"
# Passed as `repo_type` together with CONFIGS_REPO in every Hub call in this module.
CONFIGS_REPO_TYPE = "dataset"


@dataclass
class PipelineConfig:
    """A named set of pipeline parameters together with its evaluation results.

    Every field has a default, so ``PipelineConfig()`` is a valid baseline
    config. Instances round-trip through plain dicts / JSON via
    :meth:`to_dict`, :meth:`from_dict` and :meth:`to_json`.
    """

    # --- Identity / metadata ---
    name: str = "default"
    description: str = ""
    author: str = ""
    timestamp: str = ""
    version: str = "1.0"

    # --- Pipeline parameters ---
    stem: str = "drums"
    onset_mode: str = "auto"
    pre_pad: float = 0.005
    min_dur: float = 0.02
    max_dur: float = 1.5
    min_gap: float = 0.015
    energy_threshold_db: float = -45.0
    separate_overlaps: bool = True
    overlap_threshold: float = 0.15
    synthesize: bool = True

    # --- Evaluation results (populated once the config has been evaluated) ---
    overall_score: float = 0.0
    mean_si_sdr: float = 0.0
    mean_sample_score: float = 0.0
    mean_env_corr: float = 0.0
    mean_onset_error_ms: float = 50.0
    n_test_songs: int = 0

    def to_dict(self) -> dict:
        """Return all fields as a plain ``dict`` keyed by field name."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> 'PipelineConfig':
        """Build a config from a mapping, ignoring keys that are not fields.

        Fields missing from ``d`` keep their declared defaults.
        """
        known_fields = cls.__dataclass_fields__
        kwargs = {}
        for key, value in d.items():
            if key in known_fields:
                kwargs[key] = value
        return cls(**kwargs)

    def to_json(self) -> str:
        """Serialize the config to a JSON string indented by two spaces."""
        payload = self.to_dict()
        return json.dumps(payload, indent=2)


def save_config(config: PipelineConfig, token: Optional[str] = None) -> str:
    """Upload a config as JSON to the configs repo on the HuggingFace Hub.

    The config is written to ``configs/<config.name>.json`` inside
    ``CONFIGS_REPO``. As a side effect, ``config.timestamp`` is overwritten
    with the current UTC time before serialization.

    Args:
        config: The configuration to persist; its ``name`` determines the
            file name in the repo.
        token: Optional Hub access token handed to ``HfApi``.

    Returns:
        The path of the uploaded file relative to the repo root.

    Raises:
        Any exception raised by ``HfApi.upload_file`` propagates to the caller.
    """
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Repo creation is deliberately best-effort: if it fails, the upload
    # below is still attempted and is the call whose failure is reported.
    try:
        api.create_repo(CONFIGS_REPO, repo_type=CONFIGS_REPO_TYPE, exist_ok=True)
    except Exception:
        pass

    # The trailing 'Z' denotes UTC, so format the UTC time explicitly;
    # strftime() without a time tuple would format *local* time.
    config.timestamp = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
    path = f"configs/{config.name}.json"

    api.upload_file(
        path_or_fileobj=io.BytesIO(config.to_json().encode()),
        path_in_repo=path,
        repo_id=CONFIGS_REPO,
        repo_type=CONFIGS_REPO_TYPE,
        commit_message=f"Config: {config.name} (score={config.overall_score:.1f})",
    )
    print(f"  ✓ Saved config '{config.name}' → {CONFIGS_REPO}/{path}")
    return path


def load_config(name: str, token: Optional[str] = None) -> PipelineConfig:
    """Download ``configs/<name>.json`` from the configs repo and parse it.

    Args:
        name: Config name, i.e. the file stem under ``configs/`` in the repo.
        token: Optional Hub access token forwarded to ``hf_hub_download``.

    Returns:
        A ``PipelineConfig`` built from the JSON content; keys that are not
        config fields are ignored by ``PipelineConfig.from_dict``.

    Raises:
        Errors from ``hf_hub_download`` (download failure) and from
        ``json.load`` (malformed JSON) propagate to the caller.
    """
    from huggingface_hub import hf_hub_download

    path = hf_hub_download(
        repo_id=CONFIGS_REPO,
        filename=f"configs/{name}.json",
        repo_type=CONFIGS_REPO_TYPE,
        token=token,
    )
    # JSON is UTF-8; don't depend on the platform's default text encoding.
    with open(path, encoding="utf-8") as f:
        return PipelineConfig.from_dict(json.load(f))


def list_configs(token: Optional[str] = None) -> list:
    """Load every config stored under ``configs/`` in the configs repo.

    This is a best-effort listing: if the repo's file list cannot be
    fetched an empty list is returned, and any individual config that fails
    to download or parse is skipped.

    Args:
        token: Optional Hub access token used for listing and downloading.

    Returns:
        A list of ``PipelineConfig`` objects sorted by ``overall_score``,
        highest first.
    """
    from huggingface_hub import HfApi, hf_hub_download

    api = HfApi(token=token)
    try:
        files = api.list_repo_files(CONFIGS_REPO, repo_type=CONFIGS_REPO_TYPE)
    except Exception:
        # Listing failed: report "no configs" rather than raising.
        return []

    configs = []
    for filename in files:
        if not (filename.startswith("configs/") and filename.endswith(".json")):
            continue
        try:
            path = hf_hub_download(repo_id=CONFIGS_REPO, filename=filename,
                                   repo_type=CONFIGS_REPO_TYPE, token=token)
            # JSON is UTF-8; don't depend on the platform's default encoding.
            with open(path, encoding="utf-8") as fh:
                configs.append(PipelineConfig.from_dict(json.load(fh)))
        except Exception:
            # One unreadable config must not hide the remaining ones.
            continue

    # Best score first.
    configs.sort(key=lambda c: c.overall_score, reverse=True)
    return configs


def get_leaderboard(token: str = None) -> list:
    """Build display-ready leaderboard rows from the stored configs.

    Configs are taken in the order returned by ``list_configs``. Each row is
    a dict with a 1-based ``Rank``, the config's name, author and test-song
    count, its scores formatted as fixed-precision strings, and the first
    ten characters of its timestamp as ``Date`` (empty if unset).
    """
    rows = []
    for rank, cfg in enumerate(list_configs(token), start=1):
        row = {
            'Rank': rank,
            'Name': cfg.name,
            'Score': f"{cfg.overall_score:.1f}",
            'SI-SDR': f"{cfg.mean_si_sdr:.1f}",
            'Sample Q': f"{cfg.mean_sample_score:.1f}",
            'Env Corr': f"{cfg.mean_env_corr:.3f}",
            'Onset (ms)': f"{cfg.mean_onset_error_ms:.1f}",
            'Tests': cfg.n_test_songs,
            'Author': cfg.author,
        }
        # Keep only the leading 10 characters of the timestamp.
        if cfg.timestamp:
            row['Date'] = cfg.timestamp[:10]
        else:
            row['Date'] = ''
        rows.append(row)
    return rows