Spaces:

Gamahea
/

ACE-Step-Custom

Running on Zero

File size: 4,743 Bytes

a602628

from typing import Optional, Tuple

from loguru import logger

from .label_utils import get_audio_codes, parse_int
from .models import AudioSample


class LabelSingleMixin:
    """Mixin that labels one sample at a time.

    The host class must expose ``self.samples``, an indexable collection of
    samples that supports ``len()`` and item assignment.
    """

    def label_sample(
        self,
        sample_idx: int,
        dit_handler,
        llm_handler,
        format_lyrics: bool = False,
        transcribe_lyrics: bool = False,
        skip_metas: bool = False,
        progress_callback=None,
    ) -> Tuple[Optional["AudioSample"], str]:
        """Label a single sample using the LLM.

        Two labeling paths exist:

        * Lyrics formatting: taken when ``format_lyrics`` is true and the
          sample has raw lyrics and is not instrumental. The raw lyrics are
          sent to ``acestep.inference.format_sample``.
        * Audio understanding: taken otherwise. The encoded audio is sent to
          ``llm_handler.understand_audio_from_codes``.

        In both paths a BPM or keyscale already present on the sample before
        the call is kept; only missing values are filled in.

        Args:
            sample_idx: Index into ``self.samples``.
            dit_handler: Handler forwarded to ``get_audio_codes`` to encode
                the sample's audio file.
            llm_handler: Handler used for formatting or audio understanding.
            format_lyrics: Prefer the lyrics-formatting path when the sample
                has usable raw lyrics.
            transcribe_lyrics: In the audio-understanding path, use the
                lyrics returned by the LLM even if raw lyrics exist.
            skip_metas: When true, leave ``bpm``, ``keyscale`` and
                ``timesignature`` untouched.
            progress_callback: Optional callable invoked with a
                human-readable progress string.

        Returns:
            ``(sample, status_message)``. ``sample`` is ``None`` only when
            ``sample_idx`` is out of range; on every other failure the sample
            is returned together with a message starting with ``❌``.
        """
        if sample_idx < 0 or sample_idx >= len(self.samples):
            return None, f"❌ Invalid sample index: {sample_idx}"

        sample = self.samples[sample_idx]

        # Snapshot what the sample already carries before anything is mutated,
        # so pre-existing values are never overwritten by LLM output.
        has_preloaded_lyrics = sample.has_raw_lyrics() and not sample.is_instrumental
        has_csv_bpm = sample.bpm is not None
        has_csv_key = bool(sample.keyscale)

        try:
            if progress_callback:
                progress_callback(f"Processing: {sample.filename}")

            # Encoding runs for both paths, so an audio file that cannot be
            # encoded is reported before any LLM call is made.
            audio_codes = get_audio_codes(sample.audio_path, dit_handler)

            if not audio_codes:
                return sample, f"❌ Failed to encode audio: {sample.filename}"

            if progress_callback:
                progress_callback(f"Generating metadata for: {sample.filename}")

            if format_lyrics and has_preloaded_lyrics:
                # Imported lazily, only when this path is actually taken.
                from acestep.inference import format_sample

                result = format_sample(
                    llm_handler=llm_handler,
                    caption="",
                    lyrics=sample.raw_lyrics,
                    user_metadata=None,
                    temperature=0.85,
                    use_constrained_decoding=True,
                )

                if not result.success:
                    return sample, f"❌ LLM format failed: {result.error}"

                sample.caption = result.caption or ""
                if not skip_metas:
                    if not has_csv_bpm:
                        sample.bpm = result.bpm
                    if not has_csv_key:
                        sample.keyscale = result.keyscale or ""
                    sample.timesignature = result.timesignature or ""
                sample.language = result.language or "unknown"
                sample.formatted_lyrics = result.lyrics or ""
                # Fall back to the raw lyrics if the LLM returned nothing.
                sample.lyrics = sample.formatted_lyrics if sample.formatted_lyrics else sample.raw_lyrics

                status_suffix = "(lyrics formatted by LM)"

            else:
                metadata, status = llm_handler.understand_audio_from_codes(
                    audio_codes=audio_codes,
                    temperature=0.7,
                    use_constrained_decoding=True,
                )

                if not metadata:
                    return sample, f"❌ LLM labeling failed: {status}"

                # ``or`` fallbacks (rather than ``dict.get`` defaults) also
                # cover keys that are present but None/empty, matching the
                # handling in the lyrics-formatting path above.
                sample.caption = metadata.get("caption") or ""
                sample.genre = metadata.get("genres") or ""

                if not skip_metas:
                    if not has_csv_bpm:
                        sample.bpm = parse_int(metadata.get("bpm"))
                    if not has_csv_key:
                        sample.keyscale = metadata.get("keyscale") or ""
                    sample.timesignature = metadata.get("timesignature") or ""

                sample.language = metadata.get("vocal_language") or "unknown"

                llm_lyrics = metadata.get("lyrics") or ""

                # Lyrics source priority: instrumental marker, then forced
                # transcription, then preloaded raw lyrics, then LLM lyrics.
                if sample.is_instrumental:
                    sample.lyrics = "[Instrumental]"
                    sample.language = "unknown"
                    sample.formatted_lyrics = ""
                    status_suffix = "(instrumental)"
                elif transcribe_lyrics:
                    sample.formatted_lyrics = llm_lyrics
                    sample.lyrics = llm_lyrics
                    status_suffix = "(lyrics transcribed by LM)"
                elif has_preloaded_lyrics:
                    sample.lyrics = sample.raw_lyrics
                    sample.formatted_lyrics = ""
                    status_suffix = "(using raw lyrics)"
                else:
                    sample.lyrics = llm_lyrics
                    sample.formatted_lyrics = llm_lyrics
                    status_suffix = ""

            sample.labeled = True
            self.samples[sample_idx] = sample

            status_msg = f"✅ Labeled: {sample.filename}"
            if skip_metas:
                status_msg += " (skip metas)"
            if status_suffix:
                status_msg += f" {status_suffix}"

            return sample, status_msg

        except Exception as e:
            # Any failure is logged with its traceback and reported through
            # the status message instead of being raised to the caller.
            logger.exception(f"Error labeling sample {sample.filename}")
            return sample, f"❌ Error: {e}"