File size: 1,827 Bytes
a602628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from typing import Any, Dict, List


class DataframeMixin:
    """Display and export helpers for a collection of samples.

    The host class must provide ``self.samples`` (an iterable of sample
    objects) and, for :meth:`to_training_format`,
    ``self.metadata.tag_position``.
    """

    def get_samples_dataframe_data(self) -> List[List[Any]]:
        """Get samples data in a format suitable for Gradio DataFrame.

        Returns:
            One row per sample, in ``self.samples`` order, with the columns:
            index, filename, duration (one decimal place plus ``"s"``),
            lyrics status, labeled marker, bpm, keyscale and a caption
            preview. Falsy bpm/keyscale/caption values are shown as ``"-"``.
        """
        rows = []
        for index, sample in enumerate(self.samples):
            if sample.has_raw_lyrics():
                lyrics_status = "📝"
            elif sample.is_instrumental:
                lyrics_status = "🎵"
            else:
                lyrics_status = "-"

            # Normalise a missing caption to "" before measuring it, so a
            # None caption falls through to the "-" placeholder instead of
            # raising TypeError on len(None).
            caption = sample.caption or ""
            if len(caption) > 50:
                caption_preview = caption[:50] + "..."
            else:
                caption_preview = caption or "-"

            rows.append(
                [
                    index,
                    sample.filename,
                    f"{sample.duration:.1f}s",
                    lyrics_status,
                    "✅" if sample.labeled else "❌",
                    sample.bpm or "-",
                    sample.keyscale or "-",
                    caption_preview,
                ]
            )
        return rows

    def to_training_format(self) -> List[Dict[str, Any]]:
        """Convert dataset to format suitable for training.

        Samples whose ``labeled`` attribute is falsy are skipped.

        Returns:
            One dict per labeled sample, in ``self.samples`` order. The
            ``"caption"`` entry comes from
            ``sample.get_full_caption(self.metadata.tag_position)``; every
            other entry is copied from the sample attribute of the same name.
        """
        return [
            {
                "audio_path": sample.audio_path,
                "caption": sample.get_full_caption(self.metadata.tag_position),
                "lyrics": sample.lyrics,
                "bpm": sample.bpm,
                "keyscale": sample.keyscale,
                "timesignature": sample.timesignature,
                "duration": sample.duration,
                "language": sample.language,
                "is_instrumental": sample.is_instrumental,
            }
            for sample in self.samples
            if sample.labeled
        ]