# File size: 8,508 Bytes
# 31bf74c
"""
Audio synthesis utilities for beat tracking evaluation.

This module provides functions to:
- Generate click sounds for beats and downbeats
- Mix click tracks with original audio
- Save audio files with beat annotations

Example usage:
    from exp.data.audio import create_click_track, mix_audio, save_audio

    # Create click track
    clicks = create_click_track(
        beat_times=pred_beats,
        downbeat_times=pred_downbeats,
        duration=30.0,
        sr=16000
    )

    # Mix with original audio
    mixed = mix_audio(original_audio, clicks, click_volume=0.5)

    # Save to file
    save_audio(mixed, "output.wav", sr=16000)
"""

import numpy as np
from pathlib import Path


def generate_click(
    frequency: float = 1000.0,
    duration: float = 0.02,
    sr: int = 16000,
    attack: float = 0.002,
    decay: float = 0.018,
) -> np.ndarray:
    """
    Generate a single click sound.

    The click is a sine tone shaped by a linear attack ramp (0 -> 1) at the
    start and a linear decay ramp (1 -> 0) at the end. Each ramp is clamped
    to the click length, so a ramp longer than the click is shortened rather
    than raising (attack) or being skipped (decay). Where the two ramps
    overlap, their gains are multiplied.

    Args:
        frequency: Frequency of the click tone in Hz
        duration: Duration of the click in seconds
        sr: Sample rate
        attack: Attack time in seconds
        decay: Decay time in seconds

    Returns:
        Click waveform as numpy array with ``int(duration * sr)`` samples
        (empty when that count is not positive)
    """
    num_samples = max(int(duration * sr), 0)
    t = np.arange(num_samples) / sr

    # Generate sine wave
    wave = np.sin(2 * np.pi * frequency * t)

    # Clamp ramp lengths to [0, num_samples] so slicing below always matches
    # the length of the ramp being applied.
    attack_samples = min(max(int(attack * sr), 0), num_samples)
    decay_samples = min(max(int(decay * sr), 0), num_samples)

    # Apply envelope (attack-decay). Multiplying instead of assigning keeps
    # both ramps in effect when they overlap; for non-overlapping ramps the
    # result is identical to plain assignment.
    envelope = np.ones_like(t)
    if attack_samples > 0:
        envelope[:attack_samples] *= np.linspace(0, 1, attack_samples)
    if decay_samples > 0:
        envelope[num_samples - decay_samples :] *= np.linspace(1, 0, decay_samples)

    return wave * envelope


def create_click_track(
    beat_times: list[float] | np.ndarray,
    downbeat_times: list[float] | np.ndarray | None = None,
    duration: float | None = None,
    sr: int = 16000,
    beat_freq: float = 1000.0,
    downbeat_freq: float = 1500.0,
    click_duration: float = 0.03,
) -> np.ndarray:
    """
    Create a click track from beat and downbeat times.

    Args:
        beat_times: List of beat times in seconds
        downbeat_times: List of downbeat times in seconds (optional)
        duration: Total duration in seconds (auto-detected if None)
        sr: Sample rate
        beat_freq: Frequency for beat clicks (Hz)
        downbeat_freq: Frequency for downbeat clicks (Hz)
        click_duration: Duration of each click in seconds

    Returns:
        Click track as numpy array
    """
    beat_times = np.array(beat_times) if len(beat_times) > 0 else np.array([])
    if downbeat_times is not None:
        downbeat_times = (
            np.array(downbeat_times) if len(downbeat_times) > 0 else np.array([])
        )
    else:
        downbeat_times = np.array([])

    # Determine duration
    if duration is None:
        all_times = np.concatenate([beat_times, downbeat_times])
        if len(all_times) == 0:
            return np.array([])
        duration = float(np.max(all_times)) + 1.0

    # Create output array
    total_samples = int(duration * sr)
    output = np.zeros(total_samples, dtype=np.float32)

    # Generate click templates
    beat_click = generate_click(frequency=beat_freq, duration=click_duration, sr=sr)
    downbeat_click = generate_click(
        frequency=downbeat_freq, duration=click_duration, sr=sr
    )

    # Convert downbeat times to set for fast lookup
    downbeat_set = set(np.round(downbeat_times, 3))

    # Add beat clicks
    for t in beat_times:
        sample_idx = int(t * sr)
        if sample_idx < 0 or sample_idx >= total_samples:
            continue

        # Use downbeat click if this is also a downbeat
        is_downbeat = np.round(t, 3) in downbeat_set
        click = downbeat_click if is_downbeat else beat_click

        # Add click to output
        end_idx = min(sample_idx + len(click), total_samples)
        click_len = end_idx - sample_idx
        output[sample_idx:end_idx] += click[:click_len]

    # Add downbeat clicks (for downbeats not already in beats)
    beat_set = set(np.round(beat_times, 3))
    for t in downbeat_times:
        if np.round(t, 3) in beat_set:
            continue  # Already added as beat

        sample_idx = int(t * sr)
        if sample_idx < 0 or sample_idx >= total_samples:
            continue

        end_idx = min(sample_idx + len(downbeat_click), total_samples)
        click_len = end_idx - sample_idx
        output[sample_idx:end_idx] += downbeat_click[:click_len]

    return output


def mix_audio(
    audio: np.ndarray,
    click_track: np.ndarray,
    click_volume: float = 0.5,
) -> np.ndarray:
    """
    Mix original audio with a click track.

    Both inputs are zero-padded to the longer of the two lengths. The audio
    is peak-normalized to 0.8 and the clicks to ``click_volume * 0.8``; if
    the sum exceeds 1.0 in magnitude, the mix is rescaled to a 0.95 peak.

    Args:
        audio: Original audio waveform (copied into a 1-D buffer)
        click_track: Click track to overlay (copied into a 1-D buffer)
        click_volume: Volume of clicks relative to audio (0.0 to 1.0)

    Returns:
        Mixed audio as float32 array; empty when both inputs are empty
    """
    # Ensure same length
    max_len = max(len(audio), len(click_track))
    if max_len == 0:
        # Nothing to mix; the peak reductions below would raise on a
        # zero-size array.
        return np.zeros(0, dtype=np.float32)

    audio_padded = np.zeros(max_len, dtype=np.float32)
    click_padded = np.zeros(max_len, dtype=np.float32)

    audio_padded[: len(audio)] = audio
    click_padded[: len(click_track)] = click_track

    # Normalize audio (skipped for all-zero input to avoid dividing by zero)
    audio_max = np.abs(audio_padded).max()
    if audio_max > 0:
        audio_padded = audio_padded / audio_max * 0.8

    # Normalize clicks
    click_max = np.abs(click_padded).max()
    if click_max > 0:
        click_padded = click_padded / click_max * click_volume * 0.8

    # Mix
    mixed = audio_padded + click_padded

    # Prevent clipping
    max_val = np.abs(mixed).max()
    if max_val > 1.0:
        mixed = mixed / max_val * 0.95

    return mixed.astype(np.float32)


def create_comparison_audio(
    audio: np.ndarray,
    pred_beats: list[float],
    pred_downbeats: list[float],
    gt_beats: list[float],
    gt_downbeats: list[float],
    sr: int = 16000,
    click_volume: float = 0.5,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Build three mixes of ``audio`` for listening comparison.

    Predicted beats/downbeats are rendered at 1000/1500 Hz and ground-truth
    beats/downbeats at 800/1200 Hz so the two can be told apart by ear. Both
    click tracks span the duration of ``audio`` (``len(audio) / sr``).

    Args:
        audio: Original audio waveform
        pred_beats: Predicted beat times
        pred_downbeats: Predicted downbeat times
        gt_beats: Ground truth beat times
        gt_downbeats: Ground truth downbeat times
        sr: Sample rate
        click_volume: Volume of clicks

    Returns:
        Tuple of (audio_with_pred_clicks, audio_with_gt_clicks, audio_with_both)
    """
    total_seconds = len(audio) / sr

    def _overlay(track: np.ndarray) -> np.ndarray:
        # Every mix uses the same source audio and click volume.
        return mix_audio(audio, track, click_volume)

    predicted_track = create_click_track(
        beat_times=pred_beats,
        downbeat_times=pred_downbeats,
        duration=total_seconds,
        sr=sr,
        beat_freq=1000.0,
        downbeat_freq=1500.0,
    )
    # Ground truth uses lower tones than the prediction
    reference_track = create_click_track(
        beat_times=gt_beats,
        downbeat_times=gt_downbeats,
        duration=total_seconds,
        sr=sr,
        beat_freq=800.0,
        downbeat_freq=1200.0,
    )

    with_prediction = _overlay(predicted_track)
    with_reference = _overlay(reference_track)
    with_both = _overlay(predicted_track + reference_track)
    return with_prediction, with_reference, with_both


def save_audio(
    audio: np.ndarray,
    path: str | Path,
    sr: int = 16000,
) -> None:
    """
    Save audio to a WAV file.

    Args:
        audio: Audio waveform
        path: Output file path
        sr: Sample rate
    """
    import scipy.io.wavfile as wavfile

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Convert to int16
    audio_int16 = (audio * 32767).astype(np.int16)
    wavfile.write(str(path), sr, audio_int16)


if __name__ == "__main__":
    # Small end-to-end demo: synthesize a tone, overlay clicks, write a WAV.
    print("Audio synthesis demo...")

    sample_rate = 16000
    length_seconds = 10.0
    demo_path = "/tmp/beat_click_demo.wav"

    # A quiet 220 Hz sine stands in for the "music"
    timeline = np.arange(int(length_seconds * sample_rate)) / sample_rate
    tone = np.sin(2 * np.pi * 220 * timeline) * 0.3

    # One beat every 0.5 s and one downbeat every 2 s
    beat_grid = np.arange(0, length_seconds, 0.5).tolist()
    downbeat_grid = np.arange(0, length_seconds, 2.0).tolist()

    # Render the clicks and blend them with the tone
    click_layer = create_click_track(
        beat_grid, downbeat_grid, duration=length_seconds, sr=sample_rate
    )
    result = mix_audio(tone, click_layer, click_volume=0.6)

    sample_count = len(result)
    print(
        f"Created mixed audio: {sample_count} samples "
        f"({sample_count / sample_rate:.2f}s)"
    )
    print(f"Beats: {len(beat_grid)}, Downbeats: {len(downbeat_grid)}")

    # Write the demo file
    save_audio(result, demo_path, sr=sample_rate)
    print(f"Saved demo to {demo_path}")