File size: 5,861 Bytes
e049981
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""Audio mixing and effects for song generation."""

import numpy as np
import soundfile as sf
from pathlib import Path
from typing import Tuple, Optional
import subprocess
import sys


class AudioMixer:
    """Mixes vocals with instrumental and applies effects.

    Every method is a static helper that takes audio as a NumPy array
    (samples along axis 0) and returns a new array; inputs are never
    modified in place.
    """

    @staticmethod
    def normalize_audio(audio: np.ndarray, target_db: float = -3.0) -> np.ndarray:
        """
        Peak-normalize audio to a target dB level.

        Args:
            audio: Audio array
            target_db: Target peak level in dB relative to full scale (1.0)

        Returns:
            A new array scaled so its absolute peak equals the target level,
            then clipped to [-1.0, 1.0]. Silent (all-zero) input is only
            clipped; empty input is returned as an unchanged copy.
        """
        audio = np.asarray(audio)

        # np.max raises on zero-size arrays, so there is nothing to scale.
        if audio.size == 0:
            return audio.copy()

        # Convert dB to a linear amplitude factor.
        target_linear = 10 ** (target_db / 20.0)

        current_peak = np.max(np.abs(audio))

        # Skip scaling for pure silence to avoid dividing by zero.
        if current_peak > 0:
            audio = audio * (target_linear / current_peak)

        # Guard against targets above 0 dB pushing samples past full scale.
        return np.clip(audio, -1.0, 1.0)

    @staticmethod
    def add_reverb(audio: np.ndarray, sr: int = 16000, room_scale: float = 0.3,
                   delay_ms: float = 50) -> np.ndarray:
        """
        Add a simple single-echo reverb effect.

        The output is the input plus one delayed copy of itself scaled by
        ``room_scale``. The output has the same length as the input, so the
        tail of the echo is truncated.

        Args:
            audio: Input audio
            sr: Sample rate
            room_scale: Gain applied to the delayed copy (0-1)
            delay_ms: Delay in milliseconds; negative values are treated as 0

        Returns:
            Audio with reverb. If the delay is at least as long as the
            audio, the echo is silent and the result equals the input.
        """
        audio = np.asarray(audio)
        n_samples = len(audio)

        # Truncate to whole samples; a negative delay is meaningless, so clamp.
        delay_samples = max(0, int((delay_ms / 1000.0) * sr))

        delayed = np.zeros_like(audio)
        if delay_samples < n_samples:
            # Slice with an explicit end index: ``audio[:-delay_samples]``
            # would be empty when delay_samples == 0 and make the
            # assignment fail with a shape mismatch.
            delayed[delay_samples:] = audio[:n_samples - delay_samples]

        # Mix original with the delayed copy.
        return audio + room_scale * delayed

    @staticmethod
    def compress_audio(audio: np.ndarray, threshold: float = 0.6, ratio: float = 4.0) -> np.ndarray:
        """
        Apply simple per-sample dynamic range compression.

        Samples whose magnitude exceeds ``threshold`` have the excess
        divided by ``ratio``; quieter samples pass through unchanged.

        Args:
            audio: Input audio
            threshold: Compression threshold (0-1)
            ratio: Compression ratio; must be positive

        Returns:
            A new compressed array. The input array is left untouched.

        Raises:
            ValueError: If ``ratio`` is not positive.
        """
        if ratio <= 0:
            raise ValueError(f"ratio must be positive, got {ratio}")

        # Work on a copy so the caller's buffer is not modified.
        compressed = np.array(audio, copy=True)
        if not np.issubdtype(compressed.dtype, np.floating):
            # Integer buffers would silently truncate the compressed values.
            compressed = compressed.astype(np.float64)

        magnitude = np.abs(compressed)
        loud = magnitude > threshold

        # Keep the sign, shrink only the part of the magnitude above threshold.
        compressed[loud] = np.sign(compressed[loud]) * (
            threshold + (magnitude[loud] - threshold) / ratio
        )

        return compressed

    @staticmethod
    def mix_audio(vocal: np.ndarray, instrumental: np.ndarray,
                  vocal_level: float = 0.7, instrumental_level: float = 0.3,
                  add_reverb: bool = True, add_compression: bool = True,
                  sr: int = 16000) -> np.ndarray:
        """
        Mix vocals and instrumental with effects.

        Both tracks are peak-normalized to -6 dB, the vocal optionally gets
        reverb and compression, both are trimmed to the shorter length,
        summed with the given levels, and the result is peak-normalized to
        -3 dB. Progress messages are printed to stdout.

        Args:
            vocal: Vocal audio
            instrumental: Instrumental audio
            vocal_level: Vocal volume level (0-1)
            instrumental_level: Instrumental volume level (0-1)
            add_reverb: Whether to add reverb to vocals
            add_compression: Whether to add compression to vocals
            sr: Sample rate

        Returns:
            Mixed audio, as long as the shorter of the two inputs
        """
        print("[AudioMixer] Normalizing tracks...")

        # Bring both tracks to the same peak so the level weights are meaningful.
        vocal = AudioMixer.normalize_audio(vocal, -6.0)
        instrumental = AudioMixer.normalize_audio(instrumental, -6.0)

        print("[AudioMixer] Adding effects...")

        if add_reverb:
            vocal = AudioMixer.add_reverb(vocal, sr, room_scale=0.2, delay_ms=40)

        if add_compression:
            vocal = AudioMixer.compress_audio(vocal, threshold=0.5, ratio=3.0)

        print("[AudioMixer] Mixing tracks...")

        # Trim both tracks to the shorter one so they can be summed.
        min_len = min(len(vocal), len(instrumental))
        vocal = vocal[:min_len]
        instrumental = instrumental[:min_len]

        mixed = vocal_level * vocal + instrumental_level * instrumental

        # Normalize final mix
        mixed = AudioMixer.normalize_audio(mixed, -3.0)

        # np.max raises on an empty mix; report a zero peak instead.
        peak = np.max(np.abs(mixed)) if mixed.size else 0.0
        print(f"[AudioMixer] Mix complete - Peak: {peak:.4f}")

        return mixed

    @staticmethod
    def save_audio(audio: np.ndarray, output_path: Path, sr: int = 16000) -> None:
        """
        Save audio to file, creating parent directories as needed.

        Args:
            audio: Audio array
            output_path: Output file path
            sr: Sample rate
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        print(f"[AudioMixer] Saving to {output_path}")
        sf.write(output_path, audio, sr)
        print("[AudioMixer] Saved successfully")

    @staticmethod
    def mix_and_save(vocal: np.ndarray, instrumental: np.ndarray,
                     output_path: Path, sr: int = 16000,
                     add_effects: bool = True) -> Path:
        """
        Mix audio and save to file.

        Args:
            vocal: Vocal audio
            instrumental: Instrumental audio
            output_path: Output file path
            sr: Sample rate
            add_effects: Whether to add reverb and compression to the vocal

        Returns:
            Output file path
        """
        mixed = AudioMixer.mix_audio(
            vocal, instrumental,
            add_reverb=add_effects,
            add_compression=add_effects,
            sr=sr,
        )

        AudioMixer.save_audio(mixed, output_path, sr)

        return Path(output_path)