File size: 2,843 Bytes
8bd48f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
Audio processing utilities
"""
import numpy as np
import librosa
import soundfile as sf
from typing import Tuple, Optional
import logging

logger = logging.getLogger(__name__)

class AudioProcessor:
    """Audio helpers built on librosa, soundfile and NumPy.

    Every method that needs a sample rate and is not given one explicitly
    uses ``self.sample_rate``.
    """

    def __init__(self, sample_rate: int = 16000):
        """Create a processor.

        Args:
            sample_rate: Sample rate in Hz used for loading, saving, pitch
                shifting, tempo estimation and fade-length computation.
                Defaults to 16000.
        """
        self.sample_rate = sample_rate  # Default sample rate for models

    def load_audio(self, file_path: str) -> Tuple[np.ndarray, int]:
        """Load an audio file via ``librosa.load`` at ``self.sample_rate``.

        Args:
            file_path: Path of the audio file to read.

        Returns:
            ``(audio, sr)`` exactly as returned by ``librosa.load``.

        Raises:
            Exception: Any error raised by ``librosa.load`` is logged and
                re-raised unchanged.
        """
        try:
            audio, sr = librosa.load(file_path, sr=self.sample_rate)
            return audio, sr
        except Exception as e:
            logger.error(f"Failed to load audio: {str(e)}")
            raise

    def save_audio(
        self,
        audio_array: np.ndarray,
        output_path: str,
        sample_rate: Optional[int] = None,
    ):
        """Write an audio array to ``output_path`` via ``soundfile.write``.

        Args:
            audio_array: Samples to write.
            output_path: Destination file path.
            sample_rate: Sample rate in Hz; when omitted (or falsy),
                ``self.sample_rate`` is used.

        Raises:
            Exception: Any error raised by ``soundfile.write`` is logged and
                re-raised unchanged.
        """
        try:
            sr = sample_rate or self.sample_rate
            sf.write(output_path, audio_array, sr)
            logger.info(f"Audio saved to {output_path}")
        except Exception as e:
            logger.error(f"Failed to save audio: {str(e)}")
            raise

    def normalize_audio(self, audio: np.ndarray) -> np.ndarray:
        """Scale audio so that its peak absolute value is 1.

        Args:
            audio: Input samples.

        Returns:
            A new array equal to ``audio / max(|audio|)``. Empty or
            all-zero input has no peak to scale by and is returned as an
            unmodified copy instead of producing an error or NaNs.
        """
        audio = np.asarray(audio)
        if audio.size == 0:
            # np.max raises on an empty array; there is nothing to scale.
            return audio.copy()

        peak = np.max(np.abs(audio))
        if peak == 0:
            # Pure silence: dividing by zero would fill the result with NaN.
            return audio.copy()

        return audio / peak

    def trim_silence(self, audio: np.ndarray, threshold: float = 0.01) -> np.ndarray:
        """Remove leading and trailing silence with ``librosa.effects.trim``.

        Args:
            audio: Input samples.
            threshold: NOTE(review): accepted for interface compatibility
                but currently unused; trimming always uses ``top_db=20``.

        Returns:
            The trimmed signal (first element of the tuple returned by
            ``librosa.effects.trim``).
        """
        trimmed, _ = librosa.effects.trim(
            audio, top_db=20, frame_length=512, hop_length=256
        )
        return trimmed

    def change_speed(self, audio: np.ndarray, speed_factor: float) -> np.ndarray:
        """Time-stretch audio with ``librosa.effects.time_stretch``.

        Args:
            audio: Input samples.
            speed_factor: Passed to librosa as ``rate``.

        Returns:
            The time-stretched signal.
        """
        return librosa.effects.time_stretch(audio, rate=speed_factor)

    def change_pitch(self, audio: np.ndarray, n_steps: float) -> np.ndarray:
        """Shift pitch with ``librosa.effects.pitch_shift``.

        Args:
            audio: Input samples, interpreted at ``self.sample_rate``.
            n_steps: Number of steps to shift, passed to librosa as
                ``n_steps``.

        Returns:
            The pitch-shifted signal.
        """
        return librosa.effects.pitch_shift(audio, sr=self.sample_rate, n_steps=n_steps)

    def get_spectrogram(self, audio: np.ndarray) -> np.ndarray:
        """Return the short-time Fourier transform of ``audio``.

        The result is whatever ``librosa.stft`` returns with its default
        parameters; no magnitude or dB conversion is applied here.
        """
        return librosa.stft(audio)

    def get_tempo(self, audio: np.ndarray) -> float:
        """Estimate tempo (BPM) with ``librosa.beat.beat_track``.

        Args:
            audio: Input samples, interpreted at ``self.sample_rate``.

        Returns:
            The estimated tempo as a plain Python ``float``.
        """
        tempo, _ = librosa.beat.beat_track(y=audio, sr=self.sample_rate)
        # Depending on the librosa version the tempo comes back as a scalar
        # or as an ndarray; collapse either form to a float as annotated.
        return float(np.atleast_1d(tempo).ravel()[0])

    def apply_fade(self, audio: np.ndarray, fade_in: float = 0.1, fade_out: float = 0.1) -> np.ndarray:
        """Apply a linear fade-in and fade-out.

        Args:
            audio: Input samples. The array is not modified.
            fade_in: Fade-in duration in seconds (at ``self.sample_rate``).
            fade_out: Fade-out duration in seconds (at ``self.sample_rate``).

        Returns:
            A new array with the fades applied. Fades longer than the clip
            are shortened to the clip length. Non-floating input is
            converted to ``float32`` so the fractional gains can be stored.
        """
        # Work on a copy so the caller's buffer is left untouched.
        faded = np.array(audio, copy=True)
        if not np.issubdtype(faded.dtype, np.inexact):
            faded = faded.astype(np.float32)

        total = len(faded)
        # Clamp to the clip length; a longer ramp cannot be broadcast onto
        # the (shorter) slice.
        fade_in_samples = min(int(fade_in * self.sample_rate), total)
        fade_out_samples = min(int(fade_out * self.sample_rate), total)

        if fade_in_samples > 0:
            faded[:fade_in_samples] *= np.linspace(0, 1, fade_in_samples)

        if fade_out_samples > 0:
            faded[-fade_out_samples:] *= np.linspace(1, 0, fade_out_samples)

        return faded