File size: 2,920 Bytes
1886358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import torch
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt


class AudioFeatureExtractor:
    """Load one audio file and derive spectrogram features from it.

    The audio is read once in ``__init__`` with ``librosa.load`` and kept as
    a torch tensor in ``self.waveform``; every feature method converts it
    back to a NumPy array before calling librosa.
    """

    def __init__(self, wavfile, sr=16000, n_fft=1024, hop_length=51, n_mels=256):
        """Read *wavfile* and store the analysis parameters.

        Args:
            wavfile: Path (or anything ``librosa.load`` accepts) of the audio.
            sr: Target sampling rate handed to ``librosa.load``. ``None``
                keeps the file's native rate.
            n_fft: FFT window size used by the STFT / mel-spectrogram.
            hop_length: Hop between successive frames, in samples.
            n_mels: Number of mel bands.
        """
        self.wavfile = wavfile
        self.target_sr = sr
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.n_mels = n_mels

        # Load the audio with librosa (it resamples to `sr` automatically).
        samples, loaded_sr = librosa.load(self.wavfile, sr=self.target_sr)
        # Add a leading axis so the tensor is stored as (1, ...).
        self.waveform = torch.tensor(samples).unsqueeze(0)
        # Keep the rate librosa reports rather than echoing the request:
        # it equals `sr` when one is given, and is the native rate when
        # `sr` is None (previously `self.sr` ended up as None in that case).
        self.sr = loaded_sr

    def _samples(self) -> np.ndarray:
        """Return the stored waveform as a NumPy array without the leading axis."""
        return self.waveform.squeeze(0).numpy()

    def get_spectrogram(self, to_db=True):
        """Compute a plain (linear-frequency) power spectrogram.

        Args:
            to_db: When true, convert power to decibels relative to the
                maximum value (``ref=np.max``).

        Returns:
            The squared STFT magnitude, optionally converted to dB.
        """
        magnitude = np.abs(
            librosa.stft(
                self._samples(),
                n_fft=self.n_fft,
                hop_length=self.hop_length,
            )
        )
        spec = magnitude ** 2
        if to_db:
            spec = librosa.power_to_db(spec, ref=np.max)
        return spec

    def get_melspectrogram(self):
        """Compute the mel-spectrogram in decibels relative to its maximum."""
        mel_spec = librosa.feature.melspectrogram(
            y=self._samples(),
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            power=2.0,
        )
        return librosa.power_to_db(mel_spec, ref=np.max)

    def normalize(self, spec):
        """Min-max scale *spec* into the range 0-1.

        A small epsilon in the denominator avoids division by zero, so a
        constant input maps to all zeros.
        """
        spec_min, spec_max = spec.min(), spec.max()
        return (spec - spec_min) / (spec_max - spec_min + 1e-6)

    def to_grayscale(self, spec):
        """Prepend a single channel axis: ``shape`` becomes ``(1, *shape)``."""
        return np.expand_dims(spec, axis=0)

    def get_normalized_melspec(self):
        """Return the dB mel-spectrogram, min-max normalized, with a channel axis."""
        mel_db = self.get_melspectrogram()
        mel_norm = self.normalize(mel_db)
        return self.to_grayscale(mel_norm)

    def _render_melspectrogram(self, **specshow_kwargs):
        """Draw the mel-spectrogram on a new axis-less 10x4 figure and return it.

        Extra keyword arguments are forwarded to ``librosa.display.specshow``.
        They are forwarded only when given, so that omitting ``cmap`` lets
        librosa choose its own default colormap.
        """
        mel_db = self.get_melspectrogram()
        fig = plt.figure(figsize=(10, 4))
        try:
            librosa.display.specshow(
                mel_db, sr=self.sr, hop_length=self.hop_length, **specshow_kwargs
            )
            plt.axis("off")
            plt.tight_layout()
        except Exception:
            # Do not leave a half-built figure registered with pyplot.
            plt.close(fig)
            raise
        return fig

    def plot_melspectrogram(self, save_path=None):
        """Show the mel-spectrogram (viridis colormap), or save it to *save_path*.

        Args:
            save_path: If truthy, the image is written there and the figure
                is closed; otherwise the figure is shown with ``plt.show()``.
        """
        fig = self._render_melspectrogram(cmap="viridis")
        if save_path:
            try:
                fig.savefig(save_path, bbox_inches="tight", pad_inches=0)
            finally:
                # Close even if writing fails so figures do not accumulate.
                plt.close(fig)
        else:
            plt.show()

    def save_melspectrogram(self, out_path="melspec.png"):
        """Write the mel-spectrogram image to *out_path* and return that path.

        Uses librosa's default colormap (no explicit ``cmap``).
        """
        fig = self._render_melspectrogram()
        try:
            fig.savefig(out_path, bbox_inches="tight", pad_inches=0)
        finally:
            plt.close(fig)
        return out_path