File size: 10,433 Bytes
a117a90
1271c6c
 
 
c5a6ba8
 
 
1271c6c
a117a90
1271c6c
c5a6ba8
03a67e2
a117a90
 
 
1271c6c
a117a90
 
 
1271c6c
c5a6ba8
 
 
1271c6c
c5a6ba8
1271c6c
 
be33133
1271c6c
 
 
 
 
 
be33133
1271c6c
 
 
 
be33133
 
a117a90
 
 
c5a6ba8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a117a90
 
 
 
 
 
 
 
 
 
 
c5a6ba8
 
 
03a67e2
c5a6ba8
 
a117a90
c5a6ba8
a117a90
c5a6ba8
 
 
 
 
 
a117a90
 
c5a6ba8
 
 
a117a90
c5a6ba8
 
 
 
 
 
 
 
 
a117a90
c5a6ba8
a117a90
 
 
 
 
 
 
 
 
 
29b5808
a117a90
 
 
 
 
 
c5a6ba8
a117a90
 
 
 
29b5808
a117a90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5a6ba8
a117a90
 
 
 
 
 
 
 
c5a6ba8
 
 
a117a90
c5a6ba8
 
 
 
 
 
 
 
 
 
 
 
 
a117a90
c5a6ba8
 
 
a117a90
c5a6ba8
 
2c3ca13
c5a6ba8
 
 
 
 
 
 
 
 
 
 
a117a90
c5a6ba8
a117a90
 
 
 
 
c5a6ba8
a117a90
 
 
be33133
 
 
 
 
 
 
 
 
 
 
b6e535d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be33133
2c084dd
b6e535d
 
 
 
 
 
 
 
a117a90
 
 
32cc0d4
364e6b1
a117a90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6e535d
a117a90
 
b6e535d
 
a117a90
 
b6e535d
 
a117a90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
import gradio as gr
import numpy as np
import librosa
import librosa.display
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pywt
import io
from PIL import Image
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
import json
import random
import plotly.express as px

# ================================
# CONFIGURATION
# ================================
# Sample rate (Hz) that du_doan passes to librosa.load when reading input audio.
SAMPLE_RATE = 22050
# Clip length in seconds; du_doan pads/truncates to SAMPLE_RATE * MAX_DURATION samples.
MAX_DURATION = 5
# Number of consecutive MFCC frames per window handed to create_sequences.
TIME_STEPS = 20
# When True, du_doan runs denoise_wavelet on the signal before extracting MFCCs.
USE_DENOISE = True

# Keras model, loaded once at import time from a path relative to the working directory.
model = load_model("Huan_luyen_6_huhong.h5")

def load_scaler_from_json(filepath):
    """Rebuild a ``StandardScaler`` from statistics stored in a JSON file.

    The JSON document must contain ``mean_`` and ``scale_`` entries; they are
    converted to NumPy arrays and assigned onto a freshly constructed scaler
    instead of fitting it on data.

    Args:
        filepath: Path of the JSON file holding the saved statistics.

    Returns:
        A ``StandardScaler`` with ``mean_``, ``scale_`` and
        ``n_features_in_`` populated from the file.

    Raises:
        KeyError: If ``mean_`` or ``scale_`` is missing from the JSON data.
    """
    with open(filepath, 'r') as handle:
        stats = json.load(handle)

    restored = StandardScaler()
    means = np.array(stats['mean_'])
    restored.mean_ = means
    restored.scale_ = np.array(stats['scale_'])
    # The feature count is taken from the length of the stored mean vector.
    restored.n_features_in_ = len(means)
    return restored

# Feature scaler restored from the statistics saved in scaler.json.
scaler = load_scaler_from_json("scaler.json")

# label_map.json maps class name -> class index. Read it explicitly as UTF-8 so
# non-ASCII class names decode identically regardless of the platform's
# default encoding.
with open("label_map.json", "r", encoding="utf-8") as f:
    label_map = json.load(f)
# Inverted mapping (class index -> class name) used to label model outputs.
index_to_label = {v: k for k, v in label_map.items()}

# ================================
# HÀM TIỀN XỬ LÝ
# ================================
def denoise_wavelet(signal, wavelet='db8', level=4):
    """Denoise a 1-D signal by soft-thresholding its wavelet coefficients.

    The noise level is estimated from the last (finest) detail band as
    ``median(|d|) / 0.6745`` and converted into the threshold
    ``sigma * sqrt(2 * ln(N))`` with ``N = len(signal)``. Every coefficient
    array returned by the decomposition is soft-thresholded with that value
    before the signal is reconstructed.

    Args:
        signal: 1-D sequence of samples.
        wavelet: Wavelet name passed to PyWavelets.
        level: Decomposition level passed to ``pywt.wavedec``.

    Returns:
        The reconstructed signal, truncated to ``len(signal)`` samples.
    """
    n_samples = len(signal)
    coeffs = pywt.wavedec(signal, wavelet, level=level)
    sigma = np.median(np.abs(coeffs[-1])) / 0.6745
    uthresh = sigma * np.sqrt(2 * np.log(n_samples))
    coeffs_denoised = [pywt.threshold(c, value=uthresh, mode='soft') for c in coeffs]
    # waverec may return one extra sample when the input length is odd; trim
    # so callers can align the result sample-for-sample with the input.
    return pywt.waverec(coeffs_denoised, wavelet)[:n_samples]

def create_sequences(mfcc, time_steps=20):
    """Stack overlapping windows of ``time_steps`` consecutive rows.

    Windows start at every index from 0 up to (but not including)
    ``len(mfcc) - time_steps``, so an input with exactly ``time_steps`` rows
    (or fewer) yields an empty array.

    Args:
        mfcc: Sequence of feature rows (one row per frame).
        time_steps: Number of rows in each window.

    Returns:
        A NumPy array holding the windows in start-index order.
    """
    window_count = len(mfcc) - time_steps
    windows = []
    for start in range(window_count):
        stop = start + time_steps
        windows.append(mfcc[start:stop])
    return np.array(windows)

def cat_2s_ngau_nhien(y, sr, duration=2):
    """Return a random contiguous excerpt of ``duration`` seconds from ``y``.

    Args:
        y: Sequence of audio samples.
        sr: Sample rate in samples per second.
        duration: Excerpt length in seconds; may be fractional.

    Returns:
        ``y`` unchanged when it holds fewer than ``duration * sr`` samples,
        otherwise a slice of ``int(duration * sr)`` samples starting at a
        uniformly chosen offset.
    """
    # Slice bounds and randint arguments must be integers; converting once
    # keeps fractional durations (e.g. 1.5 s) from raising TypeError.
    n_samples = int(duration * sr)
    if len(y) < n_samples:
        return y
    start = random.randint(0, len(y) - n_samples)
    return y[start:start + n_samples]

# ================================
# VẼ ẢNH (numpy array)
# ================================
def fig_to_numpy(fig):
    """Render a Matplotlib figure to an in-memory PNG and return it as an array.

    The figure is closed before returning, including when rendering fails.

    Args:
        fig: Matplotlib figure to rasterise.

    Returns:
        NumPy array of the decoded PNG image.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", dpi=90, bbox_inches="tight")
    finally:
        # Close even if savefig raises, so failed renders do not leave
        # figures registered with pyplot for the lifetime of the process.
        plt.close(fig)
    buf.seek(0)
    return np.array(Image.open(buf))

def tao_anh_mel(file_path):
    """Render a Mel spectrogram image for a random excerpt of an audio file.

    The file is loaded as mono with ``sr=None``, cropped with
    ``cat_2s_ngau_nhien``, converted to a 128-band Mel power spectrogram in
    dB (referenced to its maximum) and drawn with the ``magma`` colormap.

    Args:
        file_path: Path of the audio file to visualise.

    Returns:
        The rendered figure as a NumPy image array.
    """
    audio, rate = librosa.load(file_path, sr=None, mono=True)
    excerpt = cat_2s_ngau_nhien(audio, rate)

    mel_power = librosa.feature.melspectrogram(y=excerpt, sr=rate, n_mels=128)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    figure, axes = plt.subplots(figsize=(6, 3))
    librosa.display.specshow(
        mel_db,
        sr=rate,
        x_axis='time',
        y_axis='mel',
        ax=axes,
        cmap='magma',
    )
    axes.set_title("Phổ tần Mel", fontsize=10)
    return fig_to_numpy(figure)

def tao_wavelet_transform(file_path):
    """Render a continuous-wavelet-transform magnitude image for an audio file.

    The file is loaded as mono with ``sr=None`` and cropped with
    ``cat_2s_ngau_nhien``. A Morlet CWT over scales 1..127 is computed and
    its magnitude shown as an image; the horizontal axis spans the excerpt's
    duration in seconds and the vertical extent is fixed to 1..128.

    Args:
        file_path: Path of the audio file to visualise.

    Returns:
        The rendered figure as a NumPy image array.
    """
    audio, rate = librosa.load(file_path, sr=None, mono=True)
    excerpt = cat_2s_ngau_nhien(audio, rate)

    scale_axis = np.arange(1, 128)
    coefficients, _ = pywt.cwt(
        excerpt, scales=scale_axis, wavelet='morl', sampling_period=1/rate
    )
    magnitude = np.abs(coefficients)
    excerpt_seconds = len(excerpt)/rate

    figure, axes = plt.subplots(figsize=(6, 3))
    axes.imshow(
        magnitude,
        extent=[0, excerpt_seconds, 1, 128],
        cmap='plasma',
        aspect='auto',
        origin='lower',
    )
    axes.set_title("Phổ sóng con (Wavelet)")
    axes.set_xlabel("Thời gian (s)")
    axes.set_ylabel("Tần số (scale)")
    return fig_to_numpy(figure)

def tao_waveform_image(file_path):
    """Render the time-domain waveform of a random excerpt of an audio file.

    The file is loaded as mono with ``sr=None`` and cropped with
    ``cat_2s_ngau_nhien`` before being drawn with ``librosa.display.waveshow``.

    Args:
        file_path: Path of the audio file to visualise.

    Returns:
        The rendered figure as a NumPy image array.
    """
    audio, rate = librosa.load(file_path, sr=None, mono=True)
    excerpt = cat_2s_ngau_nhien(audio, rate)

    figure, axes = plt.subplots(figsize=(6, 2.5))
    librosa.display.waveshow(excerpt, sr=rate, ax=axes, color='steelblue')

    axes.set_title("Biểu đồ Sóng Âm (Waveform)")
    axes.set_xlabel("Thời gian (s)")
    axes.set_ylabel("Biên độ")
    return fig_to_numpy(figure)

def tao_waveform_denoise(file_path):
    """Render a before/after comparison of wavelet denoising as one image.

    The file is loaded as mono with ``sr=None`` and cropped with
    ``cat_2s_ngau_nhien``. A 3x2 grid is drawn: the left column shows the
    original excerpt and the right column the output of ``denoise_wavelet``;
    the rows are waveform, FFT magnitude (x-axis limited to 0-8000) and
    log-frequency STFT spectrogram in dB.

    Args:
        file_path: Path of the audio file to visualise.

    Returns:
        The rendered figure as a NumPy image array.
    """
    y, sr = librosa.load(file_path, sr=None, mono=True)
    y = cat_2s_ngau_nhien(y, sr)
    # Wavelet reconstruction can come back one sample longer than an
    # odd-length input; trim so both signals share the same FFT frequency
    # axis below (otherwise the right-hand FFT plot raises a shape mismatch).
    y_denoised = denoise_wavelet(y)[:len(y)]

    fig, ax = plt.subplots(3, 2, figsize=(10, 8))

    # 1. Waveform
    librosa.display.waveshow(y, sr=sr, ax=ax[0,0], color='red')
    ax[0,0].set_title("Waveform - Trước lọc")
    librosa.display.waveshow(y_denoised, sr=sr, ax=ax[0,1], color='green')
    ax[0,1].set_title("Waveform - Sau lọc")

    # 2. FFT
    freqs = np.fft.rfftfreq(len(y), 1/sr)
    fft_y = np.abs(np.fft.rfft(y))
    fft_y_denoised = np.abs(np.fft.rfft(y_denoised))

    ax[1,0].plot(freqs, fft_y, color='red')
    ax[1,0].set_xlim(0, 8000)
    ax[1,0].set_title("FFT - Trước lọc")

    ax[1,1].plot(freqs, fft_y_denoised, color='green')
    ax[1,1].set_xlim(0, 8000)
    ax[1,1].set_title("FFT - Sau lọc")

    # 3. Spectrogram
    D1 = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    D2 = librosa.amplitude_to_db(np.abs(librosa.stft(y_denoised)), ref=np.max)

    img1 = librosa.display.specshow(D1, sr=sr, x_axis='time', y_axis='log', ax=ax[2,0], cmap="magma")
    ax[2,0].set_title("Spectrogram - Trước lọc")
    fig.colorbar(img1, ax=ax[2,0], format="%+2.0f dB")

    img2 = librosa.display.specshow(D2, sr=sr, x_axis='time', y_axis='log', ax=ax[2,1], cmap="magma")
    ax[2,1].set_title("Spectrogram - Sau lọc")
    fig.colorbar(img2, ax=ax[2,1], format="%+2.0f dB")

    # Lay out this specific figure rather than pyplot's global "current"
    # figure, which may be a different one when handlers run concurrently.
    fig.tight_layout()
    return fig_to_numpy(fig)

# ================================
# VẼ BIỂU ĐỒ Top-3 (Plotly)
# ================================
def ve_top3_chart(probs):
    """Build a Plotly pie chart of the three highest-probability classes.

    Args:
        probs: Per-class probabilities, indexed by class index; multiplied
            by 100 to obtain percentages.

    Returns:
        A Plotly figure whose slices are the top three classes in
        descending order of probability.
    """
    class_names = [index_to_label[idx] for idx in range(len(probs))]
    percentages = probs * 100

    # argsort is ascending, so reverse it and keep the first three indices.
    best_three = np.argsort(percentages)[::-1][:3]
    slice_values = [percentages[idx] for idx in best_three]
    slice_names = [class_names[idx] for idx in best_three]

    return px.pie(
        values=slice_values,
        names=slice_names,
        title="Top-3 dự đoán"
    )

# ================================
# DỰ ĐOÁN
# ================================
def bao_san_sang(file_path):
    """Produce the "ready" banner and the four analysis images for an audio file.

    Each image is produced by its own renderer, and each renderer loads the
    file and takes its own random excerpt independently.

    Args:
        file_path: Path of the uploaded/recorded audio, or a falsy value when
            no audio is present.

    Returns:
        A 5-tuple ``(banner_html, mel, wavelet, waveform, comparison)``.
        With no audio the banner is an empty string and every image is
        ``None``.
    """
    if file_path:
        banner = "<b style='color:green;'>✅ Âm thanh đã sẵn sàng. Nhấn kiểm tra ngay!</b>"
        renderers = (
            tao_anh_mel,
            tao_wavelet_transform,
            tao_waveform_image,
            tao_waveform_denoise,
        )
        images = [render(file_path) for render in renderers]
        return (banner, *images)
    return "", None, None, None, None

def du_doan(file_path):
    """Classify an audio file and return an HTML report plus a top-3 chart.

    Pipeline: load at ``SAMPLE_RATE`` as mono, trim with
    ``librosa.effects.trim``, pad/truncate to ``SAMPLE_RATE * MAX_DURATION``
    samples, optionally denoise, extract 13 MFCCs per frame, scale them with
    the module-level ``scaler``, cut them into ``TIME_STEPS``-frame windows,
    and average the model's predictions over all windows.

    Args:
        file_path: Path of the audio file, or a falsy value when none is set.

    Returns:
        A 2-tuple ``(html, figure)``. When no audio is given, or no window
        can be formed, ``html`` is an error message and ``figure`` is
        ``None``.
    """
    if not file_path:
        return "<b style='color:red;'>❌ Chưa có âm thanh.</b>", None

    audio, rate = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)
    trimmed, _ = librosa.effects.trim(audio)
    audio = librosa.util.fix_length(trimmed, size=SAMPLE_RATE * MAX_DURATION)

    if USE_DENOISE:
        audio = denoise_wavelet(audio)

    # MFCC matrix is transposed so each row is one frame of 13 coefficients.
    frames = librosa.feature.mfcc(y=audio, sr=rate, n_mfcc=13).T
    frames = scaler.transform(frames)
    windows = create_sequences(frames, time_steps=TIME_STEPS)

    if len(windows) == 0:
        return "<b style='color:red;'>⚠️ Âm thanh quá ngắn để phân tích.</b>", None

    window_probs = model.predict(windows, verbose=0)
    mean_probs = np.mean(window_probs, axis=0)

    best = np.argmax(mean_probs)
    confidence = mean_probs[best] * 100
    # Below 50% confidence the fixed fallback label is reported instead of
    # the top class.
    if confidence < 50:
        verdict = "HƯ HỎNG KHÁC"
    else:
        verdict = index_to_label[best]

    header = f"""<div style='background:#f0faff;color:#000;padding:10px;border-radius:10px'>
<b style='color:#000'>📋 Kết Quả:</b><br>
✅ <b style='color:#000'>Tình trạng:</b> <span style='color:#007acc;font-size:18px'>{verdict.upper()}</span><br>
📊 <b style='color:#000'>Độ tin cậy:</b> <span style='color:#000'>{confidence:.2f}%</span><br>
<hr style='margin:6px 0'>
<b style='color:#000'>Xác suất từng lớp:</b><br>"""
    rows = "".join(
        f"<span style='color:#000'>- {index_to_label[idx]}: {p*100:.1f}%</span><br>"
        for idx, p in enumerate(mean_probs)
    )
    report = header + rows + "</div>"

    return report, ve_top3_chart(mean_probs)

# ================================
# RESET
# ================================
def reset_output():
    """Return blank values for the seven outputs wired to the clear events.

    Returns:
        A 7-tuple: an empty string and four ``None`` values, followed by an
        empty string and one more ``None``.
    """
    blank_preview = ("", None, None, None, None)
    blank_result = ("", None)
    return blank_preview + blank_result

# ================================
# GIAO DIỆN
# ================================
# Build the Gradio interface. The inline CSS styles the element whose
# elem_id is "check-btn" (the check button created below).
with gr.Blocks(css="""
#check-btn {
    background: #007acc;
    color: white;
    height: 48px;
    font-size: 16px;
    font-weight: bold;
    border-radius: 10px;
}
""") as demo:

    # Header strip: an empty div that shows a remote image as its CSS background.
    gr.HTML("""
    <div style="
        display: flex;
        align-items: center;
        background-image: url('https://cdn-uploads.huggingface.co/production/uploads/6881f05ad0fc87fca019ee65/t7NwSiUHpjoFXh1S10MT4.png');
        background-repeat: no-repeat;
        background-size: 100px 40px;
        background-position: 0px 0px;   
        padding-left: 60px;
        height: 50px;
        margin: 0;                     
    ">
    </div>
    """)

    # Centered page title.
    gr.Markdown("""
    <div style='
        display: flex;
        justify-content: center;
        align-items: center;
        margin-top: -10px;
        margin-bottom: 10px;
        height: 40px;
    '>
        <h4 style='color:#007acc; font-size:20px; font-weight:bold; margin: 0;'>
                  CHẨN ĐOÁN HƯ HỎNG TỪ ÂM THANH ĐỘNG CƠ 
        </h4>
    </div>
    """)

    # Two audio inputs side by side: file upload and microphone recording.
    # Both hand a file path to their callbacks (type="filepath").
    with gr.Row():
        audio_file = gr.Audio(type="filepath", label="📂 Tải File Âm Thanh", interactive=True)
        audio_mic = gr.Audio(type="filepath", label="🎤 Ghi Âm", sources=["microphone"], interactive=True)

    # Status banner, check button, and the HTML area for the prediction report.
    thong_bao_ready = gr.HTML()
    btn_check = gr.Button("🔍 KIỂM TRA NGAY", elem_id="check-btn")
    output_html = gr.HTML()

    # Collapsed-by-default panel holding the analysis images and the top-3 chart.
    with gr.Accordion("📊 Phân tích Âm Thanh", open=False):
        mel_output = gr.Image(label="Mel Spectrogram", type="numpy")
        wavelet_output = gr.Image(label="Wavelet Transform", type="numpy")
        waveform_output = gr.Image(label="Waveform", type="numpy")
        waveform_denoise_output = gr.Image(label="So sánh", type="numpy")
        top3_chart = gr.Plot(label="Top 3 dự đoán")

    # --- Upload/recording -> only show the ready banner + draw the images
    audio_file.change(
        fn=bao_san_sang,
        inputs=audio_file,
        outputs=[thong_bao_ready, mel_output, wavelet_output, waveform_output, waveform_denoise_output]
    )

    audio_mic.change(
        fn=bao_san_sang,
        inputs=audio_mic,
        outputs=[thong_bao_ready, mel_output, wavelet_output, waveform_output, waveform_denoise_output]
    )

    # --- Check button -> prediction only
    # The uploaded file takes precedence; the microphone recording is used
    # only when no file is set.
    btn_check.click(
        fn=lambda f1, f2: du_doan(f1 if f1 else f2),
        inputs=[audio_file, audio_mic],
        outputs=[output_html, top3_chart]
    )

    # --- Reset on clear
    # Clearing either input blanks the banner, all images, the report and the chart.
    audio_file.clear(fn=reset_output, outputs=[
        thong_bao_ready, mel_output, wavelet_output, waveform_output, waveform_denoise_output,
        output_html, top3_chart
    ])
    audio_mic.clear(fn=reset_output, outputs=[
        thong_bao_ready, mel_output, wavelet_output, waveform_output, waveform_denoise_output,
        output_html, top3_chart
    ])

# Start the app when the module is executed.
demo.launch()