Spaces:
Build error
Build error
File size: 10,433 Bytes
a117a90 1271c6c c5a6ba8 1271c6c a117a90 1271c6c c5a6ba8 03a67e2 a117a90 1271c6c a117a90 1271c6c c5a6ba8 1271c6c c5a6ba8 1271c6c be33133 1271c6c be33133 1271c6c be33133 a117a90 c5a6ba8 a117a90 c5a6ba8 03a67e2 c5a6ba8 a117a90 c5a6ba8 a117a90 c5a6ba8 a117a90 c5a6ba8 a117a90 c5a6ba8 a117a90 c5a6ba8 a117a90 29b5808 a117a90 c5a6ba8 a117a90 29b5808 a117a90 c5a6ba8 a117a90 c5a6ba8 a117a90 c5a6ba8 a117a90 c5a6ba8 a117a90 c5a6ba8 2c3ca13 c5a6ba8 a117a90 c5a6ba8 a117a90 c5a6ba8 a117a90 be33133 b6e535d be33133 2c084dd b6e535d a117a90 32cc0d4 364e6b1 a117a90 b6e535d a117a90 b6e535d a117a90 b6e535d a117a90 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 | import gradio as gr
import numpy as np
import librosa
import librosa.display
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pywt
import io
from PIL import Image
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
import json
import random
import plotly.express as px
# ================================
# CẤU HÌNH
# ================================
# Sampling rate (Hz) that du_doan asks librosa to load audio at.
SAMPLE_RATE = 22050
# Length in seconds that du_doan pads/truncates the trimmed signal to.
MAX_DURATION = 5
# Number of consecutive MFCC frames grouped into one model input sequence.
TIME_STEPS = 20
# When True, du_doan runs denoise_wavelet on the signal before computing MFCCs.
USE_DENOISE = True
# Keras model loaded once at import time; the path is relative to the working directory.
model = load_model("Huan_luyen_6_huhong.h5")
def load_scaler_from_json(filepath):
    """Rebuild a ``StandardScaler`` from statistics stored in a JSON file.

    Args:
        filepath: Path to a JSON document containing the keys ``mean_`` and
            ``scale_`` (one value per feature).

    Returns:
        A ``StandardScaler`` with ``mean_``, ``scale_`` and
        ``n_features_in_`` populated from the file.

    Raises:
        KeyError: If ``mean_`` or ``scale_`` is missing from the document.
        ValueError: If ``mean_`` and ``scale_`` do not have the same shape.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    mean = np.array(data['mean_'])
    scale = np.array(data['scale_'])
    # Catch a corrupt file here instead of failing (or silently broadcasting)
    # later inside transform().
    if mean.shape != scale.shape:
        raise ValueError(
            f"mean_ has shape {mean.shape} but scale_ has shape {scale.shape}"
        )

    scaler = StandardScaler()
    scaler.mean_ = mean
    scaler.scale_ = scale
    scaler.n_features_in_ = len(mean)
    return scaler
# Feature scaler rebuilt from the statistics stored in scaler.json.
scaler = load_scaler_from_json("scaler.json")
# Open with an explicit UTF-8 encoding so non-ASCII class names decode the
# same way on every platform instead of depending on the locale default.
with open("label_map.json", "r", encoding="utf-8") as f:
    label_map = json.load(f)
# Inverse of label_map: keyed by the JSON values (used elsewhere in this file
# as integer class indices), yielding the JSON keys (class names).
index_to_label = {v: k for k, v in label_map.items()}
# ================================
# HÀM TIỀN XỬ LÝ
# ================================
def denoise_wavelet(signal, wavelet='db8', level=4):
    """Denoise a 1-D signal by soft-thresholding its wavelet coefficients.

    The noise level is estimated from the median absolute value of the last
    (finest) detail coefficients divided by 0.6745, and the threshold is
    ``sigma * sqrt(2 * ln(len(signal)))``. The threshold is applied to every
    coefficient array returned by ``pywt.wavedec``, including the
    approximation band.

    Args:
        signal: 1-D sequence of samples.
        wavelet: Wavelet name passed to PyWavelets.
        level: Decomposition level passed to ``pywt.wavedec``.

    Returns:
        The reconstructed signal, with exactly ``len(signal)`` samples.
    """
    n_samples = len(signal)
    coeffs = pywt.wavedec(signal, wavelet, level=level)
    sigma = np.median(np.abs(coeffs[-1])) / 0.6745
    uthresh = sigma * np.sqrt(2 * np.log(n_samples))
    coeffs_denoised = [pywt.threshold(c, value=uthresh, mode='soft') for c in coeffs]
    # waverec can return one extra trailing sample when the input length is
    # odd; trim it so the output always lines up with the input (callers
    # compare or plot both against the same time/frequency axis).
    return pywt.waverec(coeffs_denoised, wavelet)[:n_samples]
def create_sequences(mfcc, time_steps=20):
    """Stack overlapping windows of ``time_steps`` consecutive frames.

    Windows start at frame 0, 1, ... and there are ``len(mfcc) - time_steps``
    of them, so the window that would end on the very last frame is not
    produced. When ``len(mfcc) <= time_steps`` the result is an empty array.

    Args:
        mfcc: Sequence of frames (sliceable along its first axis).
        time_steps: Number of frames per window.

    Returns:
        ``np.ndarray`` built from the list of windows.
    """
    window_count = len(mfcc) - time_steps
    windows = []
    for start in range(window_count):
        stop = start + time_steps
        windows.append(mfcc[start:stop])
    return np.array(windows)
def cat_2s_ngau_nhien(y, sr, duration=2):
    """Return a randomly positioned excerpt of ``duration`` seconds from ``y``.

    Args:
        y: Sliceable sequence of audio samples.
        sr: Sampling rate in samples per second (int or float).
        duration: Excerpt length in seconds; may be fractional.

    Returns:
        ``y`` unchanged when it is shorter than the requested excerpt,
        otherwise a contiguous slice of ``int(duration * sr)`` samples whose
        start offset is chosen uniformly at random.
    """
    # Slice bounds and randint arguments must be integers; converting once
    # here lets fractional durations and float sampling rates work.
    n_samples = int(duration * sr)
    if len(y) < n_samples:
        return y
    start = random.randint(0, len(y) - n_samples)
    return y[start:start + n_samples]
# ================================
# VẼ ẢNH (numpy array)
# ================================
def fig_to_numpy(fig):
    """Render a Matplotlib figure to an image array and close the figure.

    The figure is saved as a PNG (dpi=90, tight bounding box) into an
    in-memory buffer and decoded with PIL.

    Args:
        fig: Matplotlib figure to render.

    Returns:
        ``np.ndarray`` holding the decoded PNG pixels.
    """
    try:
        with io.BytesIO() as buf:
            fig.savefig(buf, format="png", dpi=90, bbox_inches="tight")
            buf.seek(0)
            # np.array forces PIL to decode the pixels while the buffer is
            # still open.
            return np.array(Image.open(buf))
    finally:
        # Always release the figure, even if rendering fails; otherwise
        # figures accumulate for the lifetime of the server process.
        plt.close(fig)
def tao_anh_mel(file_path):
    """Render a mel spectrogram of a random excerpt of an audio file.

    The file is loaded as mono with ``sr=None``, cut with
    ``cat_2s_ngau_nhien`` (default excerpt length), converted to a 128-band
    mel power spectrogram in dB relative to its maximum, and plotted.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Image array produced by ``fig_to_numpy``.
    """
    audio, rate = librosa.load(file_path, sr=None, mono=True)
    excerpt = cat_2s_ngau_nhien(audio, rate)

    mel_power = librosa.feature.melspectrogram(y=excerpt, sr=rate, n_mels=128)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    figure, axes = plt.subplots(figsize=(6, 3))
    librosa.display.specshow(
        mel_db,
        sr=rate,
        x_axis='time',
        y_axis='mel',
        ax=axes,
        cmap='magma',
    )
    axes.set_title("Phổ tần Mel", fontsize=10)
    return fig_to_numpy(figure)
def tao_wavelet_transform(file_path):
    """Render a continuous-wavelet scalogram of a random audio excerpt.

    The file is loaded as mono with ``sr=None``, cut with
    ``cat_2s_ngau_nhien`` (default excerpt length), and transformed with
    ``pywt.cwt`` using the 'morl' wavelet at scales 1..127. The absolute
    coefficient values are shown as an image.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Image array produced by ``fig_to_numpy``.
    """
    audio, rate = librosa.load(file_path, sr=None, mono=True)
    excerpt = cat_2s_ngau_nhien(audio, rate)

    scales = np.arange(1, 128)
    coefficients, _ = pywt.cwt(
        excerpt, scales=scales, wavelet='morl', sampling_period=1/rate
    )
    magnitude = np.abs(coefficients)
    # Horizontal axis spans the excerpt duration in seconds.
    excerpt_seconds = len(excerpt)/rate

    figure, axes = plt.subplots(figsize=(6, 3))
    axes.imshow(
        magnitude,
        extent=[0, excerpt_seconds, 1, 128],
        cmap='plasma',
        aspect='auto',
        origin='lower',
    )
    axes.set_title("Phổ sóng con (Wavelet)")
    axes.set_xlabel("Thời gian (s)")
    axes.set_ylabel("Tần số (scale)")
    return fig_to_numpy(figure)
def tao_waveform_image(file_path):
    """Render the waveform of a random excerpt of an audio file.

    The file is loaded as mono with ``sr=None`` and cut with
    ``cat_2s_ngau_nhien`` (default excerpt length) before plotting.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Image array produced by ``fig_to_numpy``.
    """
    audio, rate = librosa.load(file_path, sr=None, mono=True)
    excerpt = cat_2s_ngau_nhien(audio, rate)

    figure, axes = plt.subplots(figsize=(6, 2.5))
    librosa.display.waveshow(excerpt, sr=rate, ax=axes, color='steelblue')
    axes.set_title("Biểu đồ Sóng Âm (Waveform)")
    axes.set_xlabel("Thời gian (s)")
    axes.set_ylabel("Biên độ")
    return fig_to_numpy(figure)
def tao_waveform_denoise(file_path):
    """Render a before/after comparison of wavelet denoising.

    The file is loaded as mono with ``sr=None`` and cut with
    ``cat_2s_ngau_nhien`` (default excerpt length). The figure is a 3x2 grid:
    the left column (red) shows the excerpt as loaded, the right column
    (green) shows it after ``denoise_wavelet``. Rows are the waveform, the
    FFT magnitude (x-axis limited to 0-8000), and a log-frequency
    spectrogram in dB with a colorbar.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Image array produced by ``fig_to_numpy``.
    """
    y, sr = librosa.load(file_path, sr=None, mono=True)
    y = cat_2s_ngau_nhien(y, sr)
    # Wavelet reconstruction may differ in length from its input (odd-length
    # signals gain a sample). Force both signals to the same length so they
    # share one frequency axis; otherwise the FFT plot raises on a shape
    # mismatch.
    y_denoised = librosa.util.fix_length(denoise_wavelet(y), size=len(y))

    fig, ax = plt.subplots(3, 2, figsize=(10, 8))
    freqs = np.fft.rfftfreq(len(y), 1/sr)

    # One column per signal: (samples, line color, title suffix).
    columns = (
        (y, 'red', "Trước lọc"),
        (y_denoised, 'green', "Sau lọc"),
    )
    for col, (samples, color, tag) in enumerate(columns):
        # Row 0: waveform
        librosa.display.waveshow(samples, sr=sr, ax=ax[0, col], color=color)
        ax[0, col].set_title(f"Waveform - {tag}")

        # Row 1: FFT magnitude
        ax[1, col].plot(freqs, np.abs(np.fft.rfft(samples)), color=color)
        ax[1, col].set_xlim(0, 8000)
        ax[1, col].set_title(f"FFT - {tag}")

        # Row 2: spectrogram in dB relative to the maximum
        spec_db = librosa.amplitude_to_db(np.abs(librosa.stft(samples)), ref=np.max)
        img = librosa.display.specshow(
            spec_db, sr=sr, x_axis='time', y_axis='log', ax=ax[2, col], cmap="magma"
        )
        ax[2, col].set_title(f"Spectrogram - {tag}")
        fig.colorbar(img, ax=ax[2, col], format="%+2.0f dB")

    # Lay out this figure explicitly rather than pyplot's global "current"
    # figure.
    fig.tight_layout()
    return fig_to_numpy(fig)
# ================================
# VẼ BIỂU ĐỒ Top-3 (Plotly)
# ================================
def ve_top3_chart(probs):
    """Build a Plotly pie chart of the three highest-probability classes.

    Args:
        probs: Per-class probabilities, indexed by class index. It is
            multiplied by 100 elementwise, so a NumPy array is assumed.

    Returns:
        Plotly figure whose slices are the (up to) three largest values,
        as percentages, labelled via ``index_to_label``.
    """
    class_names = [index_to_label[idx] for idx in range(len(probs))]
    percentages = probs * 100
    # argsort is ascending: reverse it, then keep the first three entries.
    ranked = np.argsort(percentages)[::-1]

    top_values = []
    top_names = []
    for idx in ranked[:3]:
        top_values.append(percentages[idx])
        top_names.append(class_names[idx])

    return px.pie(values=top_values, names=top_names, title="Top-3 dự đoán")
# ================================
# DỰ ĐOÁN
# ================================
def bao_san_sang(file_path):
    """Report that audio is ready and render its four preview images.

    Args:
        file_path: Path of the uploaded/recorded audio, or a falsy value
            when no audio is present.

    Returns:
        A 5-tuple ``(status_html, mel, wavelet, waveform, comparison)``.
        With no audio the status is an empty string and every image is
        ``None``.
    """
    if not file_path:
        return "", None, None, None, None

    # Each plotting helper loads the file and picks its own random excerpt
    # via the `random` module. Replaying the same generator state before each
    # call makes all four pick the same start offset, so the previews show the
    # same segment of the recording instead of four unrelated ones.
    rng_state = random.getstate()
    previews = []
    for render in (
        tao_anh_mel,
        tao_wavelet_transform,
        tao_waveform_image,
        tao_waveform_denoise,
    ):
        random.setstate(rng_state)
        previews.append(render(file_path))

    return (
        "<b style='color:green;'>✅ Âm thanh đã sẵn sàng. Nhấn kiểm tra ngay!</b>",
        *previews,
    )
def du_doan(file_path):
    """Classify an audio file and format the result for the UI.

    The audio is loaded at ``SAMPLE_RATE``, trimmed with
    ``librosa.effects.trim``, fixed to ``SAMPLE_RATE * MAX_DURATION``
    samples, optionally denoised, turned into 13 scaled MFCCs per frame and
    split into windows of ``TIME_STEPS`` frames. The model's per-window
    outputs are averaged; if the best class scores below 50 % the label
    "HƯ HỎNG KHÁC" is reported instead of the class name.

    Args:
        file_path: Path of the audio file, or a falsy value when missing.

    Returns:
        ``(html, chart)``: an HTML summary string and the figure from
        ``ve_top3_chart``. ``chart`` is ``None`` when there is no audio or no
        window could be formed.
    """
    if not file_path:
        return "<b style='color:red;'>❌ Chưa có âm thanh.</b>", None

    # --- Preprocessing
    audio, rate = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)
    audio, _ = librosa.effects.trim(audio)
    audio = librosa.util.fix_length(audio, size=SAMPLE_RATE * MAX_DURATION)
    if USE_DENOISE:
        audio = denoise_wavelet(audio)

    # --- Features: one row per frame, 13 coefficients each
    features = librosa.feature.mfcc(y=audio, sr=rate, n_mfcc=13).T
    features = scaler.transform(features)
    windows = create_sequences(features, time_steps=TIME_STEPS)
    if len(windows) == 0:
        return "<b style='color:red;'>⚠️ Âm thanh quá ngắn để phân tích.</b>", None

    # --- Inference: average the per-window outputs
    window_probs = model.predict(windows, verbose=0)
    avg_probs = np.mean(window_probs, axis=0)
    best_index = np.argmax(avg_probs)
    confidence = avg_probs[best_index] * 100
    if confidence < 50:
        pred_label = "HƯ HỎNG KHÁC"
    else:
        pred_label = index_to_label[best_index]

    # --- HTML report
    header = f"""<div style='background:#f0faff;color:#000;padding:10px;border-radius:10px'>
<b style='color:#000'>📋 Kết Quả:</b><br>
✅ <b style='color:#000'>Tình trạng:</b> <span style='color:#007acc;font-size:18px'>{pred_label.upper()}</span><br>
📊 <b style='color:#000'>Độ tin cậy:</b> <span style='color:#000'>{confidence:.2f}%</span><br>
<hr style='margin:6px 0'>
<b style='color:#000'>Xác suất từng lớp:</b><br>"""
    class_rows = [
        f"<span style='color:#000'>- {index_to_label[i]}: {prob*100:.1f}%</span><br>"
        for i, prob in enumerate(avg_probs)
    ]
    html = header + "".join(class_rows) + "</div>"
    return html, ve_top3_chart(avg_probs)
# ================================
# RESET
# ================================
def reset_output():
    """Return blank values for the seven outputs cleared on reset.

    Returns:
        A 7-tuple: an empty string, four ``None`` values, another empty
        string and a final ``None``.
    """
    blank_html = ""
    no_value = None
    return (
        blank_html,
        no_value,
        no_value,
        no_value,
        no_value,
        blank_html,
        no_value,
    )
# ================================
# GIAO DIỆN
# ================================
# NOTE(review): this file arrived with its indentation stripped, so the
# nesting below is reconstructed: the two audio inputs are placed in the Row,
# the four images in the Accordion, and top3_chart outside the Accordion.
# Confirm against the deployed layout.
with gr.Blocks(css="""
#check-btn {
background: #007acc;
color: white;
height: 48px;
font-size: 16px;
font-weight: bold;
border-radius: 10px;
}
""") as demo:
    # Logo banner
    gr.HTML("""
<div style="
display: flex;
align-items: center;
background-image: url('https://cdn-uploads.huggingface.co/production/uploads/6881f05ad0fc87fca019ee65/t7NwSiUHpjoFXh1S10MT4.png');
background-repeat: no-repeat;
background-size: 100px 40px;
background-position: 0px 0px;
padding-left: 60px;
height: 50px;
margin: 0;
">
</div>
""")
    # Page title
    gr.Markdown("""
<div style='
display: flex;
justify-content: center;
align-items: center;
margin-top: -10px;
margin-bottom: 10px;
height: 40px;
'>
<h4 style='color:#007acc; font-size:20px; font-weight:bold; margin: 0;'>
CHẨN ĐOÁN HƯ HỎNG TỪ ÂM THANH ĐỘNG CƠ
</h4>
</div>
""")

    # Inputs: file upload and microphone recording side by side
    with gr.Row():
        audio_file = gr.Audio(type="filepath", label="📂 Tải File Âm Thanh", interactive=True)
        audio_mic = gr.Audio(type="filepath", label="🎤 Ghi Âm", sources=["microphone"], interactive=True)

    thong_bao_ready = gr.HTML()
    btn_check = gr.Button("🔍 KIỂM TRA NGAY", elem_id="check-btn")
    output_html = gr.HTML()

    with gr.Accordion("📊 Phân tích Âm Thanh", open=False):
        mel_output = gr.Image(label="Mel Spectrogram", type="numpy")
        wavelet_output = gr.Image(label="Wavelet Transform", type="numpy")
        waveform_output = gr.Image(label="Waveform", type="numpy")
        waveform_denoise_output = gr.Image(label="So sánh", type="numpy")

    top3_chart = gr.Plot(label="Top 3 dự đoán")

    # Components refreshed when audio arrives, and the full set blanked on clear.
    preview_outputs = [
        thong_bao_ready,
        mel_output,
        wavelet_output,
        waveform_output,
        waveform_denoise_output,
    ]
    prediction_outputs = [output_html, top3_chart]

    # --- Upload/record -> show the ready message and draw the preview images
    for audio_input in (audio_file, audio_mic):
        audio_input.change(
            fn=bao_san_sang,
            inputs=audio_input,
            outputs=preview_outputs,
        )

    # --- Check button -> prediction only; the uploaded file takes precedence
    btn_check.click(
        fn=lambda uploaded, recorded: du_doan(uploaded if uploaded else recorded),
        inputs=[audio_file, audio_mic],
        outputs=prediction_outputs,
    )

    # --- Clearing either input resets every output
    for audio_input in (audio_file, audio_mic):
        audio_input.clear(
            fn=reset_output,
            outputs=preview_outputs + prediction_outputs,
        )

demo.launch()
|