Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
import numpy as np
|
| 3 |
import librosa
|
| 4 |
import librosa.display
|
|
@@ -13,7 +13,6 @@ import tempfile
|
|
| 13 |
from PIL import Image
|
| 14 |
from tensorflow.keras.models import load_model
|
| 15 |
from sklearn.preprocessing import StandardScaler
|
| 16 |
-
from scipy.signal import butter, lfilter
|
| 17 |
|
| 18 |
SAMPLE_RATE = 22050
|
| 19 |
MAX_DURATION = 5
|
|
@@ -41,20 +40,9 @@ def denoise_wavelet(signal, wavelet='db8', level=4):
|
|
| 41 |
coeffs = pywt.wavedec(signal, wavelet, level=level)
|
| 42 |
sigma = np.median(np.abs(coeffs[-1])) / 0.6745
|
| 43 |
uthresh = sigma * np.sqrt(2 * np.log(len(signal)))
|
| 44 |
-
coeffs_denoised = [pywt.threshold(c, value=uthresh, mode='soft')
|
| 45 |
return pywt.waverec(coeffs_denoised, wavelet)
|
| 46 |
|
| 47 |
-
def normalize_volume(signal):
|
| 48 |
-
max_amp = np.max(np.abs(signal))
|
| 49 |
-
return signal / max_amp if max_amp > 0 else signal
|
| 50 |
-
|
| 51 |
-
def bandpass_filter(signal, sr, lowcut=50, highcut=3000, order=5):
|
| 52 |
-
nyquist = 0.5 * sr
|
| 53 |
-
low = lowcut / nyquist
|
| 54 |
-
high = highcut / nyquist
|
| 55 |
-
b, a = butter(order, [low, high], btype='band')
|
| 56 |
-
return lfilter(b, a, signal)
|
| 57 |
-
|
| 58 |
def create_sequences(mfcc, time_steps=20):
|
| 59 |
return np.array([mfcc[i:i+time_steps] for i in range(len(mfcc) - time_steps)])
|
| 60 |
|
|
@@ -70,53 +58,65 @@ def tao_anh_mel(file_path):
|
|
| 70 |
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
|
| 71 |
S_dB = librosa.power_to_db(S, ref=np.max)
|
| 72 |
fig, ax = plt.subplots(figsize=(6, 3))
|
| 73 |
-
img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
|
| 74 |
-
ax.
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
|
|
|
| 82 |
y, sr = librosa.load(file_path, sr=None, mono=True)
|
| 83 |
y = cat_2s_ngau_nhien(y, sr)
|
| 84 |
-
coef,
|
| 85 |
fig, ax = plt.subplots(figsize=(6, 3))
|
| 86 |
-
|
| 87 |
-
ax.set_title("
|
| 88 |
-
ax.
|
| 89 |
-
ax.
|
| 90 |
-
plt.
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
plt.close(
|
| 94 |
-
return
|
| 95 |
-
|
| 96 |
-
def
|
| 97 |
-
y, sr = librosa.load(file_path, sr=None)
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
def du_doan(file_path):
|
| 107 |
if not file_path:
|
| 108 |
return "<b style='color:red;'>❌ Chưa có âm thanh.</b>"
|
| 109 |
|
| 110 |
signal, sr = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)
|
| 111 |
-
signal, _ = librosa.effects.trim(signal
|
| 112 |
-
signal = normalize_volume(signal)
|
| 113 |
-
signal = bandpass_filter(signal, sr)
|
| 114 |
-
|
| 115 |
-
rms = np.sqrt(np.mean(signal**2))
|
| 116 |
-
if rms < 0.001:
|
| 117 |
-
return "<b style='color:red;'>⚠️ Âm lượng quá thấp. Ghi âm gần động cơ hơn.</b>"
|
| 118 |
-
|
| 119 |
signal = librosa.util.fix_length(signal, size=SAMPLE_RATE * MAX_DURATION)
|
|
|
|
| 120 |
if USE_DENOISE:
|
| 121 |
signal = denoise_wavelet(signal)
|
| 122 |
|
|
@@ -131,11 +131,7 @@ def du_doan(file_path):
|
|
| 131 |
avg_probs = np.mean(y_preds, axis=0)
|
| 132 |
pred_index = np.argmax(avg_probs)
|
| 133 |
confidence = avg_probs[pred_index] * 100
|
| 134 |
-
|
| 135 |
-
if confidence < 60:
|
| 136 |
-
return "<b style='color:red;'>⚠️ Không nhận dạng được rõ ràng. Vui lòng ghi âm lại với ít nhiễu hơn.</b>"
|
| 137 |
-
|
| 138 |
-
pred_label = index_to_label[pred_index]
|
| 139 |
|
| 140 |
html = f"""<div style='background:#f0faff;color:#000;padding:10px;border-radius:10px'>
|
| 141 |
<b style='color:#000'>📋 Kết Quả:</b><br>
|
|
@@ -148,18 +144,6 @@ def du_doan(file_path):
|
|
| 148 |
html += "</div>"
|
| 149 |
return html
|
| 150 |
|
| 151 |
-
def bao_san_sang(file_path):
|
| 152 |
-
if file_path:
|
| 153 |
-
return "<b style='color:green;'>✅ Âm thanh đã sẵn sàng. Nhấn kiểm tra ngay!</b>"
|
| 154 |
-
else:
|
| 155 |
-
return ""
|
| 156 |
-
|
| 157 |
-
def sinh_anh(file_path):
|
| 158 |
-
if file_path:
|
| 159 |
-
return tao_anh_mel(file_path), tao_anh_wavelet(file_path), tao_anh_waveform(file_path)
|
| 160 |
-
else:
|
| 161 |
-
return None, None, None
|
| 162 |
-
|
| 163 |
def reset_output():
|
| 164 |
return "", None, None, None, ""
|
| 165 |
|
|
@@ -208,7 +192,7 @@ with gr.Blocks(css="""
|
|
| 208 |
""")
|
| 209 |
|
| 210 |
with gr.Row():
|
| 211 |
-
audio_file = gr.Audio(type="filepath", label="📂 Tải File Âm Thanh",
|
| 212 |
audio_mic = gr.Audio(type="filepath", label="🎤 Ghi Âm", sources=["microphone"], interactive=True)
|
| 213 |
|
| 214 |
thong_bao_ready = gr.HTML()
|
|
@@ -245,4 +229,4 @@ with gr.Blocks(css="""
|
|
| 245 |
output_html
|
| 246 |
])
|
| 247 |
|
| 248 |
-
demo.launch()
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
import numpy as np
|
| 3 |
import librosa
|
| 4 |
import librosa.display
|
|
|
|
| 13 |
from PIL import Image
|
| 14 |
from tensorflow.keras.models import load_model
|
| 15 |
from sklearn.preprocessing import StandardScaler
|
|
|
|
| 16 |
|
| 17 |
SAMPLE_RATE = 22050
|
| 18 |
MAX_DURATION = 5
|
|
|
|
| 40 |
coeffs = pywt.wavedec(signal, wavelet, level=level)
|
| 41 |
sigma = np.median(np.abs(coeffs[-1])) / 0.6745
|
| 42 |
uthresh = sigma * np.sqrt(2 * np.log(len(signal)))
|
| 43 |
+
coeffs_denoised = [pywt.threshold(c, value=uthresh, mode='soft') for c in coeffs]
|
| 44 |
return pywt.waverec(coeffs_denoised, wavelet)
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def create_sequences(mfcc, time_steps=20):
    """Slice a frame-indexed feature matrix into overlapping windows.

    Args:
        mfcc: Sequence indexed by frame along its first axis; it is sliced
            as ``mfcc[i:i + time_steps]``.
        time_steps: Number of consecutive frames in each window.

    Returns:
        ``np.ndarray`` stacking the windows that start at frames
        ``0 .. len(mfcc) - time_steps - 1``. The array is empty when
        ``len(mfcc) <= time_steps``.
    """
    # NOTE(review): the window starting at ``len(mfcc) - time_steps`` (the
    # last full one) is not emitted. Behaviour is kept as-is; confirm it
    # matches how the model was trained before changing it.
    window_count = len(mfcc) - time_steps
    return np.array(
        [mfcc[start:start + time_steps] for start in range(window_count)]
    )
|
| 48 |
|
|
|
|
| 58 |
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
|
| 59 |
S_dB = librosa.power_to_db(S, ref=np.max)
|
| 60 |
fig, ax = plt.subplots(figsize=(6, 3))
|
| 61 |
+
img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax, cmap='magma')
|
| 62 |
+
ax.set_title("Phổ tần Mel", fontsize=10)
|
| 63 |
+
fig.colorbar(img, ax=ax)
|
| 64 |
+
plt.tight_layout()
|
| 65 |
+
path = os.path.join(tempfile.gettempdir(), "mel.png")
|
| 66 |
+
fig.savefig(path, dpi=80)
|
| 67 |
+
plt.close()
|
| 68 |
+
return Image.open(path)
|
| 69 |
+
|
| 70 |
+
def tao_wavelet_transform(file_path):
    """Render a continuous-wavelet scalogram of an audio clip.

    The file is loaded as mono at its native sample rate, passed through
    ``cat_2s_ngau_nhien`` and transformed with a Morlet CWT over scales
    1..127. The magnitude of the coefficients is drawn with ``imshow``.

    Args:
        file_path: Path of the audio file to visualise.

    Returns:
        A fully loaded ``PIL.Image.Image`` containing the rendered PNG.
    """
    import io  # local import: only needed for the in-memory PNG buffer

    y, sr = librosa.load(file_path, sr=None, mono=True)
    y = cat_2s_ngau_nhien(y, sr)
    coef, _ = pywt.cwt(
        y,
        scales=np.arange(1, 128),
        wavelet='morl',
        sampling_period=1 / sr,
    )

    fig, ax = plt.subplots(figsize=(6, 3))
    png_buffer = io.BytesIO()
    try:
        ax.imshow(
            np.abs(coef),
            extent=[0, len(y) / sr, 1, 128],
            cmap='plasma',
            aspect='auto',
            origin='lower',
        )
        ax.set_title("Phổ sóng con (Wavelet)")
        ax.set_xlabel("Thời gian (s)")
        ax.set_ylabel("Tần số (scale)")
        fig.tight_layout()
        # Render into memory instead of a fixed "<tmp>/wavelet.png": a shared
        # file name lets concurrent requests overwrite each other's image.
        fig.savefig(png_buffer, format="png", dpi=80)
    finally:
        # Close this specific figure (not just the "current" one), even when
        # plotting fails, so figures do not accumulate across requests.
        plt.close(fig)

    png_buffer.seek(0)
    image = Image.open(png_buffer)
    # Image.open is lazy; force decoding now so the returned image no longer
    # depends on the buffer being read later.
    image.load()
    return image
|
| 84 |
+
|
| 85 |
+
def tao_waveform_image(file_path):
    """Render the time-domain waveform of an audio clip.

    The file is loaded as mono at its native sample rate, passed through
    ``cat_2s_ngau_nhien`` and drawn with ``librosa.display.waveshow``.

    Args:
        file_path: Path of the audio file to visualise.

    Returns:
        A fully loaded ``PIL.Image.Image`` containing the rendered PNG.
    """
    import io  # local import: only needed for the in-memory PNG buffer

    y, sr = librosa.load(file_path, sr=None, mono=True)
    y = cat_2s_ngau_nhien(y, sr)

    fig, ax = plt.subplots(figsize=(6, 2.5))
    png_buffer = io.BytesIO()
    try:
        librosa.display.waveshow(y, sr=sr, ax=ax, color='steelblue')
        ax.set_title("Biểu đồ Sóng Âm (Waveform)")
        ax.set_xlabel("Thời gian (s)")
        ax.set_ylabel("Biên độ")
        fig.tight_layout()
        # Render into memory instead of a fixed "<tmp>/waveform.png": a shared
        # file name lets concurrent requests overwrite each other's image.
        fig.savefig(png_buffer, format="png", dpi=80)
    finally:
        # Close this specific figure (not just the "current" one), even when
        # plotting fails, so figures do not accumulate across requests.
        plt.close(fig)

    png_buffer.seek(0)
    image = Image.open(png_buffer)
    # Image.open is lazy; force decoding now so the returned image no longer
    # depends on the buffer being read later.
    image.load()
    return image
|
| 98 |
+
|
| 99 |
+
def bao_san_sang(file_path):
    """Return the "audio is ready" HTML banner for a selected file.

    Args:
        file_path: Path of the chosen audio; any falsy value (``None``,
            ``""``) means nothing is selected.

    Returns:
        An empty string when ``file_path`` is falsy, otherwise a green HTML
        ``<b>`` message.
    """
    if not file_path:
        return ""
    return "<b style='color:green;'>✅ Âm thanh đã sẵn sàng. Nhấn kiểm tra ngay!</b>"
|
| 103 |
+
|
| 104 |
+
def sinh_anh(file_path):
    """Build the three visualisations for an audio file.

    Args:
        file_path: Path of the audio file; any falsy value means nothing is
            selected.

    Returns:
        A 3-tuple ``(mel_img, wavelet_img, waveform_img)`` produced by
        ``tao_anh_mel``, ``tao_wavelet_transform`` and ``tao_waveform_image``
        respectively, or ``(None, None, None)`` when ``file_path`` is falsy.
    """
    if not file_path:
        return None, None, None
    mel_img = tao_anh_mel(file_path)
    wavelet_img = tao_wavelet_transform(file_path)
    waveform_img = tao_waveform_image(file_path)
    return mel_img, wavelet_img, waveform_img
|
| 111 |
|
| 112 |
def du_doan(file_path):
|
| 113 |
if not file_path:
|
| 114 |
return "<b style='color:red;'>❌ Chưa có âm thanh.</b>"
|
| 115 |
|
| 116 |
signal, sr = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)
|
| 117 |
+
signal, _ = librosa.effects.trim(signal)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
signal = librosa.util.fix_length(signal, size=SAMPLE_RATE * MAX_DURATION)
|
| 119 |
+
|
| 120 |
if USE_DENOISE:
|
| 121 |
signal = denoise_wavelet(signal)
|
| 122 |
|
|
|
|
| 131 |
avg_probs = np.mean(y_preds, axis=0)
|
| 132 |
pred_index = np.argmax(avg_probs)
|
| 133 |
confidence = avg_probs[pred_index] * 100
|
| 134 |
+
pred_label = "HƯ HỎNG KHÁC" if confidence < 60 else index_to_label[pred_index]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
html = f"""<div style='background:#f0faff;color:#000;padding:10px;border-radius:10px'>
|
| 137 |
<b style='color:#000'>📋 Kết Quả:</b><br>
|
|
|
|
| 144 |
html += "</div>"
|
| 145 |
return html
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
def reset_output():
    """Return blank values for five output slots.

    Returns:
        The tuple ``("", None, None, None, "")``.
    """
    # NOTE(review): presumably two text/HTML components around three image
    # components; the wiring is outside this view, so verify the order
    # against the caller.
    return "", None, None, None, ""
|
| 149 |
|
|
|
|
| 192 |
""")
|
| 193 |
|
| 194 |
with gr.Row():
|
| 195 |
+
audio_file = gr.Audio(type="filepath", label="📂 Tải File Âm Thanh", interactive=True)
|
| 196 |
audio_mic = gr.Audio(type="filepath", label="🎤 Ghi Âm", sources=["microphone"], interactive=True)
|
| 197 |
|
| 198 |
thong_bao_ready = gr.HTML()
|
|
|
|
| 229 |
output_html
|
| 230 |
])
|
| 231 |
|
| 232 |
+
demo.launch()
|