ftiiii committed on
Commit
3ba478f
·
verified ·
1 Parent(s): 8d5612b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -70
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import gradio as gr
2
  import numpy as np
3
  import librosa
4
  import librosa.display
@@ -13,7 +13,6 @@ import tempfile
13
  from PIL import Image
14
  from tensorflow.keras.models import load_model
15
  from sklearn.preprocessing import StandardScaler
16
- from scipy.signal import butter, lfilter
17
 
18
  SAMPLE_RATE = 22050
19
  MAX_DURATION = 5
@@ -41,20 +40,9 @@ def denoise_wavelet(signal, wavelet='db8', level=4):
41
  coeffs = pywt.wavedec(signal, wavelet, level=level)
42
  sigma = np.median(np.abs(coeffs[-1])) / 0.6745
43
  uthresh = sigma * np.sqrt(2 * np.log(len(signal)))
44
- coeffs_denoised = [pywt.threshold(c, value=uthresh, mode='soft') if i > 0 else c for i, c in enumerate(coeffs)]
45
  return pywt.waverec(coeffs_denoised, wavelet)
46
 
47
- def normalize_volume(signal):
48
- max_amp = np.max(np.abs(signal))
49
- return signal / max_amp if max_amp > 0 else signal
50
-
51
- def bandpass_filter(signal, sr, lowcut=50, highcut=3000, order=5):
52
- nyquist = 0.5 * sr
53
- low = lowcut / nyquist
54
- high = highcut / nyquist
55
- b, a = butter(order, [low, high], btype='band')
56
- return lfilter(b, a, signal)
57
-
58
  def create_sequences(mfcc, time_steps=20):
59
  return np.array([mfcc[i:i+time_steps] for i in range(len(mfcc) - time_steps)])
60
 
@@ -70,53 +58,65 @@ def tao_anh_mel(file_path):
70
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
71
  S_dB = librosa.power_to_db(S, ref=np.max)
72
  fig, ax = plt.subplots(figsize=(6, 3))
73
- img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
74
- ax.set(title='Mel Spectrogram')
75
- plt.colorbar(img, ax=ax, format='%+2.0f dB')
76
- buf = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
77
- plt.savefig(buf.name, bbox_inches='tight', pad_inches=0.1)
78
- plt.close(fig)
79
- return buf.name
80
-
81
- def tao_anh_wavelet(file_path):
 
82
  y, sr = librosa.load(file_path, sr=None, mono=True)
83
  y = cat_2s_ngau_nhien(y, sr)
84
- coef, freqs = pywt.cwt(y, np.arange(1, 129), 'morl')
85
  fig, ax = plt.subplots(figsize=(6, 3))
86
- img = ax.imshow(np.abs(coef), extent=[0, len(y)/sr, 1, 128], cmap='viridis', aspect='auto', vmax=np.abs(coef).max(), vmin=0)
87
- ax.set_title("Wavelet Transform")
88
- ax.set_ylabel("Scale")
89
- ax.set_xlabel("Time (s)")
90
- plt.colorbar(img, ax=ax)
91
- buf = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
92
- plt.savefig(buf.name, bbox_inches='tight', pad_inches=0.1)
93
- plt.close(fig)
94
- return buf.name
95
-
96
- def tao_anh_waveform(file_path):
97
- y, sr = librosa.load(file_path, sr=None)
98
- fig, ax = plt.subplots(figsize=(6, 2))
99
- librosa.display.waveshow(y, sr=sr, ax=ax)
100
- ax.set(title='Waveform')
101
- buf = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
102
- plt.savefig(buf.name, bbox_inches='tight')
103
- plt.close(fig)
104
- return buf.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  def du_doan(file_path):
107
  if not file_path:
108
  return "<b style='color:red;'>❌ Chưa có âm thanh.</b>"
109
 
110
  signal, sr = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)
111
- signal, _ = librosa.effects.trim(signal, top_db=30)
112
- signal = normalize_volume(signal)
113
- signal = bandpass_filter(signal, sr)
114
-
115
- rms = np.sqrt(np.mean(signal**2))
116
- if rms < 0.001:
117
- return "<b style='color:red;'>⚠️ Âm lượng quá thấp. Ghi âm gần động cơ hơn.</b>"
118
-
119
  signal = librosa.util.fix_length(signal, size=SAMPLE_RATE * MAX_DURATION)
 
120
  if USE_DENOISE:
121
  signal = denoise_wavelet(signal)
122
 
@@ -131,11 +131,7 @@ def du_doan(file_path):
131
  avg_probs = np.mean(y_preds, axis=0)
132
  pred_index = np.argmax(avg_probs)
133
  confidence = avg_probs[pred_index] * 100
134
-
135
- if confidence < 60:
136
- return "<b style='color:red;'>⚠️ Không nhận dạng được rõ ràng. Vui lòng ghi âm lại với ít nhiễu hơn.</b>"
137
-
138
- pred_label = index_to_label[pred_index]
139
 
140
  html = f"""<div style='background:#f0faff;color:#000;padding:10px;border-radius:10px'>
141
  <b style='color:#000'>📋 Kết Quả:</b><br>
@@ -148,18 +144,6 @@ def du_doan(file_path):
148
  html += "</div>"
149
  return html
150
 
151
- def bao_san_sang(file_path):
152
- if file_path:
153
- return "<b style='color:green;'>✅ Âm thanh đã sẵn sàng. Nhấn kiểm tra ngay!</b>"
154
- else:
155
- return ""
156
-
157
- def sinh_anh(file_path):
158
- if file_path:
159
- return tao_anh_mel(file_path), tao_anh_wavelet(file_path), tao_anh_waveform(file_path)
160
- else:
161
- return None, None, None
162
-
163
  def reset_output():
164
  return "", None, None, None, ""
165
 
@@ -208,7 +192,7 @@ with gr.Blocks(css="""
208
  """)
209
 
210
  with gr.Row():
211
- audio_file = gr.Audio(type="filepath", label="📂 Tải File Âm Thanh", sources=["upload"], interactive=True)
212
  audio_mic = gr.Audio(type="filepath", label="🎤 Ghi Âm", sources=["microphone"], interactive=True)
213
 
214
  thong_bao_ready = gr.HTML()
@@ -245,4 +229,4 @@ with gr.Blocks(css="""
245
  output_html
246
  ])
247
 
248
- demo.launch()
 
1
+ import gradio as gr
2
  import numpy as np
3
  import librosa
4
  import librosa.display
 
13
  from PIL import Image
14
  from tensorflow.keras.models import load_model
15
  from sklearn.preprocessing import StandardScaler
 
16
 
17
  SAMPLE_RATE = 22050
18
  MAX_DURATION = 5
 
40
  coeffs = pywt.wavedec(signal, wavelet, level=level)
41
  sigma = np.median(np.abs(coeffs[-1])) / 0.6745
42
  uthresh = sigma * np.sqrt(2 * np.log(len(signal)))
43
+ coeffs_denoised = [pywt.threshold(c, value=uthresh, mode='soft') for c in coeffs]
44
  return pywt.waverec(coeffs_denoised, wavelet)
45
 
 
 
 
 
 
 
 
 
 
 
 
46
def create_sequences(mfcc, time_steps=20):
    """Stack overlapping windows of consecutive rows of ``mfcc``.

    Args:
        mfcc: Sequence (typically a 2-D array) sliced along its first axis.
        time_steps: Number of consecutive rows in each window.

    Returns:
        ``np.ndarray`` whose first axis indexes the windows. When ``mfcc`` has
        ``time_steps`` rows or fewer, no window is produced and the result is
        an empty array of shape ``(0,)``.
    """
    # NOTE(review): the range stops one short of the last full window
    # (start index len(mfcc) - time_steps is never used). Kept as-is because
    # downstream code may rely on this count; confirm it is intentional.
    window_starts = range(len(mfcc) - time_steps)
    return np.array([mfcc[start:start + time_steps] for start in window_starts])
48
 
 
58
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
59
  S_dB = librosa.power_to_db(S, ref=np.max)
60
  fig, ax = plt.subplots(figsize=(6, 3))
61
+ img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax, cmap='magma')
62
+ ax.set_title("Phổ tần Mel", fontsize=10)
63
+ fig.colorbar(img, ax=ax)
64
+ plt.tight_layout()
65
+ path = os.path.join(tempfile.gettempdir(), "mel.png")
66
+ fig.savefig(path, dpi=80)
67
+ plt.close()
68
+ return Image.open(path)
69
+
70
def tao_wavelet_transform(file_path):
    """Render a continuous-wavelet scalogram for an audio file.

    The audio is loaded at its native sample rate as mono, passed through
    ``cat_2s_ngau_nhien`` (defined elsewhere in this file; presumably a short
    random crop -- confirm), transformed with a Morlet CWT, and drawn as an
    image of coefficient magnitudes.

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        A fully loaded ``PIL.Image.Image`` containing the rendered PNG.
    """
    import io  # local import: the PNG is rendered into an in-memory buffer

    y, sr = librosa.load(file_path, sr=None, mono=True)
    y = cat_2s_ngau_nhien(y, sr)

    scales = np.arange(1, 128)
    coef, _ = pywt.cwt(y, scales=scales, wavelet='morl', sampling_period=1/sr)

    fig, ax = plt.subplots(figsize=(6, 3))
    try:
        ax.imshow(
            np.abs(coef),
            # Label the y-axis with the scales that were actually computed
            # instead of a hard-coded upper bound that does not match them.
            extent=[0, len(y)/sr, scales[0], scales[-1]],
            cmap='plasma',
            aspect='auto',
            origin='lower',
        )
        ax.set_title("Phổ sóng con (Wavelet)")
        ax.set_xlabel("Thời gian (s)")
        ax.set_ylabel("Tần số (scale)")
        fig.tight_layout()

        # Render to memory rather than a fixed file name in the temp dir:
        # a shared path lets concurrent requests overwrite each other's image.
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=80)
    finally:
        # Close this specific figure (not whatever pyplot considers current),
        # and do so even if plotting fails, so figures do not accumulate.
        plt.close(fig)

    buf.seek(0)
    image = Image.open(buf)
    image.load()  # force decoding now; Image.open alone is lazy
    return image
84
+
85
def tao_waveform_image(file_path):
    """Render the time-domain waveform of an audio file as an image.

    The audio is loaded at its native sample rate as mono and passed through
    ``cat_2s_ngau_nhien`` (defined elsewhere in this file; presumably a short
    random crop -- confirm) before being plotted with
    ``librosa.display.waveshow``.

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        A fully loaded ``PIL.Image.Image`` containing the rendered PNG.
    """
    import io  # local import: the PNG is rendered into an in-memory buffer

    y, sr = librosa.load(file_path, sr=None, mono=True)
    y = cat_2s_ngau_nhien(y, sr)

    fig, ax = plt.subplots(figsize=(6, 2.5))
    try:
        librosa.display.waveshow(y, sr=sr, ax=ax, color='steelblue')
        ax.set_title("Biểu đồ Sóng Âm (Waveform)")
        ax.set_xlabel("Thời gian (s)")
        ax.set_ylabel("Biên độ")
        fig.tight_layout()

        # Render to memory rather than a fixed file name in the temp dir:
        # a shared path lets concurrent requests overwrite each other's image.
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=80)
    finally:
        # Close this specific figure (not whatever pyplot considers current),
        # and do so even if plotting fails, so figures do not accumulate.
        plt.close(fig)

    buf.seek(0)
    image = Image.open(buf)
    image.load()  # force decoding now; Image.open alone is lazy
    return image
98
+
99
def bao_san_sang(file_path):
    """Return the HTML "audio ready" notice for the UI.

    Args:
        file_path: Path of the selected/recorded audio, or a falsy value
            (``None`` / empty string) when no audio is present.

    Returns:
        An empty string when there is no audio, otherwise a green HTML
        snippet telling the user the audio is ready to be checked.
    """
    if not file_path:
        return ""
    return "<b style='color:green;'>✅ Âm thanh đã sẵn sàng. Nhấn kiểm tra ngay!</b>"
103
+
104
def sinh_anh(file_path):
    """Build the three visualisations shown for an audio file.

    Args:
        file_path: Path of the audio file, or a falsy value when none is set.

    Returns:
        A 3-tuple ``(mel_img, wavelet_img, waveform_img)`` produced by
        ``tao_anh_mel``, ``tao_wavelet_transform`` and ``tao_waveform_image``
        in that order, or ``(None, None, None)`` when there is no audio.
    """
    if not file_path:
        return None, None, None

    mel_img = tao_anh_mel(file_path)
    wavelet_img = tao_wavelet_transform(file_path)
    waveform_img = tao_waveform_image(file_path)
    return mel_img, wavelet_img, waveform_img
111
 
112
  def du_doan(file_path):
113
  if not file_path:
114
  return "<b style='color:red;'>❌ Chưa có âm thanh.</b>"
115
 
116
  signal, sr = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)
117
+ signal, _ = librosa.effects.trim(signal)
 
 
 
 
 
 
 
118
  signal = librosa.util.fix_length(signal, size=SAMPLE_RATE * MAX_DURATION)
119
+
120
  if USE_DENOISE:
121
  signal = denoise_wavelet(signal)
122
 
 
131
  avg_probs = np.mean(y_preds, axis=0)
132
  pred_index = np.argmax(avg_probs)
133
  confidence = avg_probs[pred_index] * 100
134
+ pred_label = "HƯ HỎNG KHÁC" if confidence < 60 else index_to_label[pred_index]
 
 
 
 
135
 
136
  html = f"""<div style='background:#f0faff;color:#000;padding:10px;border-radius:10px'>
137
  <b style='color:#000'>📋 Kết Quả:</b><br>
 
144
  html += "</div>"
145
  return html
146
 
 
 
 
 
 
 
 
 
 
 
 
 
147
def reset_output():
    """Return the blank values used to clear the UI outputs.

    Returns:
        A 5-tuple: an empty string, three ``None`` placeholders and another
        empty string. Which components these map to is decided by the caller
        that wires this function up -- not visible in this block.
    """
    return "", None, None, None, ""
149
 
 
192
  """)
193
 
194
  with gr.Row():
195
+ audio_file = gr.Audio(type="filepath", label="📂 Tải File Âm Thanh", interactive=True)
196
  audio_mic = gr.Audio(type="filepath", label="🎤 Ghi Âm", sources=["microphone"], interactive=True)
197
 
198
  thong_bao_ready = gr.HTML()
 
229
  output_html
230
  ])
231
 
232
+ demo.launch()