Spaces:

suriya7
/

Deep-Aduio-Classification

Sleeping

suriya7 committed on Feb 20, 2024

Commit

f00407e

verified ·

1 Parent(s): e25e7dc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,32 +7,40 @@ model = tf.keras.models.load_model('capuchin_bird_audio.h5')
 class_names = ['This Is Not A Capuchin bird','It is a capuchin Bird']
 # Function to preprocess input for the model
 def test_preprocess_1(file_path):
-    file_contents = tf.io.read_file(file_path)
-    wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
-    wav = tf.squeeze(wav, axis=-1)
-    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
-    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
-    wav = wav[:48000]
-    zero_padding = tf.zeros([48000] - tf.shape(wav), dtype=tf.float32)
-    wav = tf.concat([zero_padding, wav], 0)
-    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
-    spectrogram = tf.abs(spectrogram)
-    spectrogram = tf.expand_dims(spectrogram, axis=2)
-    spectrogram = tf.expand_dims(spectrogram, axis=0)
-    return spectrogram
 # Function to make predictions
 def predict_audio(wav):
     input_data = test_preprocess_1(wav)
-    prediction = model.predict(input_data)
-    # Threshold logic
-    if prediction > 0.5:
-        result = class_names[1]
     else:
-        result = class_names[0]
-    return result
 # Gradio Interface
@@ -45,5 +53,5 @@ iface = gr.Interface(
 )
 # Launch the interface on localhost
-iface.launch()

 class_names = ['This Is Not A Capuchin bird','It is a capuchin Bird']
 # Function to preprocess input for the model
 def test_preprocess_1(file_path):
+    _, file_extension = os.path.splitext(file_path)
+    if file_extension.lower() == '.wav':
+        file_contents = tf.io.read_file(file_path)
+        wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
+        wav = tf.squeeze(wav, axis=-1)
+        sample_rate = tf.cast(sample_rate, dtype=tf.int64)
+        wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
+        wav = wav[:48000]
+        zero_padding = tf.zeros([48000] - tf.shape(wav), dtype=tf.float32)
+        wav = tf.concat([zero_padding, wav], 0)
+        spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
+        spectrogram = tf.abs(spectrogram)
+        spectrogram = tf.expand_dims(spectrogram, axis=2)
+        spectrogram = tf.expand_dims(spectrogram, axis=0)
+        return spectrogram
+    else:
+        return False
 # Function to make predictions
 def predict_audio(wav):
     input_data = test_preprocess_1(wav)
+    if input_data:
+        prediction = model.predict(input_data)
+        # Threshold logic
+        if prediction > 0.5:
+            result = class_names[1]
+        else:
+            result = class_names[0]
+        return result
     else:
+        return "please upload a wav format"
 # Gradio Interface
 )
 # Launch the interface on localhost
+iface.launch(share=True)