Spaces:
Sleeping
Sleeping
File size: 5,958 Bytes
2fceb69 1364a0c 2fceb69 1364a0c d82f784 2fceb69 b0713c6 2fceb69 b0713c6 2fceb69 0ed912d 2fceb69 0ed912d 2fceb69 b0713c6 2fceb69 b0713c6 2fceb69 0ed912d 2e8ae12 de67440 d82f784 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# Configure cache locations and GPU visibility BEFORE importing the libraries
# that consume them: matplotlib reads MPLCONFIGDIR on import, the Hugging Face
# libraries read HF_* on import, and TensorFlow reads CUDA_VISIBLE_DEVICES when
# it first initializes. Setting them after the imports (as before) was too late.
import os

os.environ["HF_HOME"] = "/data/huggingface"
os.environ["HF_HUB_CACHE"] = "/data/huggingface/hub"
os.environ["HF_DATASETS_CACHE"] = "/data/huggingface/datasets"
os.environ["TRANSFORMERS_CACHE"] = "/data/huggingface/transformers"
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
os.environ["XDG_CACHE_HOME"] = "/data/cache"
# Disable TensorFlow GPU discovery
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import streamlit as st
import tensorflow as tf

# avoid massive CUDA stub cache downloads (belt-and-braces with the env var above)
tf.config.set_visible_devices([], 'GPU')

import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tempfile
# Load the trained model #test
@st.cache_resource
def load_model():
    """Load the Keras sound-classification model once and cache it for reruns."""
    # Replace with the path to your .h5 file
    return tf.keras.models.load_model("sound_classification_model.h5")


model = load_model()
# Map Class Labels: model output index -> human-readable class name.
CLASS_LABELS = dict(enumerate([
    'Air Conditioner',
    'Car Horn',
    'Children Playing',
    'Dog Bark',
    'Drilling',
    'Engine Idling',
    'Gun Shot',
    'Jackhammer',
    'Siren',
    'Street Music',
]))
# Preprocess audio into a spectrogram
def preprocess_audio(file_path, n_mels=128, fixed_time_steps=128):
    """Convert an audio file into a normalized log-mel spectrogram.

    Args:
        file_path: Path to an audio file readable by librosa.
        n_mels: Number of mel frequency bands (spectrogram height).
        fixed_time_steps: Target width; shorter clips are zero-padded on the
            right, longer ones truncated.

    Returns:
        np.ndarray of shape (n_mels, fixed_time_steps, 1) — trailing channel
        dimension added for CNN input — or None if processing fails.
    """
    try:
        # sr=None preserves the file's native sampling rate
        y, sr = librosa.load(file_path, sr=None)
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=sr / 2)
        log_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
        # Scale by the peak magnitude; guard against division by zero for
        # degenerate (e.g. silent) input, which previously produced NaNs.
        peak = np.max(np.abs(log_spectrogram))
        if peak > 0:
            log_spectrogram = log_spectrogram / peak
        # Pad or truncate along the time axis to a fixed width
        if log_spectrogram.shape[1] < fixed_time_steps:
            padding = fixed_time_steps - log_spectrogram.shape[1]
            log_spectrogram = np.pad(log_spectrogram, ((0, 0), (0, padding)), mode='constant')
        else:
            log_spectrogram = log_spectrogram[:, :fixed_time_steps]
        return np.expand_dims(log_spectrogram, axis=-1)  # Add channel dimension for CNNs
    except Exception as e:
        # Best-effort: log to console and signal failure to the caller via None
        print(f"Error processing {file_path}: {e}")
        return None
# Streamlit app UI
st.title("Audio Spectrogram Prediction")
st.write("Upload an audio file to generate a spectrogram and predict its class using your trained model.")

# File upload widget
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3"])

if uploaded_file is not None:
    # Read uploaded bytes once so we can both play and save to temp for processing
    file_bytes = uploaded_file.read()

    # Determine a sensible suffix from the uploaded filename (fall back to .wav)
    try:
        ext = os.path.splitext(uploaded_file.name)[1]
    except Exception:
        ext = ".wav"
    if not ext:
        ext = ".wav"

    # Save the uploaded audio file to a temporary location for librosa processing
    with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_audio_file:
        temp_audio_file.write(file_bytes)
        temp_audio_path = temp_audio_file.name

    # Provide an audio player in Streamlit using the raw bytes
    try:
        st.audio(file_bytes, format=(uploaded_file.type if hasattr(uploaded_file, "type") else None))
    except Exception:
        # Fallback: try to let Streamlit infer format
        st.audio(file_bytes)

    # Preprocess the audio into a spectrogram
    st.write("Processing audio into a spectrogram...")
    spectrogram = preprocess_audio(temp_audio_path)

    if spectrogram is not None:
        # Display the spectrogram on an explicit Figure and close it afterwards:
        # passing the global pyplot module to st.pyplot is deprecated, and
        # unclosed figures accumulate across Streamlit reruns.
        st.write("Generated Spectrogram:")
        fig, ax = plt.subplots(figsize=(10, 4))
        # NOTE(review): sr=22050 / fmax=8000 here are display assumptions only;
        # the audio was loaded at its native rate, so axis labels may be
        # approximate — confirm against the training pipeline.
        img = librosa.display.specshow(spectrogram[:, :, 0], sr=22050, x_axis='time',
                                       y_axis='mel', fmax=8000, cmap='plasma', ax=ax)
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel-Spectrogram')
        fig.tight_layout()
        st.pyplot(fig)
        plt.close(fig)

        # Predict using the model
        st.write("Predicting...")
        batch = np.expand_dims(spectrogram, axis=0)  # Add batch dimension
        predictions = model.predict(batch)
        predicted_class_index = np.argmax(predictions, axis=-1)[0]
        predicted_class_label = CLASS_LABELS.get(predicted_class_index, "Unknown")

        # Display the results
        st.write("Prediction Results:")
        st.write(f"**Predicted Class:** {predicted_class_label} (Index: {predicted_class_index})")
        st.write(f"**Raw Model Output:** {predictions}")
    else:
        st.write("Failed to process the audio file. Please try again with a different file.")

    # Clean up temporary file (best-effort; ignore failures)
    try:
        os.remove(temp_audio_path)
    except Exception:
        pass
# st.write("### Developer Team")
# developer_info = [
# {"name": "Faheyra", "image_url": "https://italeemc.iium.edu.my/pluginfile.php/21200/user/icon/remui/f3?rev=40826", "title": "MLetops Engineer"},
# {"name": "Adilah", "image_url": "https://italeemc.iium.edu.my/pluginfile.php/21229/user/icon/remui/f3?rev=43498", "title": "Ra-Sis-Chear"},
# {"name": "Aida", "image_url": "https://italeemc.iium.edu.my/pluginfile.php/21236/user/icon/remui/f3?rev=43918", "title": "Ra-Sis-Chear"},
# {"name": "Naufal", "image_url": "https://italeemc.iium.edu.my/pluginfile.php/21260/user/icon/remui/f3?rev=400622", "title": "Rizzichear"},
# {"name": "Fadzwan", "image_url": "https://italeemc.iium.edu.my/pluginfile.php/21094/user/icon/remui/f3?rev=59457", "title": "Nasser"},
# ]
# # Dynamically create columns based on the number of developers
# num_devs = len(developer_info)
# cols = st.columns(num_devs)
# # Display the developer profiles
# for idx, dev in enumerate(developer_info):
# col = cols[idx]
# with col:
# st.markdown(
# f'<div style="display: flex; flex-direction: column; align-items: center;">'
# f'<img src="{dev["image_url"]}" width="100" style="border-radius: 50%;">'
# f'<p>{dev["name"]}<br>{dev["title"]}</p>'
# f'<p></p>'
# f'</div>',
# unsafe_allow_html=True
# )
|