"""Streamlit app: classify an uploaded audio clip with a trained Keras CNN.

The user uploads a ``.wav``/``.mp3`` file; the app renders its
mel-spectrogram, feeds the (128, 128, 1) log-spectrogram to a model saved
as ``sound_classification_model.h5``, and reports the predicted class
(UrbanSound8K-style label set).
"""

import os

# Cache locations and GPU hiding MUST be exported before the heavy
# libraries are imported below — TensorFlow reads CUDA_VISIBLE_DEVICES and
# matplotlib reads MPLCONFIGDIR at import time, so setting them afterwards
# (as the original code did) has no effect.
os.environ["HF_HOME"] = "/data/huggingface"
os.environ["HF_HUB_CACHE"] = "/data/huggingface/hub"
os.environ["HF_DATASETS_CACHE"] = "/data/huggingface/datasets"
os.environ["TRANSFORMERS_CACHE"] = "/data/huggingface/transformers"
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
os.environ["XDG_CACHE_HOME"] = "/data/cache"
# Disable TensorFlow GPU discovery (avoids massive CUDA stub cache downloads
# on CPU-only hosts).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tempfile

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
import tensorflow as tf

# Belt and braces: also hide GPUs at the TF runtime level.
tf.config.set_visible_devices([], "GPU")


@st.cache_resource
def load_model():
    """Load the trained model once and cache it for all sessions/reruns."""
    model_path = "sound_classification_model.h5"  # Replace with the path to your .h5 file
    return tf.keras.models.load_model(model_path)


model = load_model()

# Class index -> human-readable label (order must match the model's output).
CLASS_LABELS = {
    0: 'Air Conditioner',
    1: 'Car Horn',
    2: 'Children Playing',
    3: 'Dog Bark',
    4: 'Drilling',
    5: 'Engine Idling',
    6: 'Gun Shot',
    7: 'Jackhammer',
    8: 'Siren',
    9: 'Street Music',
}


def preprocess_audio(file_path, n_mels=128, fixed_time_steps=128):
    """Convert an audio file into a fixed-size normalized log-mel spectrogram.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.
    n_mels : int
        Number of mel frequency bands (model expects 128).
    fixed_time_steps : int
        Time frames to pad/crop to (model expects 128).

    Returns
    -------
    np.ndarray of shape (n_mels, fixed_time_steps, 1), or None if the file
    could not be processed (best-effort: errors are logged, not raised).
    """
    try:
        y, sr = librosa.load(file_path, sr=None)  # keep native sample rate
        mel_spectrogram = librosa.feature.melspectrogram(
            y=y, sr=sr, n_mels=n_mels, fmax=sr / 2
        )
        log_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
        # Guard against division by zero on silent (all-zero) input.
        peak = np.max(np.abs(log_spectrogram))
        if peak > 0:
            log_spectrogram = log_spectrogram / peak
        # Pad with zeros or crop along the time axis to a fixed width.
        if log_spectrogram.shape[1] < fixed_time_steps:
            padding = fixed_time_steps - log_spectrogram.shape[1]
            log_spectrogram = np.pad(
                log_spectrogram, ((0, 0), (0, padding)), mode='constant'
            )
        else:
            log_spectrogram = log_spectrogram[:, :fixed_time_steps]
        return np.expand_dims(log_spectrogram, axis=-1)  # Add channel dimension for CNNs
    except Exception as e:
        # Deliberate best-effort: the caller shows a friendly message on None.
        print(f"Error processing {file_path}: {e}")
        return None


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("Audio Spectrogram Prediction")
st.write("Upload an audio file to generate a spectrogram and predict its class using your trained model.")

uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3"])

if uploaded_file is not None:
    # Read uploaded bytes so we can both play and save to temp for processing.
    file_bytes = uploaded_file.read()

    # Determine a sensible suffix from the uploaded filename (fall back to .wav).
    try:
        ext = os.path.splitext(uploaded_file.name)[1]
    except Exception:
        ext = ".wav"
    if not ext:
        ext = ".wav"

    # Save the audio to a temporary file because librosa needs a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_audio_file:
        temp_audio_file.write(file_bytes)
        temp_audio_path = temp_audio_file.name

    try:
        # Audio player from the raw bytes; fall back to format inference.
        try:
            st.audio(file_bytes, format=getattr(uploaded_file, "type", None))
        except Exception:
            st.audio(file_bytes)

        st.write("Processing audio into a spectrogram...")
        spectrogram = preprocess_audio(temp_audio_path)

        if spectrogram is not None:
            st.write("Generated Spectrogram:")
            # Use an explicit figure and close it afterwards so figures do not
            # accumulate in memory across Streamlit reruns.
            fig = plt.figure(figsize=(10, 4))
            # NOTE(review): sr/fmax are hard-coded for display while the audio
            # is loaded at its native rate — axis labels may be off for files
            # not sampled at 22050 Hz; confirm intended behavior.
            librosa.display.specshow(
                spectrogram[:, :, 0],
                sr=22050,
                x_axis='time',
                y_axis='mel',
                fmax=8000,
                cmap='plasma',
            )
            plt.colorbar(format='%+2.0f dB')
            plt.title('Mel-Spectrogram')
            plt.tight_layout()
            st.pyplot(fig)
            plt.close(fig)

            st.write("Predicting...")
            batch = np.expand_dims(spectrogram, axis=0)  # Add batch dimension
            predictions = model.predict(batch)
            predicted_class_index = np.argmax(predictions, axis=-1)[0]
            predicted_class_label = CLASS_LABELS.get(predicted_class_index, "Unknown")

            st.write("Prediction Results:")
            st.write(f"**Predicted Class:** {predicted_class_label} (Index: {predicted_class_index})")
            st.write(f"**Raw Model Output:** {predictions}")
        else:
            st.write("Failed to process the audio file. Please try again with a different file.")
    finally:
        # Always remove the temp copy, even if rendering/prediction raised.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass