| import gradio as gr |
| import numpy as np |
| import torch |
| import librosa |
| from transformers import AutoFeatureExtractor, AutoModelForAudioClassification |
| import matplotlib.pyplot as plt |
| from matplotlib.colors import Normalize |
|
|
| |
# Sample rate (Hz) that audio is resampled to before feature extraction and
# that is passed to the feature extractor and the mel-spectrogram call.
SAMPLING_RATE = 16000
# Checkpoint identifier handed to both `from_pretrained` calls below.
MODEL_NAME = "MIT/ast-finetuned-audioset-10-10-0.4593"
# Default decision threshold: used as the default `threshold` argument of
# `analyze_audio` and as the initial value of the UI slider.
DEFAULT_THRESHOLD = 0.7


# Loaded once at import time so every request reuses the same objects.
# NOTE(review): presumably this fetches the checkpoint from the Hugging Face
# Hub on first run — confirm network/cache expectations for deployment.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
|
|
def _to_mono_float(audio):
    """Return *audio* as a non-empty 1-D float32 waveform."""
    audio = np.asarray(audio)
    if audio.size == 0:
        raise ValueError("audio is empty")

    if np.issubdtype(audio.dtype, np.integer):
        # Integer PCM (what Gradio's numpy mode delivers) is scaled to
        # roughly [-1, 1]; librosa rejects integer-typed buffers.
        scale = float(np.iinfo(audio.dtype).max)
        audio = audio.astype(np.float32) / scale
    else:
        audio = audio.astype(np.float32)

    # Gradio lays multi-channel audio out as (samples, channels) while
    # librosa uses (channels, samples); the channel axis is taken to be the
    # shortest one so that both layouts are down-mixed correctly.
    while audio.ndim > 1:
        audio = audio.mean(axis=int(np.argmin(audio.shape)))
    return audio


def _save_spectrogram(audio):
    """Render the mel spectrogram of *audio* to a PNG and return its path."""
    import tempfile

    mel = librosa.feature.melspectrogram(
        y=audio,
        sr=SAMPLING_RATE,
        n_mels=128,
        fmax=8000,
    )
    db_spec = librosa.power_to_db(mel, ref=np.max)

    # A unique file per call: a fixed name would let concurrent requests
    # overwrite each other's image.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as handle:
        image_path = handle.name

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        image = ax.imshow(
            db_spec,
            aspect='auto',
            origin='lower',
            norm=Normalize(vmin=-80, vmax=0),
            cmap='viridis',
        )
        fig.colorbar(image, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel Spectrogram')
        fig.tight_layout()
        fig.savefig(image_path, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
    return image_path


def analyze_audio(audio_array, threshold=DEFAULT_THRESHOLD):
    """Classify a recording as "Normal" or "Anomaly" and plot its spectrogram.

    Args:
        audio_array: Either a ``(sample_rate, samples)`` tuple (the format
            produced by ``gr.Audio(type="numpy")``) or a bare sample array
            that is assumed to already be at ``SAMPLING_RATE``.
        threshold: The recording is labelled "Normal" only when the
            probability at output index 0 is strictly greater than this.

    Returns:
        A 4-tuple ``(label, confidence, image_path, raw_probabilities)``:
        the predicted label, the confidence formatted as a percentage, the
        path of the saved spectrogram PNG, and the stringified probability
        list. On failure the tuple is ``("Error: ...", "", None, "")``;
        the image slot is ``None`` rather than an empty path so the image
        output is simply cleared.
    """
    if audio_array is None:
        return "Error: no audio provided", "", None, ""

    try:
        if isinstance(audio_array, tuple):
            sr, samples = audio_array
        else:
            sr, samples = SAMPLING_RATE, audio_array

        # Down-mix and convert to float *before* resampling: resampling
        # works along the last axis, which is the channel axis in Gradio's
        # (samples, channels) layout.
        audio = _to_mono_float(samples)
        if sr != SAMPLING_RATE:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLING_RATE)

        inputs = feature_extractor(
            audio,
            sampling_rate=SAMPLING_RATE,
            return_tensors="pt",
            padding=True,
            return_attention_mask=True,
        )

        with torch.no_grad():
            probs = torch.softmax(model(**inputs).logits, dim=-1)

        # NOTE(review): output index 0 is treated as the "Normal" class.
        # MODEL_NAME looks like a general AudioSet classifier, so confirm
        # via model.config.id2label that index 0 really means normal
        # operation.
        normal_prob = probs[0][0].item()
        if normal_prob > threshold:
            predicted_class, confidence = "Normal", normal_prob
        else:
            predicted_class, confidence = "Anomaly", 1 - normal_prob

        return (
            predicted_class,
            f"{confidence:.1%}",
            _save_spectrogram(audio),
            str(probs.tolist()[0]),
        )

    except Exception as e:
        # UI boundary: report the failure in the result field instead of
        # letting the request crash.
        return f"Error: {e}", "", None, ""
|
|
| |
# Gradio UI: inputs on the left, results on the right, wired to analyze_audio.
# NOTE(review): the leading "π" in the heading and on the button looks like a
# mis-encoded emoji — confirm the intended glyph before changing it.
with gr.Blocks(title="Industrial Audio Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # π Industrial Equipment Sound Analyzer
    ### Powered by Audio Spectrogram Transformer (AST)
    """)

    with gr.Row():
        # Left column: audio upload, threshold control, trigger and samples.
        with gr.Column():
            # NOTE(review): `source=` is accepted by Gradio 3.x; newer
            # releases use `sources=[...]` — confirm the pinned version.
            audio_input = gr.Audio(
                label="Upload Equipment Audio Recording",
                type="numpy",
                source="upload",
                show_download_button=True
            )
            # analyze_audio labels a recording "Normal" only when its
            # normal-class probability exceeds this value, so raising the
            # threshold flags MORE recordings as anomalous. The help text
            # below states that direction (the previous wording claimed the
            # opposite).
            threshold = gr.Slider(
                minimum=0.5,
                maximum=0.95,
                step=0.05,
                value=DEFAULT_THRESHOLD,
                label="Anomaly Detection Threshold",
                info="Higher values flag more recordings as anomalies: fewer missed faults, but more false positives"
            )
            analyze_btn = gr.Button("π Analyze Sound", variant="primary")

            gr.Examples(
                examples=["examples/normal_machine.wav", "examples/anomalous_machine.wav"],
                inputs=audio_input,
                label="Sample Recordings"
            )

        # Right column: one output component per element of the tuple
        # returned by analyze_audio.
        with gr.Column():
            result_label = gr.Label(label="Detection Result")
            confidence = gr.Textbox(label="Confidence Score")
            spectrogram = gr.Image(label="Spectrogram Visualization")
            # Hidden field that receives the raw probability list.
            raw_probs = gr.Textbox(
                label="Model Output Probabilities",
                visible=False
            )

    # The output order must match the order of analyze_audio's return tuple.
    analyze_btn.click(
        fn=analyze_audio,
        inputs=[audio_input, threshold],
        outputs=[result_label, confidence, spectrogram, raw_probs]
    )

    gr.Markdown("""
    ## How It Works
    - Upload audio recordings from industrial equipment
    - The AI analyzes sound patterns using spectrogram analysis
    - Detects anomalies indicating potential equipment issues

    **Tip**: For best results, use 5-10 second recordings of steady operation
    """)
|
|
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch a server.
if __name__ == "__main__":
    demo.launch()
|
|