"""Gradio demo app for AnomalyMachine-50K dataset anomaly detection."""

import os
import random
import tempfile
from pathlib import Path
from typing import Optional

import gradio as gr
import gradio_client.utils as _client_utils
import librosa
import librosa.display  # specshow lives in this submodule; import it explicitly
import matplotlib

matplotlib.use("Agg")  # Non-interactive backend; select it before pyplot is imported

import matplotlib.pyplot as plt  # noqa: E402
import numpy as np  # noqa: E402

# Fix Gradio 4.35/4.44 API info crash when JSON schema has boolean
# (e.g. additionalProperties: true)
_orig_get_type = _client_utils.get_type


def _get_type_handle_bool(schema):
    """Return "boolean" for bare-bool schemas, else defer to gradio_client."""
    if isinstance(schema, bool):
        return "boolean"
    return _orig_get_type(schema)


_client_utils.get_type = _get_type_handle_bool

# Dataset metadata
DATASET_INFO = {
    "total_clips": 50000,
    "machines": ["fan", "pump", "compressor", "conveyor_belt", "electric_motor", "valve"],
    "normal_ratio": 0.6,
    "anomalous_ratio": 0.4,
    "clip_duration_seconds": 10.0,
    "sample_rate": 22050,
    "total_hours": round(50000 * 10.0 / 3600, 2),
}

# Anomaly subtypes mapping
ANOMALY_SUBTYPES = {
    "fan": ["bearing_fault", "imbalance", "obstruction"],
    "pump": ["bearing_fault", "cavitation", "overheating"],
    "compressor": ["bearing_fault", "imbalance", "overheating"],
    "conveyor_belt": ["obstruction"],
    "electric_motor": ["bearing_fault", "imbalance", "overheating"],
    "valve": ["cavitation", "obstruction"],
}

# Placeholder model - replace with actual trained model
MODEL_NAME = "YOUR_HF_USERNAME/AnomalyMachine-Classifier"
model = None

# Shared body of the "How It Works" accordion.
_HOW_IT_WORKS_HTML = """

Signal Processing-Based Synthesis

The AnomalyMachine-50K dataset is generated entirely using deterministic signal processing techniques—no neural audio models are used. This ensures reproducibility, lightweight generation, and freedom from copyright concerns.

1. Base Machine Sound Generation

Each machine type has a dedicated synthesis model:

2. Operating Condition Modulation

Conditions (idle, normal_load, high_load) modulate amplitude and harmonic content.

3. Anomaly Injection

Anomalies are injected via signal transformations:

4. Background Noise

Factory-floor ambience (pink noise + 60/120 Hz hum) is mixed at configurable SNR levels.

All synthesis is deterministic and reproducible with a fixed random seed.

"""


def load_model():
    """Lazy load the audio classification model.

    Uses placeholder if transformers unavailable.

    Returns:
        The cached ``transformers`` audio-classification pipeline, or the
        string ``"placeholder"`` when the pipeline could not be created.
    """
    global model
    if model is None:
        try:
            from transformers import pipeline

            model = pipeline(
                "audio-classification",
                model=MODEL_NAME,
            )
        except Exception as e:
            # Deliberate best-effort: any failure (missing package, missing
            # weights, ...) switches the demo to placeholder predictions.
            print(f"Using placeholder predictions (no model): {e}")
            model = "placeholder"
    return model


def create_mel_spectrogram(audio_path: str, title: str = "Mel Spectrogram") -> Optional[str]:
    """Create a mel spectrogram visualization from audio file.

    Args:
        audio_path: Path of the audio file to load.
        title: Title drawn above the plot.

    Returns:
        Path of a PNG written to a temporary file, or ``None`` if loading or
        rendering failed (the error is printed).
    """
    fig = None
    try:
        y, sr = librosa.load(audio_path, sr=DATASET_INFO["sample_rate"], mono=True)
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        fig, ax = plt.subplots(figsize=(10, 4))
        img = librosa.display.specshow(
            mel_spec_db,
            x_axis="time",
            y_axis="mel",
            sr=sr,
            fmax=8000,
            ax=ax,
            cmap="viridis",
        )
        ax.set_title(title, fontsize=14, fontweight="bold")
        fig.colorbar(img, ax=ax, format="%+2.0f dB")
        fig.tight_layout()

        # Reserve a temp path and close the handle before matplotlib writes
        # to it, so no file descriptor stays open after this call.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
            image_path = temp_file.name
        fig.savefig(image_path, dpi=100, bbox_inches="tight")
        return image_path
    except Exception as e:
        print(f"Error creating spectrogram: {e}")
        return None
    finally:
        # Close the figure on every path, including failures, so figures do
        # not accumulate across requests.
        if fig is not None:
            plt.close(fig)


def get_reference_audio(machine_type: str) -> Optional[str]:
    """Get path to reference normal audio for a machine type.

    Args:
        machine_type: Machine name used as the filename prefix.

    Returns:
        Path of the first matching example (in sorted order), or ``None`` when
        the ``examples`` directory holds no file for this machine.
    """
    examples_dir = Path(__file__).parent / "examples"

    # Look for a normal example (pattern: {machine}_*_normal_*.wav).
    # Sorted so the same reference is picked on every run.
    ref_files = sorted(examples_dir.glob(f"{machine_type}_*_normal_*.wav"))
    if not ref_files:
        # Fallback: use any example for this machine
        ref_files = sorted(examples_dir.glob(f"{machine_type}_*.wav"))

    return str(ref_files[0]) if ref_files else None


def _placeholder_prediction(machine_type):
    """Return a random (label, confidence, anomaly_subtype) for demo use."""
    is_anomaly = random.random() > 0.5
    confidence = random.uniform(0.7, 0.95)
    if is_anomaly:
        anomaly_subtype = random.choice(ANOMALY_SUBTYPES.get(machine_type, ["unknown"]))
        return "ANOMALY", confidence, anomaly_subtype
    return "NORMAL", confidence, "none"


def _model_prediction(model_instance, audio_file, machine_type):
    """Run the real model and return (label, confidence, anomaly_subtype).

    Any exception is printed and reported as an ``"ERROR"`` label with zero
    confidence.
    """
    try:
        results = model_instance(audio_file)
        # Assuming model returns list of dicts with 'label' and 'score'
        top_result = results[0] if isinstance(results, list) else results
        label_str = top_result.get("label", "").lower()
        confidence = top_result.get("score", 0.5)

        if "anomaly" not in label_str and "anomalous" not in label_str:
            return "NORMAL", confidence, "none"

        # Try to extract anomaly subtype from label
        anomaly_subtype = next(
            (s for s in ANOMALY_SUBTYPES.get(machine_type, []) if s in label_str),
            "unknown",
        )
        return "ANOMALY", confidence, anomaly_subtype
    except Exception as e:
        print(f"Prediction error: {e}")
        return "ERROR", 0.0, "none"


def predict_anomaly(audio_file, machine_type):
    """Predict if audio contains an anomaly.

    Args:
        audio_file: Path of the uploaded/recorded audio, or ``None``.
        machine_type: Machine name selected in the UI.

    Returns:
        A 5-tuple ``(result_html, confidence, input_spec, ref_spec,
        audio_file)``; all five are ``None`` when no audio was provided.
    """
    if audio_file is None:
        return None, None, None, None, None

    # Load model
    model_instance = load_model()

    # Create spectrograms
    input_spec = create_mel_spectrogram(audio_file, f"Input Audio - {machine_type}")
    ref_audio = get_reference_audio(machine_type)
    ref_spec = None
    if ref_audio:
        ref_spec = create_mel_spectrogram(ref_audio, f"Reference Normal - {machine_type}")

    # Make prediction
    if model_instance == "placeholder":
        label, confidence, anomaly_subtype = _placeholder_prediction(machine_type)
    else:
        label, confidence, anomaly_subtype = _model_prediction(
            model_instance, audio_file, machine_type
        )

    # Format result HTML. The optional subtype section is built separately:
    # a single-quoted f-string cannot span physical lines.
    subtype_section = ""
    if anomaly_subtype != "none":
        subtype_section = (
            f"\n\nAnomaly Type: {anomaly_subtype.replace('_', ' ').title()}\n\n"
        )

    result_html = f"""

{label} {'✓' if label == 'NORMAL' else '✗'}

{subtype_section}

Confidence: {confidence:.1%}

"""

    return result_html, confidence, input_spec, ref_spec, audio_file


def create_dataset_gallery():
    """Create gallery of example spectrograms for each machine type.

    Returns:
        A list of ``(normal_spec, anomaly_spec, machine)`` tuples, one per
        machine for which at least one spectrogram could be rendered. Empty
        when the ``examples`` directory does not exist.
    """
    examples_dir = Path(__file__).parent / "examples"
    if not examples_dir.exists():
        return []

    gallery_items = []
    for machine in DATASET_INFO["machines"]:
        # Find normal and anomalous examples (sorted for a stable choice)
        normal_files = sorted(examples_dir.glob(f"{machine}_*_normal_*.wav"))
        anomaly_files = sorted(examples_dir.glob(f"{machine}_*_anomalous_*.wav"))

        normal_spec = None
        anomaly_spec = None
        if normal_files:
            normal_spec = create_mel_spectrogram(str(normal_files[0]), f"{machine} - Normal")
        if anomaly_files:
            anomaly_spec = create_mel_spectrogram(str(anomaly_files[0]), f"{machine} - Anomaly")

        if normal_spec or anomaly_spec:
            gallery_items.append((normal_spec, anomaly_spec, machine))

    return gallery_items


def build_explore_tab():
    """Build the dataset exploration tab.

    Returns:
        ``(normal_gallery, anomaly_gallery, normal_images, anomaly_images)``:
        the two gallery components and the image-path lists they were
        populated with.
    """
    gallery_items = create_dataset_gallery()

    # Populate galleries (skip machines whose spectrogram is missing)
    normal_images = [normal for normal, _, _ in gallery_items if normal]
    anomaly_images = [anomaly for _, anomaly, _ in gallery_items if anomaly]

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Normal Examples")
            normal_gallery = gr.Gallery(
                label="Normal Machine Sounds",
                show_label=False,
                elem_id="normal_gallery",
                columns=2,
                rows=3,
                height="auto",
                value=normal_images or None,
            )
        with gr.Column():
            gr.Markdown("### Anomaly Examples")
            anomaly_gallery = gr.Gallery(
                label="Anomalous Machine Sounds",
                show_label=False,
                elem_id="anomaly_gallery",
                columns=2,
                rows=3,
                height="auto",
                value=anomaly_images or None,
            )

    # Dataset statistics
    with gr.Accordion("Dataset Statistics", open=False):
        stats_html = """

AnomalyMachine-50K Dataset

Machine Breakdown:

"""
        gr.HTML(stats_html)

    # Download button
    dataset_url = "https://huggingface.co/datasets/mandipgoswami/AnomalyMachine-50K"
    gr.Markdown(f"[Download the dataset on Hugging Face]({dataset_url})")

    return normal_gallery, anomaly_gallery, normal_images, anomaly_images


def build_detect_tab():
    """Build the anomaly detection tab.

    Returns:
        The created components, in order: ``audio_input``,
        ``machine_dropdown``, ``predict_btn``, ``result_html``,
        ``confidence_bar``, ``input_spec``, ``ref_spec``, ``audio_output``.
    """
    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                label="Upload Audio or Record",
                type="filepath",
                sources=["upload", "microphone"],
            )
            machine_dropdown = gr.Dropdown(
                choices=DATASET_INFO["machines"],
                label="Machine Type",
                value=DATASET_INFO["machines"][0],
                info="Select the type of machine in the audio",
            )
            predict_btn = gr.Button("Detect Anomaly", variant="primary", size="lg")

        with gr.Column(scale=2):
            result_html = gr.HTML(label="Prediction Result")
            confidence_bar = gr.Slider(
                minimum=0,
                maximum=1,
                value=0,
                label="Confidence Score",
                interactive=False,
            )

    with gr.Row():
        with gr.Column():
            input_spec = gr.Image(label="Input Audio Spectrogram")
        with gr.Column():
            ref_spec = gr.Image(label="Reference Normal Spectrogram")

    audio_output = gr.Audio(label="Processed Audio", visible=False)

    # Output order must match the tuple returned by predict_anomaly.
    predict_btn.click(
        fn=predict_anomaly,
        inputs=[audio_input, machine_dropdown],
        outputs=[result_html, confidence_bar, input_spec, ref_spec, audio_output],
    )

    return (
        audio_input,
        machine_dropdown,
        predict_btn,
        result_html,
        confidence_bar,
        input_spec,
        ref_spec,
        audio_output,
    )


def build_header():
    """Build the app header."""
    return gr.Markdown(
        """

🏭 AnomalyMachine-50K

Synthetic Industrial Machine Sound Anomaly Detection Dataset

View Dataset on Hugging Face →

"""
    )


def build_footer():
    """Build the app footer."""
    return gr.Markdown(
        """

License: CC-BY 4.0 | Dataset: AnomalyMachine-50K | GitHub: mandip42/anomaly-machine-50k

"""
    )


def build_how_it_works():
    """Build the 'How it works' accordion.

    Must be called inside an active ``gr.Blocks`` context; the accordion is
    created collapsed with the explanatory HTML nested inside it.

    Returns:
        The created ``gr.Accordion``.
    """
    with gr.Accordion("How It Works", open=False) as accordion:
        gr.HTML(_HOW_IT_WORKS_HTML)
    return accordion


def main():
    """Main Gradio app entry point."""
    theme = gr.themes.Monochrome(
        primary_hue="red",
        secondary_hue="gray",
        font=("Helvetica", "ui-sans-serif", "system-ui"),
    )

    with gr.Blocks(theme=theme, title="AnomalyMachine-50K Demo") as app:
        build_header()

        with gr.Tabs():
            with gr.Tab("🔍 Detect Anomaly"):
                build_how_it_works()
                build_detect_tab()

            with gr.Tab("📊 Explore Dataset"):
                build_explore_tab()

        build_footer()

    app.launch(share=False, server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    main()