Spaces:

zainulabedin949
/

Audio-Spectrogram-Transformer

Sleeping

App Files Files Community

zainulabedin949 committed on Apr 9, 2025

Commit

65c3f40

verified ·

1 Parent(s): e9b0e37

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -81

app.py CHANGED Viewed

@@ -18,140 +18,195 @@ DEFAULT_THRESHOLD = 0.7
 # Load the pretrained feature extractor and audio-classification model named by
 # MODEL_NAME once at module level; analyze_audio reuses both on every call.
 feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
 model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
def handle_audio_file(audio_file):
    """Decode an uploaded audio stream into a mono sample array.

    The stream's bytes are spooled to a temporary ``.wav`` file, decoded with
    ``sf.read`` and, when the result has more than one dimension, averaged
    over axis 1 to produce a single channel.

    Args:
        audio_file: Any object exposing ``read()`` that returns the raw bytes
            of the audio file.

    Returns:
        A ``(audio, sr)`` tuple exactly as produced by ``sf.read``, except
        that ``audio`` has been down-mixed to one dimension.

    Raises:
        ValueError: If reading, writing or decoding fails for any reason. The
            original exception is attached as ``__cause__``.
    """
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
            # Record the path before writing so cleanup still happens when
            # audio_file.read() or the write itself raises.
            tmp_path = tmp.name
            tmp.write(audio_file.read())
        audio, sr = sf.read(tmp_path)
        # Convert to mono if needed
        if audio.ndim > 1:
            audio = np.mean(audio, axis=1)
        return audio, sr
    except Exception as e:
        raise ValueError(f"Error processing audio file: {e}") from e
    finally:
        # delete=False means nothing removes the file for us; do it on every
        # exit path, not only after a successful decode.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
 def analyze_audio(audio_input, threshold=DEFAULT_THRESHOLD):
     """Classify an audio clip as "Normal" or "Anomaly" and render its mel spectrogram.

     Args:
         audio_input: A file path (``str``), an object with a ``name``
             attribute that ``handle_audio_file`` can read from, or a
             ``(sample_rate, samples)`` tuple.
         threshold: Probability of class index 0 above which the clip is
             labelled "Normal".

     Returns:
         A 4-tuple ``(label, confidence, spectrogram_path, raw_probabilities)``
         where ``confidence`` is formatted as a percentage and
         ``raw_probabilities`` is the stringified first row of the softmax
         output. On any failure the tuple is ``("Error: <message>", "", None, "")``.
     """
     fig = None
     try:
         # Normalise the supported input shapes to (audio, sr).
         if isinstance(audio_input, str):  # File path
             # Open under a context manager so the handle is closed as soon as
             # handle_audio_file has copied the bytes.
             with open(audio_input, 'rb') as source_file:
                 audio, sr = handle_audio_file(source_file)
         elif hasattr(audio_input, 'name'):  # Gradio file object
             audio, sr = handle_audio_file(audio_input)
         elif isinstance(audio_input, tuple):  # Direct numpy array
             sr, audio = audio_input
         else:
             raise ValueError("Unsupported audio input format")

         # Resample if needed
         if sr != SAMPLING_RATE:
             audio = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLING_RATE)

         # Extract features
         inputs = feature_extractor(
             audio,
             sampling_rate=SAMPLING_RATE,
             return_tensors="pt",
             padding=True,
             return_attention_mask=True
         )

         # Run inference without tracking gradients.
         with torch.no_grad():
             outputs = model(**inputs)
             probs = torch.softmax(outputs.logits, dim=-1)

         # Class index 0 is treated as the "Normal" score; the reported
         # confidence is the probability mass on whichever side won.
         normal_prob = probs[0][0].item()
         predicted_class = "Normal" if normal_prob > threshold else "Anomaly"
         confidence = normal_prob if predicted_class == "Normal" else 1 - normal_prob

         # Create spectrogram
         spectrogram = librosa.feature.melspectrogram(
             y=audio,
             sr=SAMPLING_RATE,
             n_mels=64,
             fmax=8000
         )
         db_spec = librosa.power_to_db(spectrogram, ref=np.max)
         fig, ax = plt.subplots(figsize=(10, 4))
         img = librosa.display.specshow(
             db_spec,
             x_axis='time',
             y_axis='mel',
             sr=SAMPLING_RATE,
             fmax=8000,
             ax=ax
         )
         fig.colorbar(img, ax=ax, format='%+2.0f dB')
         ax.set(title='Mel Spectrogram')
         fig.tight_layout()

         # The path is constant, so each call overwrites the previous image.
         spec_path = os.path.join(tempfile.gettempdir(), 'spec.png')
         fig.savefig(spec_path, bbox_inches='tight')

         return (
             predicted_class,
             f"{confidence:.1%}",
             spec_path,
             str(probs.tolist()[0])
         )
     except Exception as e:
         return f"Error: {str(e)}", "", None, ""
     finally:
         # Release the figure on every exit path; pyplot keeps figures alive
         # until they are explicitly closed.
         if fig is not None:
             plt.close(fig)
# Gradio interface: a two-column layout with inputs on the left and results on
# the right, wired to analyze_audio through the analyze button.
with gr.Blocks(title="Industrial Audio Analyzer", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("""
    # 🏭 Industrial Equipment Sound Analyzer
    ### Powered by Audio Spectrogram Transformer (AST)
    """)

    with gr.Row():
        # Left column: what the user supplies.
        with gr.Column():
            audio_input = gr.Audio(label="Upload Equipment Audio (.wav)", type="filepath")
            threshold = gr.Slider(
                label="Anomaly Detection Threshold",
                value=DEFAULT_THRESHOLD,
                minimum=0.5,
                maximum=0.95,
                step=0.05,
            )
            analyze_btn = gr.Button("🔍 Analyze Sound", variant="primary")

        # Right column: what analyze_audio returns, in the same order as its tuple.
        with gr.Column():
            result_label = gr.Label(label="Detection Result")
            confidence = gr.Textbox(label="Confidence Score")
            spectrogram = gr.Image(label="Spectrogram Visualization")
            # Kept hidden; it only receives the raw probability string.
            raw_probs = gr.Textbox(label="Model Output Probabilities", visible=False)

    analyze_btn.click(
        fn=analyze_audio,
        inputs=[audio_input, threshold],
        outputs=[result_label, confidence, spectrogram, raw_probs],
    )

    # Usage notes shown under the controls.
    gr.Markdown("""
    **Instructions:**
    - Upload .wav audio recordings (5-10 seconds recommended)
    - Adjust threshold to control sensitivity
    - Results show Normal/Anomaly classification with confidence
    """)
 if __name__ == "__main__":

 # Load the pretrained feature extractor and audio-classification model named by
 # MODEL_NAME once at module level; analyze_audio reuses both on every call.
 feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
 model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
# Maintenance knowledge base: the outer key is the equipment family, the inner
# key is a fault signature, and the value is the advice shown to the user.
EQUIPMENT_RECOMMENDATIONS = dict(
    bearing=dict(
        high_frequency="Recommend bearing replacement. High-frequency noise indicates wear or lubrication issues.",
        low_frequency="Check for improper installation or contamination in bearings.",
        irregular="Possible bearing cage damage. Schedule vibration analysis.",
    ),
    pump=dict(
        cavitation="Pump cavitation detected. Check suction conditions and NPSH.",
        impeller="Impeller damage likely. Inspect and balance if needed.",
        misalignment="Misalignment detected. Perform laser shaft alignment.",
    ),
    motor=dict(
        electrical="Electrical fault suspected. Check windings and connections.",
        mechanical="Mechanical imbalance detected. Perform dynamic balancing.",
        bearing="Motor bearing wear detected. Schedule replacement.",
    ),
    compressor=dict(
        valve="Compressor valve leakage suspected. Perform valve test.",
        pulsation="Pulsation issues detected. Check dampeners and piping.",
        surge="Compressor surge condition. Review control settings.",
    ),
)
def analyze_frequency_patterns(audio, sr):
    """Label the coarse frequency profile of a clip.

    Args:
        audio: One-dimensional sequence of audio samples.
        sr: Sample rate of ``audio``.

    Returns:
        A list containing at most one of ``"high_frequency"`` /
        ``"low_frequency"`` (from the mean spectral centroid) followed
        optionally by ``"harmonic_rich"`` (from the mean spectral roll-off).
        An empty clip yields an empty list.
    """
    # An empty clip has no spectrum to measure; report "no patterns" rather
    # than letting the spectral feature calls fail on zero-length input.
    if np.size(audio) == 0:
        return []

    # Spectral analysis: one value per frame, averaged over the whole clip.
    centroid_per_frame = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
    rolloff_per_frame = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
    mean_centroid = np.mean(centroid_per_frame)
    mean_rolloff = np.mean(rolloff_per_frame)

    patterns = []
    if mean_centroid > 3000:  # High frequency components
        patterns.append("high_frequency")
    elif mean_centroid < 1000:  # Low frequency components
        patterns.append("low_frequency")

    # NOTE(review): a roll-off frequency cannot exceed sr / 2, so this branch
    # can only fire when sr is above 16000 - confirm against the sample rate
    # the caller passes in.
    if mean_rolloff > 8000:  # Rich in harmonics
        patterns.append("harmonic_rich")
    return patterns
def _classify_equipment_type(audio):
    """Guess the equipment family from spectral flatness and signal level."""
    mean_flatness = np.mean(librosa.feature.spectral_flatness(y=audio)[0])
    if mean_flatness < 0.2:
        return "bearing"
    if mean_flatness < 0.6:
        return "pump"
    # Use the mean *absolute* amplitude as the level measure. The signed mean
    # of an audio waveform sits at roughly zero regardless of loudness, so
    # comparing it with 0.1 would make "compressor" practically unreachable.
    return "motor" if np.mean(np.abs(audio)) < 0.1 else "compressor"


def generate_recommendation(prediction, confidence, audio, sr):
    """Generate maintenance recommendations based on analysis.

    Args:
        prediction: Classifier label; ``"Normal"`` short-circuits to a fixed
            all-clear message and ``"Anomaly"`` adds the suggested actions.
        confidence: Confidence of ``prediction`` as a fraction; values above
            0.8 add an urgent notice for anomalies.
        audio: One-dimensional array of samples used for the frequency and
            equipment-type heuristics.
        sr: Sample rate of ``audio``.

    Returns:
        A newline-joined, human-readable recommendation string.
    """
    if prediction == "Normal":
        return "No immediate action required. Equipment operating within normal parameters."

    patterns = analyze_frequency_patterns(audio, sr)
    equipment_type = _classify_equipment_type(audio)

    # Look the equipment family up once; an unknown family has no specific advice.
    known_advice = EQUIPMENT_RECOMMENDATIONS.get(equipment_type, {})

    recommendations = [
        "🔧 Maintenance Recommendations:",
        f"Detected issues in {equipment_type} with {confidence:.1%} confidence",
    ]
    recommendations.extend(
        f"→ {known_advice[pattern]}" for pattern in patterns if pattern in known_advice
    )

    # General recommendations
    if prediction == "Anomaly":
        recommendations.extend([
            "\n🛠️ Suggested Actions:",
            "1. Isolate equipment if possible",
            "2. Perform visual inspection",
            "3. Schedule detailed diagnostics",
            f"4. Review last maintenance records ({equipment_type})",
        ])
        if confidence > 0.8:
            recommendations.append("\n🚨 Urgent: High confidence abnormality detected. Recommend immediate inspection!")

    return "\n".join(recommendations)
 def analyze_audio(audio_input, threshold=DEFAULT_THRESHOLD):
     """Classify a recording, render its mel spectrogram and suggest maintenance.

     Args:
         audio_input: Path to an audio file (``str``) or an object exposing
             ``read()`` that returns the file's bytes.
         threshold: Probability of class index 0 above which the clip is
             labelled "Normal".

     Returns:
         A 4-tuple ``(label, confidence, spectrogram_path, recommendations)``
         where ``confidence`` is formatted as a percentage. On any failure the
         tuple is ``("Error: <message>", "", None, "")``.
     """
     fig = None
     tmp_path = None
     try:
         # Nothing uploaded: fail with a readable message instead of an
         # AttributeError about NoneType.
         if audio_input is None:
             raise ValueError("No audio file provided")

         # Handle file upload
         if isinstance(audio_input, str):
             audio, sr = sf.read(audio_input)
         else:  # Gradio file object
             with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
                 # Record the path first so the finally block can remove the
                 # file even when reading or writing fails.
                 tmp_path = tmp.name
                 tmp.write(audio_input.read())
             audio, sr = sf.read(tmp_path)

         # Convert to mono and resample if needed
         if audio.ndim > 1:
             audio = np.mean(audio, axis=1)
         if sr != SAMPLING_RATE:
             audio = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLING_RATE)

         # Feature extraction and prediction
         inputs = feature_extractor(audio, sampling_rate=SAMPLING_RATE, return_tensors="pt")
         with torch.no_grad():
             outputs = model(**inputs)
             probs = torch.softmax(outputs.logits, dim=-1)

         # Class index 0 is treated as the "Normal" score; the reported
         # confidence is the probability mass on whichever side won.
         normal_prob = probs[0][0].item()
         predicted_class = "Normal" if normal_prob > threshold else "Anomaly"
         confidence = normal_prob if predicted_class == "Normal" else 1 - normal_prob

         # Generate spectrogram
         spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLING_RATE, n_mels=64, fmax=8000)
         db_spec = librosa.power_to_db(spectrogram, ref=np.max)
         fig, ax = plt.subplots(figsize=(10, 4))
         img = librosa.display.specshow(
             db_spec, x_axis='time', y_axis='mel', sr=SAMPLING_RATE, fmax=8000, ax=ax
         )
         # Hand the mesh to colorbar explicitly: with ax= supplied, specshow
         # does not register a pyplot "current image", so a bare
         # plt.colorbar() has no mappable to draw from and raises.
         fig.colorbar(img, ax=ax, format='%+2.0f dB')
         ax.set_title('Mel Spectrogram with Anomaly Detection')

         # Mark anomalies on plot
         if predicted_class == "Anomaly":
             ax.text(0.5, 0.9, 'ANOMALY DETECTED', color='red',
                     ha='center', va='center', transform=ax.transAxes,
                     fontsize=14, bbox=dict(facecolor='white', alpha=0.8))

         # The path is constant, so each call overwrites the previous image.
         spec_path = os.path.join(tempfile.gettempdir(), 'spec.png')
         fig.savefig(spec_path, bbox_inches='tight')

         # Generate detailed recommendations
         recommendations = generate_recommendation(predicted_class, confidence, audio, SAMPLING_RATE)

         return (
             predicted_class,
             f"{confidence:.1%}",
             spec_path,
             recommendations
         )
     except Exception as e:
         return f"Error: {str(e)}", "", None, ""
     finally:
         # Clean up on every exit path: pyplot keeps figures alive until they
         # are closed, and delete=False leaves the spooled upload on disk.
         if fig is not None:
             plt.close(fig)
         if tmp_path is not None and os.path.exists(tmp_path):
             os.unlink(tmp_path)
# Gradio Interface: a two-column layout with inputs on the left and the
# diagnosis on the right, wired to analyze_audio through the analyze button.
with gr.Blocks(title="Industrial Diagnostic Assistant 👨‍🔧", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("""
    # 🏭 Industrial Equipment Diagnostic Assistant
    ## Acoustic Anomaly Detection & Maintenance Recommendation System
    """)

    with gr.Row():
        # Left column: what the user supplies.
        with gr.Column():
            # NOTE(review): `source=` looks like the Gradio 3.x spelling; newer
            # releases appear to expect `sources=[...]` - confirm against the
            # Gradio version this Space pins.
            audio_input = gr.Audio(
                label="Upload Equipment Recording (.wav)",
                type="filepath",
                source="upload",
            )
            threshold = gr.Slider(
                label="Detection Sensitivity",
                value=DEFAULT_THRESHOLD,
                minimum=0.5,
                maximum=0.95,
                step=0.05,
                interactive=True,
            )
            analyze_btn = gr.Button("🔍 Analyze & Diagnose", variant="primary")

        # Right column: what analyze_audio returns, in the same order as its tuple.
        with gr.Column():
            result_label = gr.Label(label="Diagnosis Result")
            confidence = gr.Textbox(label="Confidence Score")
            spectrogram = gr.Image(label="Acoustic Analysis")
            recommendations = gr.Textbox(label="Maintenance Recommendations", lines=10, interactive=False)

    analyze_btn.click(
        fn=analyze_audio,
        inputs=[audio_input, threshold],
        outputs=[result_label, confidence, spectrogram, recommendations],
    )

    # Capability summary shown under the controls.
    gr.Markdown("""
    ### System Capabilities:
    - Automatic anomaly detection in industrial equipment sounds
    - Frequency pattern analysis to identify failure modes
    - Equipment-specific maintenance recommendations
    - Confidence-based urgency classification
    **Tip:** For best results, use 5-10 second recordings of steady operation
    """)
 if __name__ == "__main__":