File size: 9,418 Bytes
df6079a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# import gradio as gr
import os
import joblib
from sound_classifier import SoundClassifier
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tempfile
from collections import Counter

# Get list of available models and their friendly names
MODELS_DIR = 'models'
# Maps a saved joblib filename to the display name shown in the UI/output.
# Files present in MODELS_DIR but not listed here are ignored.
MODEL_NAMES = {
    'lr_model.joblib': 'Logistic Regression',
    'nn_model.joblib': 'Neural Network',
    'rf_model.joblib': 'Random Forest',
    'svm_model.joblib': 'Support Vector Machine',
    'xgb_model.joblib': 'XGBoost'
}

# Discover the model files actually on disk; NOTE(review): raises if
# MODELS_DIR does not exist — presumably guaranteed by deployment.
model_files = [f for f in os.listdir(MODELS_DIR) if f.endswith('_model.joblib')]
# Display-name -> filename, restricted to known models found on disk.
model_choices = {MODEL_NAMES[file]: file for file in model_files if file in MODEL_NAMES}

# Debug output of the discovered models at import time.
print(model_choices)

def load_model(model_file):
    """Load a persisted model bundle from MODELS_DIR.

    Args:
        model_file: Filename of the joblib bundle (e.g. 'rf_model.joblib').

    Returns:
        Tuple of (model, scaler, label_encoder) unpacked from the bundle.
    """
    bundle = joblib.load(os.path.join(MODELS_DIR, model_file))
    return bundle['model'], bundle['scaler'], bundle['label_encoder']

def format_issue(issue_text):
    """Turn a raw class label into a human-readable description.

    The special label 'normal' gets a fixed friendly phrase; any other
    label has underscores replaced by spaces and is title-cased
    (e.g. 'fan_belt' -> 'Fan Belt').
    """
    if issue_text == 'normal':
        return 'Normal Engine Sound (No Issues)'
    return issue_text.replace('_', ' ').title()

def get_all_model_predictions(audio_file):
    """Run every available model on one audio file and aggregate the results.

    Extracts features once via SoundClassifier, then asks each model in
    ``model_choices`` for a prediction. Also computes a majority-vote
    "ensemble" prediction whose confidence is the mean confidence of the
    models that voted for it; the ensemble can become the "best" result
    when that mean beats every individual model.

    Args:
        audio_file: Path to the audio file to classify.

    Returns:
        Tuple of (results, best_model, best_prediction, highest_confidence).
        ``results`` maps a model display name to a dict with keys 'label'
        (formatted), 'confidence' (float, 0 when unavailable) and
        'raw_label'; the 'Ensemble (Voting)' entry additionally carries
        'vote_count' and 'total_votes'.
    """
    results = {}
    highest_confidence = 0
    best_model = None
    best_prediction = None
    all_predictions = []
    all_confidences = {}
    
    # Initialize classifier for feature extraction only
    classifier = SoundClassifier(data_dir='data')
    features = classifier.extract_features(audio_file)
    # Single sample -> 2-D row vector, as scikit-learn estimators expect.
    features = features.reshape(1, -1)
    
    # Get predictions from each model
    for model_name, model_file in model_choices.items():
        try:
            model, scaler, le = load_model(model_file)
            
            # Scale features
            features_scaled = scaler.transform(features)
            
            # Make prediction
            prediction = model.predict(features_scaled)[0]
            predicted_label = le.inverse_transform([prediction])[0]
            formatted_label = format_issue(predicted_label)
            
            # Get confidence
            # NOTE: models without predict_proba keep confidence 0, so they
            # can never become the "best" model even if they voted.
            confidence = 0
            if hasattr(model, 'predict_proba'):
                proba = model.predict_proba(features_scaled)[0]
                confidence = proba[prediction]
            
            results[model_name] = {
                'label': formatted_label,
                'confidence': confidence,
                'raw_label': predicted_label  # Store raw label for voting
            }
            
            # Track highest confidence
            if confidence > highest_confidence:
                highest_confidence = confidence
                best_model = model_name
                best_prediction = formatted_label
            
            # Store for voting
            all_predictions.append(predicted_label)
            if predicted_label not in all_confidences:
                all_confidences[predicted_label] = []
            all_confidences[predicted_label].append(confidence)
                
        except Exception as e:
            # A failing model is reported as an 'Error' row rather than
            # aborting the whole comparison; it casts no vote.
            print(f"Error with model {model_name}: {str(e)}")
            results[model_name] = {
                'label': 'Error',
                'confidence': 0,
                'raw_label': 'error'
            }
    
    # Perform voting
    vote_results = Counter(all_predictions)
    if vote_results:
        # Get the most common prediction (ties resolved by Counter's
        # most_common ordering, i.e. first-encountered wins).
        voted_prediction, vote_count = vote_results.most_common(1)[0]
        
        # Calculate average confidence for the voted prediction
        avg_confidence = np.mean(all_confidences.get(voted_prediction, [0]))
        
        # Format the voted prediction
        voted_formatted = format_issue(voted_prediction)
        
        # Add voting results
        results['Ensemble (Voting)'] = {
            'label': voted_formatted,
            'confidence': avg_confidence,
            'raw_label': voted_prediction,
            'vote_count': vote_count,
            'total_votes': len(all_predictions)
        }
        
        # Check if voting has higher confidence than individual models
        if avg_confidence > highest_confidence:
            highest_confidence = avg_confidence
            best_model = 'Ensemble (Voting)'
            best_prediction = voted_formatted
    
    return results, best_model, best_prediction, highest_confidence

def create_confidence_chart(results, best_model):
    """Render a bar chart comparing each model's confidence score.

    The best model is drawn in green, the ensemble entry in purple and
    all other models in blue.

    Args:
        results: Mapping of model name -> dict with a 'confidence' key (0..1).
        best_model: Name of the model to highlight.

    Returns:
        Path to a temporary PNG file containing the chart.
    """
    names = list(results.keys())
    pct_scores = [entry['confidence'] * 100 for entry in results.values()]
    bar_colors = [
        'green' if name == best_model
        else 'purple' if name == 'Ensemble (Voting)'
        else 'blue'
        for name in names
    ]

    plt.figure(figsize=(10, 6))
    bars = plt.bar(names, pct_scores, color=bar_colors)
    plt.xlabel('Model')
    plt.ylabel('Confidence (%)')
    plt.title('Model Confidence Comparison')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Annotate each bar with its percentage value.
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2., height + 1,
                 f'{height:.1f}%', ha='center', va='bottom')

    # Persist the figure to a temp file whose path is handed to the caller.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        plt.savefig(tmp.name)
        plt.close()
        return tmp.name

def create_voting_chart(results):
    """Render a pie chart of how many individual models voted per class.

    Args:
        results: Mapping of model name -> prediction dict; the entry named
            'Ensemble (Voting)' is excluded from the tally.

    Returns:
        Path to a temporary PNG file, or None when no ensemble entry exists.
    """
    if 'Ensemble (Voting)' not in results:
        return None

    # Tally one vote per individual model, keyed by its raw class label.
    vote_counts = {}
    for name, entry in results.items():
        if name == 'Ensemble (Voting)':  # Skip the ensemble result itself
            continue
        raw_label = entry.get('raw_label', 'unknown')
        vote_counts[raw_label] = vote_counts.get(raw_label, 0) + 1

    plt.figure(figsize=(8, 8))
    plt.pie(list(vote_counts.values()),
            labels=[format_issue(lbl) for lbl in vote_counts],
            autopct='%1.1f%%', startangle=90, shadow=True)
    plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
    plt.title('Voting Distribution')

    # Persist the figure to a temp file whose path is handed to the caller.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        plt.savefig(tmp.name)
        plt.close()
        return tmp.name

def predict_sound(audio_file):
    """Classify an uploaded audio file with every model and summarize.

    Args:
        audio_file: Path to the uploaded audio file (falsy when missing).

    Returns:
        Tuple of (summary text, confidence-chart path, voting-chart path);
        both chart paths are None when no file was supplied.
    """
    if not audio_file:
        return "Please upload an audio file", None, None

    # Run all models once and build both comparison charts.
    results, best_model, best_prediction, highest_confidence = get_all_model_predictions(audio_file)
    confidence_chart = create_confidence_chart(results, best_model)
    voting_chart = create_voting_chart(results)

    # Assemble the report as a list of lines joined at the end.
    parts = [f"Best Prediction: {best_prediction} (Confidence: {highest_confidence:.2%})", ""]

    ensemble = results.get('Ensemble (Voting)')
    if ensemble is not None:
        parts.append(
            f"Ensemble Voting Result: {ensemble['label']} "
            f"(Confidence: {ensemble['confidence']:.2%}, "
            f"Votes: {ensemble['vote_count']}/{ensemble['total_votes']})"
        )
        parts.append("")

    parts.append("All Model Predictions:")
    for name, entry in results.items():
        if name == 'Ensemble (Voting)':  # Skip ensemble in this section
            continue
        confidence_str = f"{entry['confidence']:.2%}" if entry['confidence'] > 0 else "N/A"
        parts.append(f"- {name}: {entry['label']} (Confidence: {confidence_str})")

    output_text = "\n".join(parts) + "\n"
    return output_text, confidence_chart, voting_chart

# Create Gradio interface
# iface = gr.Interface(
#     fn=predict_sound,
#     inputs=gr.Audio(type="filepath", label="Upload Sound File"),
#     outputs=[
#         gr.Textbox(label="Prediction Results"),
#         gr.Image(label="Confidence Comparison"),
#         gr.Image(label="Voting Distribution")
#     ],
#     title="Engine Sound Issue Classifier",
#     description="Upload an audio file of engine sound to identify potential issues or normal operation. The system will compare predictions across all available models and use ensemble voting to provide a consensus prediction.",
#     examples=[
#         [os.path.join("test_data", "air_filter_sample_5.wav")],
#         [os.path.join("test_data", "cd_sample_16.wav")],
#         [os.path.join("test_data", "vl_sample_4.wav")],
#         # Add example for fan belt issue if available
#         [os.path.join("test_data", "fan_belt_sample.wav") if os.path.exists(os.path.join("test_data", "fan_belt_sample.wav")) else None]
#     ]
# )

# if __name__ == "__main__":
#     iface.launch()