# Kumar Shubham
# Adding new ui
# df6079a
# import gradio as gr
import os
import joblib
from sound_classifier import SoundClassifier
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tempfile
from collections import Counter
# Directory that holds the serialized model bundles, and the display name for
# each bundle file name this app knows how to present.
MODELS_DIR = 'models'
MODEL_NAMES = {
    'lr_model.joblib': 'Logistic Regression',
    'nn_model.joblib': 'Neural Network',
    'rf_model.joblib': 'Random Forest',
    'svm_model.joblib': 'Support Vector Machine',
    'xgb_model.joblib': 'XGBoost',
}


def list_model_files(models_dir):
    """Return the sorted ``*_model.joblib`` file names found in *models_dir*.

    ``os.listdir`` returns entries in arbitrary order, so the names are sorted
    to keep everything derived from them (chart order, vote tie-breaking)
    stable between runs.

    Args:
        models_dir: Path of the directory to scan.

    Returns:
        A sorted list of file names (not full paths). Empty when the
        directory does not exist or contains no matching file.
    """
    try:
        entries = os.listdir(models_dir)
    except (FileNotFoundError, NotADirectoryError):
        # Keep the module importable; report the problem instead of crashing.
        print(f"Models directory not found: {models_dir!r}")
        return []
    return sorted(name for name in entries if name.endswith('_model.joblib'))


model_files = list_model_files(MODELS_DIR)
# Display name -> file name, restricted to the bundles listed in MODEL_NAMES.
model_choices = {MODEL_NAMES[file]: file for file in model_files if file in MODEL_NAMES}
print(model_choices)
def load_model(model_file):
    """Read a model bundle from ``MODELS_DIR`` and unpack its three parts.

    Args:
        model_file: File name of the bundle inside ``MODELS_DIR``.

    Returns:
        A ``(model, scaler, label_encoder)`` tuple taken from the bundle's
        ``'model'``, ``'scaler'`` and ``'label_encoder'`` entries.
    """
    # NOTE(review): joblib.load unpickles the file, so bundles should only
    # come from a trusted location.
    bundle = joblib.load(os.path.join(MODELS_DIR, model_file))
    return tuple(bundle[part] for part in ('model', 'scaler', 'label_encoder'))
def format_issue(issue_text):
    """Turn a raw class label into a display string.

    The exact label ``'normal'`` maps to a fixed "no issues" message; any
    other label has its underscores replaced by spaces and is title-cased.
    """
    return (
        'Normal Engine Sound (No Issues)'
        if issue_text == 'normal'
        else issue_text.replace('_', ' ').title()
    )
def _prediction_confidence(model, features_scaled, prediction):
    """Return the probability *model* assigns to *prediction*, or 0.

    Models without ``predict_proba`` yield 0. The probability column is
    looked up through ``model.classes_`` when available, because
    ``predict_proba`` columns follow ``classes_`` order rather than the class
    values themselves; if no match is found the prediction is used directly
    as the column index.
    """
    if not hasattr(model, 'predict_proba'):
        return 0
    proba = model.predict_proba(features_scaled)[0]
    classes = getattr(model, 'classes_', None)
    if classes is not None:
        matches = np.flatnonzero(np.asarray(classes) == prediction)
        if matches.size:
            return proba[matches[0]]
    return proba[prediction]


def get_all_model_predictions(audio_file):
    """Run every model in ``model_choices`` on one audio file and vote.

    Features are extracted once with ``SoundClassifier`` and fed to each
    model after applying that model's own scaler. A model that raises is
    recorded as an ``'Error'`` entry and takes no part in the vote.

    Args:
        audio_file: Audio file passed to ``SoundClassifier.extract_features``.

    Returns:
        A tuple ``(results, best_model, best_prediction, highest_confidence)``:

        * ``results`` maps each model's display name to a dict with
          ``'label'``, ``'confidence'`` and ``'raw_label'``. When at least one
          model succeeded it also has an ``'Ensemble (Voting)'`` entry
          carrying ``'vote_count'`` and ``'total_votes'`` as well.
        * ``best_model`` / ``best_prediction`` name the entry with the highest
          confidence. If no model reported a positive confidence, the
          ensemble vote is used instead. Both are ``None`` only when no model
          produced a prediction.
        * ``highest_confidence`` is the confidence of that best entry.
    """
    results = {}
    highest_confidence = 0
    best_model = None
    best_prediction = None
    all_predictions = []
    all_confidences = {}

    # The classifier is used for feature extraction only.
    classifier = SoundClassifier(data_dir='data')
    features = classifier.extract_features(audio_file)
    features = features.reshape(1, -1)

    for model_name, model_file in model_choices.items():
        try:
            model, scaler, le = load_model(model_file)
            features_scaled = scaler.transform(features)
            prediction = model.predict(features_scaled)[0]
            predicted_label = le.inverse_transform([prediction])[0]
            formatted_label = format_issue(predicted_label)
            confidence = _prediction_confidence(model, features_scaled, prediction)
        except Exception as e:
            # Best effort: one broken model must not abort the comparison.
            print(f"Error with model {model_name}: {str(e)}")
            results[model_name] = {
                'label': 'Error',
                'confidence': 0,
                'raw_label': 'error'
            }
            continue

        results[model_name] = {
            'label': formatted_label,
            'confidence': confidence,
            'raw_label': predicted_label  # Raw label is what gets voted on
        }

        if confidence > highest_confidence:
            highest_confidence = confidence
            best_model = model_name
            best_prediction = formatted_label

        all_predictions.append(predicted_label)
        all_confidences.setdefault(predicted_label, []).append(confidence)

    if all_predictions:
        # Majority vote; ties go to the label that was predicted first.
        voted_prediction, vote_count = Counter(all_predictions).most_common(1)[0]
        avg_confidence = np.mean(all_confidences[voted_prediction])
        voted_formatted = format_issue(voted_prediction)

        results['Ensemble (Voting)'] = {
            'label': voted_formatted,
            'confidence': avg_confidence,
            'raw_label': voted_prediction,
            'vote_count': vote_count,
            'total_votes': len(all_predictions)
        }

        # The mean over a subset can hardly exceed the overall maximum, so the
        # ensemble mainly wins as a fallback: when no individual model
        # reported a usable confidence, the consensus is the best answer.
        if best_model is None or avg_confidence > highest_confidence:
            highest_confidence = avg_confidence
            best_model = 'Ensemble (Voting)'
            best_prediction = voted_formatted

    return results, best_model, best_prediction, highest_confidence
def create_confidence_chart(results, best_model):
    """Render a bar chart of per-model confidence and save it as a PNG.

    Args:
        results: Mapping of model name to a dict with a ``'confidence'``
            value in the 0-1 range (it is shown as a percentage).
        best_model: Name of the entry to highlight in green. The
            ``'Ensemble (Voting)'`` entry is purple unless it is the best
            one; every other bar is blue.

    Returns:
        Path of a temporary ``.png`` file. The file is created with
        ``delete=False``, so it is left on disk for the caller.
    """
    models = list(results)
    confidences = [data['confidence'] * 100 for data in results.values()]

    colors = []
    for model in models:
        if model == best_model:
            colors.append('green')
        elif model == 'Ensemble (Voting)':
            colors.append('purple')
        else:
            colors.append('blue')

    # Use an explicit figure rather than pyplot's implicit "current figure",
    # and always close it so a failure while drawing or saving cannot leak it.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bars = ax.bar(models, confidences, color=colors)
        ax.set_xlabel('Model')
        ax.set_ylabel('Confidence (%)')
        ax.set_title('Model Confidence Comparison')
        for tick_label in ax.get_xticklabels():
            tick_label.set_rotation(45)
            tick_label.set_ha('right')

        # Value label above each bar.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height + 1,
                    f'{height:.1f}%', ha='center', va='bottom')

        # Lay out once every artist is in place.
        fig.tight_layout()

        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            chart_path = tmp.name
        fig.savefig(chart_path)
    finally:
        plt.close(fig)
    return chart_path
def create_voting_chart(results):
    """Render a pie chart of how the individual models voted.

    Entries whose ``'raw_label'`` is ``'error'`` are skipped, as is the
    ``'Ensemble (Voting)'`` entry itself, so the slices add up to the models
    that actually produced a prediction.
    NOTE(review): this assumes ``'error'`` is only ever used as the failure
    marker and is not a real class label; confirm against the training data.

    Args:
        results: Mapping of model name to a dict with a ``'raw_label'``.

    Returns:
        Path of a temporary ``.png`` file (created with ``delete=False``), or
        ``None`` when there is no ``'Ensemble (Voting)'`` entry or no vote to
        draw.
    """
    if 'Ensemble (Voting)' not in results:
        return None

    # Counter keeps first-seen order, so slices follow the model order.
    vote_counts = Counter(
        data.get('raw_label', 'unknown')
        for model, data in results.items()
        if model != 'Ensemble (Voting)'
        and data.get('raw_label', 'unknown') != 'error'
    )
    if not vote_counts:
        return None

    labels = [format_issue(label) for label in vote_counts]
    counts = list(vote_counts.values())

    # Explicit figure, always closed, so a failed save cannot leak it.
    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        ax.pie(counts, labels=labels, autopct='%1.1f%%', startangle=90, shadow=True)
        ax.axis('equal')  # Equal aspect ratio keeps the pie circular
        ax.set_title('Voting Distribution')

        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            chart_path = tmp.name
        fig.savefig(chart_path)
    finally:
        plt.close(fig)
    return chart_path
def predict_sound(audio_file):
    """Classify an uploaded audio file with every model and report the result.

    Args:
        audio_file: The uploaded audio file; a falsy value means nothing was
            uploaded.

    Returns:
        A ``(text, confidence_chart, voting_chart)`` tuple. ``text`` is the
        report: best prediction, ensemble vote (when present) and one line
        per model. The two chart values are whatever
        ``create_confidence_chart`` and ``create_voting_chart`` return. With
        no upload, the text is a prompt and both charts are ``None``.
    """
    if not audio_file:
        return "Please upload an audio file", None, None

    ensemble_key = 'Ensemble (Voting)'

    results, best_model, best_prediction, highest_confidence = get_all_model_predictions(audio_file)
    confidence_chart = create_confidence_chart(results, best_model)
    voting_chart = create_voting_chart(results)

    # Collect the report piece by piece and join once at the end.
    parts = [f"Best Prediction: {best_prediction} (Confidence: {highest_confidence:.2%})\n\n"]

    if ensemble_key in results:
        ensemble = results[ensemble_key]
        parts.append(
            f"Ensemble Voting Result: {ensemble['label']} "
            f"(Confidence: {ensemble['confidence']:.2%}, "
            f"Votes: {ensemble['vote_count']}/{ensemble['total_votes']})\n\n"
        )

    parts.append("All Model Predictions:\n")
    for name, info in results.items():
        if name == ensemble_key:
            # The ensemble already has its own line above.
            continue
        if info['confidence'] > 0:
            shown = f"{info['confidence']:.2%}"
        else:
            shown = "N/A"
        parts.append(f"- {name}: {info['label']} (Confidence: {shown})\n")

    return "".join(parts), confidence_chart, voting_chart
# Create Gradio interface
# iface = gr.Interface(
# fn=predict_sound,
# inputs=gr.Audio(type="filepath", label="Upload Sound File"),
# outputs=[
# gr.Textbox(label="Prediction Results"),
# gr.Image(label="Confidence Comparison"),
# gr.Image(label="Voting Distribution")
# ],
# title="Engine Sound Issue Classifier",
# description="Upload an audio file of engine sound to identify potential issues or normal operation. The system will compare predictions across all available models and use ensemble voting to provide a consensus prediction.",
# examples=[
# [os.path.join("test_data", "air_filter_sample_5.wav")],
# [os.path.join("test_data", "cd_sample_16.wav")],
# [os.path.join("test_data", "vl_sample_4.wav")],
# # Add example for fan belt issue if available
# [os.path.join("test_data", "fan_belt_sample.wav") if os.path.exists(os.path.join("test_data", "fan_belt_sample.wav")) else None]
# ]
# )
# if __name__ == "__main__":
# iface.launch()