Spaces:

krshubham
/

engine-fault-detection

Sleeping

Kumar Shubham

Adding new ui

df6079a 11 months ago

9.42 kB

	# import gradio as gr
	import os
	import joblib
	from sound_classifier import SoundClassifier
	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd
	import tempfile
	from collections import Counter

	# Directory that holds the serialized model bundles (one joblib file per model)
	MODELS_DIR = 'models'

	# Maps each known bundle filename to the display name shown to the user
	MODEL_NAMES = {
	    'lr_model.joblib': 'Logistic Regression',
	    'nn_model.joblib': 'Neural Network',
	    'rf_model.joblib': 'Random Forest',
	    'svm_model.joblib': 'Support Vector Machine',
	    'xgb_model.joblib': 'XGBoost',
	}

	# os.listdir() returns entries in arbitrary order. Sorting keeps the model
	# order stable between runs, which matters downstream because dict order
	# drives both chart order and which label wins a tied vote.
	try:
	    model_files = sorted(
	        f for f in os.listdir(MODELS_DIR) if f.endswith('_model.joblib')
	    )
	except FileNotFoundError:
	    # Start with no models instead of failing at import time, but say so loudly
	    print(f"Warning: models directory '{MODELS_DIR}' not found; no models available")
	    model_files = []

	# Display name -> bundle filename, restricted to files with a known display name
	model_choices = {MODEL_NAMES[file]: file for file in model_files if file in MODEL_NAMES}

	print(model_choices)

	def load_model(model_file):
	    """Load one saved model bundle from MODELS_DIR.

	    The file is read with joblib.load and must hold a mapping with the keys
	    'model', 'scaler' and 'label_encoder'.

	    Args:
	        model_file: File name of the bundle, relative to MODELS_DIR.

	    Returns:
	        A (model, scaler, label_encoder) tuple, in that order.
	    """
	    bundle = joblib.load(os.path.join(MODELS_DIR, model_file))
	    return tuple(bundle[key] for key in ('model', 'scaler', 'label_encoder'))

	def format_issue(issue_text):
	    """Turn a raw class label into a human-readable issue name.

	    The special label 'normal' maps to a fixed descriptive phrase; every other
	    label has its underscores replaced by spaces and is title-cased.
	    """
	    if issue_text != 'normal':
	        return issue_text.replace('_', ' ').title()
	    return 'Normal Engine Sound (No Issues)'

	def _confidence_for_prediction(model, features_scaled, prediction):
	    """Return the probability the model assigns to its own prediction.

	    Returns None when the model does not expose predict_proba, so callers can
	    tell "no confidence available" apart from a real probability.
	    """
	    if not hasattr(model, 'predict_proba'):
	        return None
	    proba = model.predict_proba(features_scaled)[0]
	    # predict_proba columns are ordered like model.classes_, so look the
	    # prediction up there rather than assuming it equals the column index.
	    classes = getattr(model, 'classes_', None)
	    if classes is not None:
	        try:
	            return proba[list(classes).index(prediction)]
	        except ValueError:
	            pass  # prediction not listed in classes_: fall back below
	    # No usable classes_: treat the prediction itself as the column index
	    return proba[prediction]


	def get_all_model_predictions(audio_file):
	    """Run every available model on one audio file and combine the results.

	    Features are extracted once with SoundClassifier.extract_features and
	    reshaped to a single row. Each model in model_choices is then loaded,
	    applied to the scaled features, and its prediction decoded with the
	    bundle's label encoder. A model that raises is reported with the label
	    'Error' and takes no part in the vote.

	    The 'Ensemble (Voting)' entry holds the most common raw label (ties go
	    to the label seen first) and the mean confidence of the models that
	    voted for it and reported a probability. Models without predict_proba
	    still vote, but are left out of that mean.

	    Args:
	        audio_file: Path of the audio file handed to extract_features.

	    Returns:
	        A (results, best_model, best_prediction, highest_confidence) tuple.
	        results maps a model display name to a dict with 'label',
	        'confidence' and 'raw_label' (plus 'vote_count' and 'total_votes'
	        for the ensemble entry). best_model and best_prediction are None
	        only when no model produced a prediction.
	    """
	    results = {}
	    highest_confidence = 0
	    best_model = None
	    best_prediction = None
	    all_predictions = []
	    all_confidences = {}

	    # The classifier object is used here only for feature extraction
	    classifier = SoundClassifier(data_dir='data')
	    features = classifier.extract_features(audio_file).reshape(1, -1)

	    for model_name, model_file in model_choices.items():
	        try:
	            model, scaler, le = load_model(model_file)
	            features_scaled = scaler.transform(features)
	            prediction = model.predict(features_scaled)[0]
	            predicted_label = le.inverse_transform([prediction])[0]
	            formatted_label = format_issue(predicted_label)
	            model_confidence = _confidence_for_prediction(
	                model, features_scaled, prediction
	            )
	        except Exception as e:
	            # Best effort: one broken model must not take down the others
	            print(f"Error with model {model_name}: {str(e)}")
	            results[model_name] = {
	                'label': 'Error',
	                'confidence': 0,
	                'raw_label': 'error'
	            }
	            continue

	        # Consumers of results expect a number; 0 stands for "not available"
	        confidence = 0 if model_confidence is None else model_confidence

	        results[model_name] = {
	            'label': formatted_label,
	            'confidence': confidence,
	            'raw_label': predicted_label  # Store raw label for voting
	        }

	        if confidence > highest_confidence:
	            highest_confidence = confidence
	            best_model = model_name
	            best_prediction = formatted_label

	        # Every successful model votes, but only real probabilities are
	        # averaged, so a model without predict_proba cannot drag the
	        # ensemble confidence toward zero.
	        all_predictions.append(predicted_label)
	        if model_confidence is not None:
	            all_confidences.setdefault(predicted_label, []).append(model_confidence)

	    if all_predictions:
	        voted_prediction, vote_count = Counter(all_predictions).most_common(1)[0]

	        voter_confidences = all_confidences.get(voted_prediction)
	        avg_confidence = np.mean(voter_confidences) if voter_confidences else 0

	        voted_formatted = format_issue(voted_prediction)

	        results['Ensemble (Voting)'] = {
	            'label': voted_formatted,
	            'confidence': avg_confidence,
	            'raw_label': voted_prediction,
	            'vote_count': vote_count,
	            'total_votes': len(all_predictions)
	        }

	        # A mean cannot meaningfully exceed the best single confidence, so
	        # the comparison only fires on float rounding. The ensemble is also
	        # the fallback when no individual model reported a positive
	        # confidence, so a prediction is still returned.
	        if best_model is None or avg_confidence > highest_confidence:
	            highest_confidence = avg_confidence
	            best_model = 'Ensemble (Voting)'
	            best_prediction = voted_formatted

	    return results, best_model, best_prediction, highest_confidence

	def create_confidence_chart(results, best_model):
	    """Render a bar chart of per-model confidence and save it as a PNG.

	    Args:
	        results: Mapping of model display name to a dict holding at least a
	            numeric 'confidence' (a fraction; it is plotted as a percentage).
	        best_model: Name of the entry to highlight in green. The
	            'Ensemble (Voting)' entry is drawn in purple unless it is the
	            best one; every other bar is blue.

	    Returns:
	        Path of the PNG file. It is created with delete=False, so removing
	        it is left to the caller.
	    """
	    models = list(results)
	    # Convert to percentage
	    confidences = [data['confidence'] * 100 for data in results.values()]

	    colors = []
	    for model in models:
	        if model == best_model:
	            colors.append('green')
	        elif model == 'Ensemble (Voting)':
	            colors.append('purple')  # Highlight voting in a different color
	        else:
	            colors.append('blue')

	    fig = plt.figure(figsize=(10, 6))
	    try:
	        bars = plt.bar(models, confidences, color=colors)
	        plt.xlabel('Model')
	        plt.ylabel('Confidence (%)')
	        plt.title('Model Confidence Comparison')
	        plt.xticks(rotation=45, ha='right')
	        plt.tight_layout()

	        # Add value labels on top of bars
	        for bar in bars:
	            height = bar.get_height()
	            plt.text(bar.get_x() + bar.get_width()/2., height + 1,
	                     f'{height:.1f}%', ha='center', va='bottom')

	        # Reserve a file name, then release the handle before writing to it:
	        # re-opening a NamedTemporaryFile by name while it is still open is
	        # not portable across platforms.
	        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
	            chart_path = tmp.name
	        fig.savefig(chart_path)
	    finally:
	        # Always release the figure, even when plotting or saving fails
	        plt.close(fig)
	    return chart_path

	def create_voting_chart(results):
	    """Render a pie chart of how the models voted and save it as a PNG.

	    Every entry except 'Ensemble (Voting)' counts as one vote for its
	    'raw_label' ('unknown' when the key is missing), so failed models show
	    up under their 'error' label.

	    Args:
	        results: Mapping of model display name to a result dict.

	    Returns:
	        Path of the PNG file (created with delete=False, so removing it is
	        left to the caller), or None when results has no
	        'Ensemble (Voting)' entry.
	    """
	    if 'Ensemble (Voting)' not in results:
	        return None

	    # Count votes for each class, skipping the ensemble result itself.
	    # Counter keeps first-seen order, so slices appear in model order.
	    vote_counts = Counter(
	        data.get('raw_label', 'unknown')
	        for model, data in results.items()
	        if model != 'Ensemble (Voting)'
	    )

	    labels = [format_issue(label) for label in vote_counts]
	    counts = list(vote_counts.values())

	    fig = plt.figure(figsize=(8, 8))
	    try:
	        plt.pie(counts, labels=labels, autopct='%1.1f%%', startangle=90, shadow=True)
	        plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
	        plt.title('Voting Distribution')

	        # Reserve a file name, then release the handle before writing to it:
	        # re-opening a NamedTemporaryFile by name while it is still open is
	        # not portable across platforms.
	        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
	            chart_path = tmp.name
	        fig.savefig(chart_path)
	    finally:
	        # Always release the figure, even when plotting or saving fails
	        plt.close(fig)
	    return chart_path

	def predict_sound(audio_file):
	    """Classify an uploaded audio file with every model and summarise the outcome.

	    Args:
	        audio_file: Path of the uploaded audio file; a falsy value means
	            nothing was uploaded.

	    Returns:
	        A (text, confidence_chart, voting_chart) tuple: the textual report
	        followed by the values returned by create_confidence_chart and
	        create_voting_chart. When no file was given, the text is a prompt
	        to upload one and both chart slots are None.
	    """
	    if not audio_file:
	        return "Please upload an audio file", None, None

	    # Run all models first, then render both charts from the same results
	    results, best_model, best_prediction, highest_confidence = get_all_model_predictions(audio_file)
	    confidence_chart = create_confidence_chart(results, best_model)
	    voting_chart = create_voting_chart(results)

	    # Collect the report piece by piece and join once at the end
	    report = [f"Best Prediction: {best_prediction} (Confidence: {highest_confidence:.2%})\n\n"]

	    # Voting details are only present when at least one model voted
	    if 'Ensemble (Voting)' in results:
	        ensemble = results['Ensemble (Voting)']
	        report.append(f"Ensemble Voting Result: {ensemble['label']} ")
	        report.append(f"(Confidence: {ensemble['confidence']:.2%}, ")
	        report.append(f"Votes: {ensemble['vote_count']}/{ensemble['total_votes']})\n\n")

	    report.append("All Model Predictions:\n")

	    for name, info in results.items():
	        if name == 'Ensemble (Voting)':
	            continue  # the ensemble was already reported above
	        if info['confidence'] > 0:
	            shown_confidence = f"{info['confidence']:.2%}"
	        else:
	            # A non-positive confidence is displayed as unavailable
	            shown_confidence = "N/A"
	        report.append(f"- {name}: {info['label']} (Confidence: {shown_confidence})\n")

	    return "".join(report), confidence_chart, voting_chart

	# Create Gradio interface
	# iface = gr.Interface(
	# fn=predict_sound,
	# inputs=gr.Audio(type="filepath", label="Upload Sound File"),
	# outputs=[
	# gr.Textbox(label="Prediction Results"),
	# gr.Image(label="Confidence Comparison"),
	# gr.Image(label="Voting Distribution")
	# ],
	# title="Engine Sound Issue Classifier",
	# description="Upload an audio file of engine sound to identify potential issues or normal operation. The system will compare predictions across all available models and use ensemble voting to provide a consensus prediction.",
	# examples=[
	# [os.path.join("test_data", "air_filter_sample_5.wav")],
	# [os.path.join("test_data", "cd_sample_16.wav")],
	# [os.path.join("test_data", "vl_sample_4.wav")],
	# # Add example for fan belt issue if available
	# [os.path.join("test_data", "fan_belt_sample.wav") if os.path.exists(os.path.join("test_data", "fan_belt_sample.wav")) else None]
	# ]
	# )

	# if __name__ == "__main__":
	# iface.launch()