import gradio as gr
import torch
import librosa
import numpy as np
import torch.nn as nn
import torchvision.models as models
# Define the custom model using ResNet18
class TransferLearningModel(nn.Module):
    def __init__(self, num_classes):
        super(TransferLearningModel, self).__init__()
        self.resnet = models.resnet18(weights=None)  # updated from deprecated 'pretrained'
        # Swap the first conv layer so the network accepts single-channel MFCC input instead of 3-channel RGB
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_ftrs, num_classes)
        self.dropout = nn.Dropout(0.5)
    def forward(self, x):
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)
        x = self.resnet.layer1(x)
        x = self.resnet.layer2(x)
        x = self.resnet.layer3(x)
        x = self.resnet.layer4(x)
        x = self.resnet.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.resnet.fc(x)
        return x
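# The forward pass expects a 4-D tensor of shape (batch, 1, n_mfcc, time_frames),
# e.g. (1, 1, 40, 174) for the MFCC features produced by extract_features_from_file below.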
# Labels for classification
LABELS = ['unknown', 'user1', 'user2', 'user3', 'user4', 'user5', 'user6', 'user7']
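# NOTE: the label order above is assumed to match the class-index order used when the
# checkpoint was trained; if it differs, predictions will map to the wrong user names.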
# Load the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TransferLearningModel(num_classes=len(LABELS))
model_path = "voice_recognition_final_enhanced.pth"
try:
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    model = None
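# If loading fails, the app still launches; predict() below then returns an error message.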
# Extract MFCC features from audio
def extract_features_from_file(file_path, max_pad_len=174):
    try:
        audio, sample_rate = librosa.load(file_path, sr=None, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        # Pad (or truncate) the time axis so every clip yields a 40 x max_pad_len matrix
        pad_width = max_pad_len - mfccs.shape[1]
        if pad_width > 0:
            mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
        else:
            mfccs = mfccs[:, :max_pad_len]
        return mfccs
    except Exception as e:
        print(f"[ERROR] Feature extraction failed: {e}")
        return None
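# Example (hypothetical path): extract_features_from_file("sample.wav") returns a
# NumPy array of shape (40, 174) with the default max_pad_len, or None if the file cannot be decoded.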
# Prediction function
def predict(file_path):
    if model is None:
        return "Error: Model not loaded."
    if file_path is None:
        return "Error: No audio file provided."
    # gr.Audio(type="filepath") passes the path as a string, so it is forwarded directly
    features = extract_features_from_file(file_path)
    if features is None:
        return "Error: Could not extract features from audio."
    # Shape the features as (batch=1, channels=1, n_mfcc, time_frames) for the CNN
    input_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(input_tensor)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        confidence, predicted = torch.max(probabilities, 1)
    predicted_user = LABELS[predicted.item()]
    confidence_score = confidence.item()
    if confidence_score < 0.7 or predicted_user == 'unknown':
        return f"❌ Unknown user or low confidence (Confidence: {confidence_score:.3f})"
    else:
        return f"✅ Access granted to {predicted_user} (Confidence: {confidence_score:.3f})"
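# Example usage (hypothetical path): predict("sample.wav") returns an access-granted
# or access-denied string, depending on the predicted user and the 0.7 confidence threshold.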
# Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),  # fixed deprecated 'source'
    outputs="text",
    title="🎙️ Voice Recognition",
    description=(
        "Upload an audio file (wav, mp3, etc.) containing a user's voice. "
        "The model will analyze the audio and predict the user's identity if recognized. "
        "If the user is unknown or the confidence is low, access will be denied. "
        "This system supports 7 authorized users and detects unknown users for security."
    ),
    article=(
        "### How to Use\n"
        "1. Click the 'Browse' button to upload an audio file.\n"
        "2. Wait for the model to process and display the prediction result.\n"
        "3. The output will show the predicted user and confidence score.\n"
        "4. If the user is unknown or the confidence is below the threshold, access will be denied.\n\n"
        "### Supported Users\n"
        "- user1, user2, user3, user4, user5, user6, user7\n\n"
        "### Notes\n"
        "- Ensure audio quality is good for best results.\n"
        "- Supported audio formats include wav, mp3, flac, ogg, m4a, aac.\n"
        "- The model uses MFCC features and a ResNet18-based CNN architecture.\n"
        "- For questions or issues, please refer to the project README or contact support."
    )
)
if __name__ == "__main__":
    iface.launch()