Hugging Face Spaces page metadata (scrape residue) — Space status: Sleeping.
# app.py
import os
import torch
import torch.nn as nn
import torchaudio
import gradio as gr
from torch.nn import functional as F
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB

# Audio-processing constants.
SAMPLE_RATE = 16000  # target sampling rate (Hz)
N_MELS = 128         # mel filterbank size (spectrogram height)
N_FFT = 2048         # FFT window length
HOP_LENGTH = 512     # stride between successive FFT frames
DURATION = 3         # fixed clip length in seconds
MAX_AUDIO_LENGTH = SAMPLE_RATE * DURATION  # fixed clip length in samples
class AudioPreprocessor:
    """Turn an audio file into a fixed-size log-mel spectrogram tensor.

    The pipeline: mix down to mono, resample to ``target_sr``,
    peak-normalize, center-crop or right-pad to ``target_length`` samples,
    then apply a mel spectrogram followed by dB scaling.
    """

    def __init__(self, target_sr=SAMPLE_RATE, target_length=MAX_AUDIO_LENGTH):
        self.target_sr = target_sr
        self.target_length = target_length
        self.mel_spec = MelSpectrogram(
            sample_rate=target_sr,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
            n_mels=N_MELS,
        )
        self.amplitude_to_db = AmplitudeToDB()

    def process_audio(self, audio_path):
        """Return a (1, n_mels, frames) dB-scaled mel spectrogram, or None on failure."""
        try:
            waveform, sr = torchaudio.load(audio_path)

            # Collapse multi-channel audio to a single mono channel.
            if waveform.shape[0] > 1:
                waveform = torch.mean(waveform, dim=0, keepdim=True)

            # Bring the signal to the target sampling rate if needed.
            if sr != self.target_sr:
                resampler = torchaudio.transforms.Resample(sr, self.target_sr)
                waveform = resampler(resampler and waveform) if False else resampler(waveform)

            # Peak-normalize; the epsilon guards against an all-zero signal.
            peak = torch.max(torch.abs(waveform)) + 1e-8
            waveform = waveform / peak

            # Enforce a fixed length: center-crop long clips, right-pad short ones.
            n_samples = waveform.shape[1]
            if n_samples > self.target_length:
                start = (n_samples - self.target_length) // 2
                waveform = waveform[:, start:start + self.target_length]
            else:
                waveform = F.pad(waveform, (0, self.target_length - n_samples))

            return self.amplitude_to_db(self.mel_spec(waveform))
        except Exception as e:
            print(f"Error processing audio: {str(e)}")
            return None
class VoiceAccessNet(nn.Module):
    """CNN binary classifier over log-mel spectrograms (grant/deny access).

    Input: (batch, n_mels, frames) or (batch, 1, n_mels, frames).
    Output: (batch, 2) raw logits; index 1 is interpreted by the caller
    as "access granted".
    """

    def __init__(self):
        super().__init__()
        # Number of STFT frames produced for a max-length clip.
        self.time_dim = (MAX_AUDIO_LENGTH // HOP_LENGTH) + 1
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)
        self.flatten_size = self._get_flatten_size()
        self.fc1 = nn.Linear(self.flatten_size, 256)
        self.fc2 = nn.Linear(256, 2)

    def _get_flatten_size(self):
        """Infer the flattened feature size after the conv/pool stack.

        Only shape-affecting layers (conv + pool) are applied here:
        BatchNorm and ReLU preserve shape, and running the BatchNorm layers
        on random data while they are in training mode (as during __init__)
        would corrupt their running statistics. no_grad avoids building a
        throwaway autograd graph for the probe.
        """
        with torch.no_grad():
            x = torch.zeros(1, 1, N_MELS, (MAX_AUDIO_LENGTH // HOP_LENGTH) + 1)
            x = self.pool(self.conv1(x))
            x = self.pool(self.conv2(x))
            x = self.pool(self.conv3(x))
        return x.numel() // x.size(0)

    def forward(self, x):
        # Accept (batch, mels, frames) inputs by inserting the channel axis.
        x = x.unsqueeze(1) if x.dim() == 3 else x
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(self.dropout(x)))
        return self.fc2(self.dropout(x))
# Select device and load the trained checkpoint once at startup.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VoiceAccessNet().to(device)
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoints (pass weights_only=True on torch >= 1.13 to harden this).
checkpoint = torch.load('best_model.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # inference mode: disable dropout / freeze BatchNorm stats
# Built once at import time: reconstructing the AudioPreprocessor (and its
# MelSpectrogram transform) on every request is wasted work, and nothing in
# it is mutated per call.
_preprocessor = AudioPreprocessor()


def predict_access(audio_path):
    """Classify a voice recording and report an access decision.

    Args:
        audio_path: Path to the uploaded audio file (Gradio "filepath" input).

    Returns:
        A (result, confidence) pair of strings; on failure the first element
        carries the error text and the second is "N/A".
    """
    try:
        mel_spec = _preprocessor.process_audio(audio_path)
        if mel_spec is None:
            return "Error processing audio", "N/A"
        mel_spec = mel_spec.unsqueeze(0).to(device)  # add batch dimension
        with torch.no_grad():
            outputs = model(mel_spec)
            probabilities = F.softmax(outputs, dim=1)
            prediction = torch.argmax(probabilities, dim=1).item()
            confidence = probabilities[0][prediction].item()
        result = "Access Granted" if prediction == 1 else "Access Denied"
        return result, f"Confidence: {confidence:.2f}"
    except Exception as e:
        return f"Error: {str(e)}", "N/A"
# Assemble and launch the Gradio UI.
audio_input = gr.Audio(type="filepath", label="Upload Voice Recording")
result_output = gr.Text(label="Access Result")
confidence_output = gr.Text(label="Confidence Score")

iface = gr.Interface(
    fn=predict_access,
    inputs=audio_input,
    outputs=[result_output, confidence_output],
    title="Voice Access Control System",
    description="Upload a voice recording to verify access authorization. The system will analyze the voice and determine if access should be granted.",
    examples=[["example1.wav"], ["example2.wav"]],  # Add example files if you have them
    theme="default",
)

iface.launch()