Spaces:

iiserkbikram
/

Audio-DeepFake

Build error

Update app.py

7f1fd95 verified almost 2 years ago

1.61 kB

	import gradio as gr
	import librosa
	import numpy as np
	import torch
	from torch import Tensor
	import torch.nn as nn
	from model import Model

	# Checkpoint file whose contents are passed to torch.load and then to
	# model.load_state_dict further down in this module.
	model_path = 'model.pth'
	def load_data(path):
	    """Read an audio file and convert it into a fixed-length tensor.

	    The file is decoded by ``librosa.load`` with its default arguments, the
	    waveform is brought to 64600 samples by ``pad``, and the result is
	    wrapped in a ``Tensor`` with one extra leading dimension of size 1.

	    Args:
	        path: Audio file location, forwarded unchanged to ``librosa.load``.

	    Returns:
	        A ``(tensor, sample_rate)`` tuple: the fixed-length waveform tensor
	        and the sampling rate reported by ``librosa.load``.
	    """
	    # NOTE(review): no ``sr`` argument is given, so librosa's default
	    # sampling rate applies — confirm it matches what the model expects.
	    waveform, sample_rate = librosa.load(path)
	    fixed_length = pad(waveform, 64600)
	    # unsqueeze(0) prepends a size-1 axis (presumably the batch axis).
	    model_input = Tensor(fixed_length).unsqueeze(0)
	    return model_input, sample_rate

	def pad(x, max_len=64600):
	    """Return ``x`` truncated or repeated to exactly ``max_len`` samples.

	    Signals that already have at least ``max_len`` samples are cut to their
	    first ``max_len`` samples. Shorter signals are repeated end-to-end and
	    the repetition is then cut at ``max_len``.

	    Args:
	        x: NumPy array holding the signal; its length is ``x.shape[0]``.
	        max_len: Number of samples in the returned array.

	    Returns:
	        An array with ``max_len`` entries along its first axis.

	    Raises:
	        ValueError: If ``x`` is empty while ``max_len`` is positive, since
	            there is nothing to repeat.
	    """
	    x_len = x.shape[0]
	    if x_len >= max_len:
	        return x[:max_len]
	    if x_len == 0:
	        # Previously this fell through to a division by zero.
	        raise ValueError("cannot pad an empty signal to a positive length")
	    # One repetition more than the floor quotient always reaches max_len.
	    num_repeats = max_len // x_len + 1
	    # Tile along a second axis, cut to length, then drop the helper axis.
	    padded_x = np.tile(x, (1, num_repeats))[:, :max_len][0]
	    return padded_x

	# Use the GPU when torch reports one as available, otherwise the CPU.
	if torch.cuda.is_available():
	    device = 'cuda'
	else:
	    device = 'cpu'

	# Instantiate the network and count its parameters (flattened length of
	# every parameter tensor, summed).
	# NOTE(review): the meaning of the first argument (None) is defined by
	# Model in model.py, which is not visible here.
	model = Model(None, device)
	nb_params = sum(param.view(-1).size()[0] for param in model.parameters())

	# The DataParallel wrapper is applied before the weights are loaded, so the
	# state dict is loaded into the wrapper — presumably the checkpoint was
	# saved from a DataParallel-wrapped model; verify against training code.
	model = nn.DataParallel(model).to(device)
	model.load_state_dict(torch.load(model_path, map_location=device))
	print("Model loaded : {}".format(model_path))

	# Put the model into evaluation mode before serving predictions.
	model.eval()

	# Maps the integer index picked by argmax in Detection to its label.
	prediction_dict = {0: 'Fake', 1: 'Real'}
	def Detection(audio_1):
	    """Classify one audio file as ``'Fake'`` or ``'Real'``.

	    The file is loaded with ``load_data``, passed through the module-level
	    ``model``, and the index of the largest softmax value is translated to
	    a label via ``prediction_dict``.

	    Args:
	        audio_1: Path of the audio file to classify. May be ``None`` when
	            the Gradio audio input was left empty.

	    Returns:
	        The label stored in ``prediction_dict`` for the predicted index.

	    Raises:
	        gr.Error: If ``audio_1`` is ``None``; Gradio shows the message to
	            the user instead of a generic failure.
	    """
	    if audio_1 is None:
	        raise gr.Error("Please upload or record an audio clip first.")

	    x_inp, _ = load_data(audio_1)
	    print(x_inp.shape)

	    # Inference only: skip autograd bookkeeping to save memory and time.
	    with torch.no_grad():
	        logits = model(x_inp)
	        validity_probs = torch.nn.functional.softmax(logits, dim=1)

	    predicted_index = torch.argmax(validity_probs).item()
	    print(predicted_index)
	    return prediction_dict[predicted_index]

	# Input widget: hands the callback a path to the uploaded/recorded file.
	audio_1 = gr.Audio(type="filepath", label="Audio 1")
	# The callback returns the label 'Fake' or 'Real', not a similarity score,
	# so the output box is labelled accordingly.
	text_output = gr.Textbox(label="Prediction")
	# Build the single-input, single-output web UI and start serving it.
	gr.Interface(
	    fn=Detection,
	    inputs=audio_1,
	    outputs=text_output,
	    title="Audio Deepfake Detection",
	    description="Audio Deepfake Detection using finetuned model on for-2seconds dataset.",
	).launch()