| | import argparse |
| | import sys |
| | import os |
| | import numpy as np |
| | import torch |
| | from torch import nn |
| | from torch import Tensor |
| | import yaml |
| | from model import RawNet |
| | from torch.nn import functional as F |
| | import librosa |
| | import json |
| | from datetime import datetime |
| |
|
def pad(x, max_len=96000):
    """Fit ``x`` to exactly ``max_len`` samples along its first axis.

    Inputs that already hold at least ``max_len`` samples are cut down to
    their first ``max_len`` entries. Shorter inputs are repeated end-to-end
    and the repetition is cut off at ``max_len``.

    Args:
        x: Array-like object exposing ``shape`` (e.g. a NumPy array of audio
            samples).
        max_len: Desired number of samples in the result.

    Returns:
        The truncated or repeat-padded samples.
    """
    n_samples = x.shape[0]

    if n_samples < max_len:
        # One repeat more than the truncated ratio always covers max_len.
        reps = int(max_len / n_samples) + 1
        tiled = np.tile(x, (1, reps))
        return tiled[:, :max_len][0]

    return x[:max_len]
| |
|
def load_sample(sample_path, max_len=96000):
    """Load an audio file and split it into fixed-length tensors.

    The file is read with ``librosa.load`` at its native sampling rate and
    resampled to 24 kHz when that rate differs.

    Args:
        sample_path: Path of the audio file to read.
        max_len: Number of samples in every returned segment.

    Returns:
        A list of ``Tensor`` objects, each ``max_len`` samples long. Audio
        with at most ``max_len`` samples yields a single segment that is
        repeat-padded by ``pad``. Longer audio yields ``len(y) // max_len``
        consecutive full segments; samples after the last full segment are
        not returned.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    target_sr = 24000

    y, sr = librosa.load(sample_path, sr=None)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)

    if len(y) == 0:
        # Fail with a clear message instead of a division by zero in pad().
        raise ValueError('No audio samples found in {}'.format(sample_path))

    if len(y) <= max_len:
        return [Tensor(pad(y, max_len))]

    # Only whole segments are produced; every slice is already exactly
    # max_len samples long, so no padding or truncation is needed here.
    n_segments = len(y) // max_len
    return [
        Tensor(y[i * max_len:(i + 1) * max_len])
        for i in range(n_segments)
    ]
| | |
| |
|
# Command-line entry point: load a trained RawNet checkpoint, run it over every
# fixed-length segment of one audio file and print the segment-averaged
# multi-class and binary probabilities.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them the script cannot do anything
    # useful, so let argparse report the problem up front.
    parser.add_argument('--input_path', type=str, required=True, help='This path should be an external path point to an audio file')
    parser.add_argument('--model_path', type=str, required=True, help='This path should be an external path point to a trained model checkpoint file')
    args = parser.parse_args()

    input_path = args.input_path
    model_path = args.model_path

    # The model hyper-parameters are read from a YAML file that is looked up
    # relative to the current working directory.
    dir_yaml = 'model_config_RawNet.yaml'
    with open(dir_yaml, 'r', encoding='utf-8') as f_yaml:
        parser1 = yaml.safe_load(f_yaml)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Device: {}'.format(device))

    model = RawNet(parser1['model'], device)
    model = model.to(device)

    # NOTE: torch.load unpickles the file; only load checkpoints that come
    # from a trusted source.
    model.load_state_dict(torch.load(model_path, map_location=device))
    print('Model loaded : {}'.format(model_path))

    model.eval()

    out_list_multi = []
    out_list_binary = []
    # Inference only: disabling autograd avoids building a graph per segment.
    with torch.no_grad():
        for m_batch in load_sample(input_path):
            # Add a leading batch dimension of size 1 for the model.
            m_batch = m_batch.to(device=device, dtype=torch.float).unsqueeze(0)
            logits, multi_logits = model(m_batch)

            probs = F.softmax(logits, dim=-1)
            probs_multi = F.softmax(multi_logits, dim=-1)

            out_list_multi.append(probs_multi.tolist()[0])
            out_list_binary.append(probs.tolist()[0])

    # Average the per-segment probabilities into one score per class.
    result_multi = np.average(out_list_multi, axis=0).tolist()
    result_binary = np.average(out_list_binary, axis=0).tolist()

    print('Multi classification result : gt:{}, wavegrad:{}, diffwave:{}, parallel wave gan:{}, wavernn:{}, wavenet:{}, melgan:{}'.format(result_multi[0], result_multi[1], result_multi[2], result_multi[3], result_multi[4], result_multi[5], result_multi[6]))
    print('Binary classification result : fake:{}, real:{}'.format(result_binary[0], result_binary[1]))