# Hugging Face Space: speech emotion + gender recognition demo
# (page residue removed: the Space status banner showed "Runtime error")
| import numpy as np | |
| import pandas as pd | |
| import os | |
| import librosa as lr | |
| import torch | |
| import torch.nn as nn | |
| import pytorch_lightning as pl | |
| import gradio as gr | |
| from models.model import MFCC_CNN | |
# Mapping from the CNN's 14 output class indices to human-readable labels
# (Russian). Even indices are the female voice, odd indices the male voice,
# for each of the 7 emotion categories.
EMOTIONS = {
    0: 'Злость, женский голос',
    1: 'Злость, мужской голос',
    2: 'Отвращение, женский голос',
    3: 'Отвращение, мужской голос',
    4: 'Страх, женский голос',
    5: 'Страх, мужской голос',
    6: 'Счастье, женский голос',
    7: 'Счастье, мужской голос',
    8: 'Нейтральная, женский голос',
    9: 'Нейтральная, мужской голос',
    10: 'Грусть, женский голос',
    11: 'Грусть, мужской голос',
    12: 'Удивление, женский голос',
    13: 'Удивление, мужской голос',
}
# ---- Audio loading parameters ----
SAMPLE_RATE = 16000  # Hz; librosa resamples every input to this rate
DURATION = 3         # seconds of audio fed to the model (fixed-size input)

# ---- MFCC extraction parameters ----
N_MFCC = 50
WIN_LENGTH = 2048
WINDOW = 'hann'
HOP_LENGTH = 512

# NOTE(review): 'chekpoint' looks like a typo for 'checkpoint', but the path
# must match the actual directory in the repo — confirm before renaming.
PATH = './chekpoint/models-epoch=97-val_loss=2.09.ckpt'

# Load the trained weights once at import time. map_location='cpu' ensures a
# checkpoint saved on a GPU machine still loads on a CPU-only host (e.g. a
# free Space) instead of raising a CUDA deserialization error.
ckpt = torch.load(PATH, map_location='cpu')
pretrained_model = MFCC_CNN(14)  # 14 classes = 7 emotions x 2 genders
pretrained_model.load_state_dict(ckpt['state_dict'])
pretrained_model.eval()
pretrained_model.freeze()  # pytorch_lightning: disable grads for inference
def scaler_params():
    """Load the StandardScaler statistics saved during training.

    Both files contain comma-separated floats (presumably ``sklearn``
    ``StandardScaler.mean_`` / ``scale_`` dumps — TODO confirm against the
    training script).

    Returns:
        tuple[np.ndarray, np.ndarray]: per-feature ``mean`` and ``scale``
        vectors used to standardize the flattened MFCC matrix.
    """
    def _read_vector(path):
        # Skip blank fragments (the original check was `!= ' '`, which missed
        # empty strings and newline-only fragments from a trailing comma).
        with open(path, mode='r') as f:
            return np.array(
                [float(tok) for tok in f.read().split(',') if tok.strip()]
            )

    return _read_vector('./mean_.txt'), _read_vector('./scale_.txt')
def processAudio(audio_file):
    """Predict the emotion/gender label for a recorded audio file.

    Args:
        audio_file: path to an audio file (as produced by
            ``gr.Audio(type='filepath')``).

    Returns:
        str: a human-readable label from ``EMOTIONS``.
    """
    audio, sr = lr.load(audio_file,
                        duration=DURATION,
                        sr=SAMPLE_RATE)
    # Zero-pad to a fixed DURATION-second window so the MFCC grid always has
    # the shape the CNN was trained on (short clips are padded; long clips
    # were already truncated by `duration=` above).
    signal = np.zeros(int(SAMPLE_RATE * DURATION))
    signal[:len(audio)] = audio
    mfcc = lr.feature.mfcc(y=signal,
                           sr=sr,
                           n_mfcc=N_MFCC,
                           win_length=WIN_LENGTH,
                           window=WINDOW,
                           hop_length=HOP_LENGTH)
    # Standardize feature-wise with the training-set scaler statistics.
    # (Removed a dead `feature_set.view(1, 1, 50, 94)` whose result was
    # discarded, and a pointless torch->numpy->torch round-trip.)
    mean, scale = scaler_params()
    features = (np.reshape(mfcc, (1, -1)) - mean) / scale
    # (batch, channel, n_mfcc, frames) — frames derived from the MFCC output
    # instead of the previously hard-coded 94.
    features = np.reshape(features, (1, 1, N_MFCC, mfcc.shape[1]))
    features = torch.tensor(features, dtype=torch.float)
    prediction = torch.argmax(pretrained_model(features))
    return EMOTIONS[prediction.item()]
# Gradio UI: upload or record a clip, show the predicted emotion label.
_EXAMPLES = [
    "files/03-01-01-01-02-02-01.wav",
    "files/03-01-07-01-02-02-01.wav",
    "files/03-01-08-02-02-02-01.wav",
]
_HERE = os.path.dirname(__file__)

demo = gr.Interface(
    fn=processAudio,
    inputs=gr.Audio(type='filepath'),
    outputs=gr.Label(),
    examples=[[os.path.join(_HERE, rel)] for rel in _EXAMPLES],
)

if __name__ == '__main__':
    demo.launch()