Spaces:
Runtime error
Runtime error
Commit ·
75aea43
1
Parent(s): bc1ecc3
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np # linear algebra
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import os
|
| 4 |
+
import librosa as lr
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
import pytorch_lightning as pl
|
| 8 |
+
import gradio
|
| 9 |
+
from model import MFCC_CNN
|
| 10 |
+
|
| 11 |
+
# Mapping from "<emotion>_<gender>" label to the class index the model predicts.
EMOTIONS = {
    'neutral_Male': 9,
    'happy_Male': 7,
    'sad_Male': 11,
    'angry_Male': 1,
    'fear_Male': 5,
    'disgust_Male': 3,
    'surprise_Male': 13,
    'neutral_Female': 8,
    'happy_Female': 6,
    'sad_Female': 10,
    'angry_Female': 0,
    'fear_Female': 4,
    'disgust_Female': 2,
    'surprise_Female': 12
}

# LOAD AUDIO
SAMPLE_RATE = 16000  # Hz; rate the model was trained at
DURATION = 3         # seconds of audio fed to the model

# GET MFCC
N_MFCC = 50
WIN_LENGTH = 2048
WINDOW = 'hann'
HOP_LENGTH = 512

PATH = './chekpoint/models-epoch=97-val_loss=2.09.ckpt'
# BUG FIX: map_location='cpu' so a checkpoint saved on a GPU machine still
# loads on a CPU-only inference host; without it torch.load raises when CUDA
# is unavailable.
ckpt = torch.load(PATH, map_location='cpu')

pretrained_model = MFCC_CNN(14)  # 14 classes = 7 emotions x 2 genders (see EMOTIONS)
pretrained_model.load_state_dict(ckpt['state_dict'])
pretrained_model.eval()
# pytorch_lightning freeze(): disables gradients for inference-only use.
pretrained_model.freeze()
|
| 45 |
+
|
| 46 |
+
def processAudio(audio_file):
    """Classify the emotion and speaker gender expressed in an audio file.

    Parameters
    ----------
    audio_file : str
        Path to the audio file handed over by the Gradio widget.

    Returns
    -------
    str
        Label of the form "<emotion>_<gender>", e.g. "happy_Female".
    """
    # Load at the training sample rate, truncated to DURATION seconds.
    audio, sr = lr.load(audio_file,
                        duration=DURATION,
                        sr=SAMPLE_RATE)

    # Zero-pad shorter clips up to a fixed DURATION-second window so the
    # MFCC matrix always has the same number of frames.
    signal = np.zeros(int(SAMPLE_RATE * DURATION))
    signal[:len(audio)] = audio

    # BUG FIX: the keyword was misspelled `r=sr`, which made librosa raise a
    # TypeError at inference time; the parameter is `sr`.
    mfcc = lr.feature.mfcc(y=signal,
                           sr=sr,
                           n_mfcc=N_MFCC,
                           win_length=WIN_LENGTH,
                           window=WINDOW,
                           hop_length=HOP_LENGTH,
                           )

    feature_set = torch.tensor(mfcc, dtype=torch.float)

    # Shape (batch, channel, n_mfcc, frames);
    # 94 frames = 1 + SAMPLE_RATE * DURATION // HOP_LENGTH.
    feature_set = feature_set.view(-1, 1, N_MFCC, 94)
    prediction = torch.argmax(pretrained_model(feature_set))

    # BUG FIX: EMOTIONS maps label -> index while the model yields an index,
    # so `EMOTIONS[prediction.item()]` was a guaranteed KeyError; look the
    # label up through the inverted mapping instead.
    index_to_label = {index: label for label, index in EMOTIONS.items()}
    return index_to_label[prediction.item()]
|
| 71 |
+
|
| 72 |
+
# BUG FIX: the original referenced the undefined names `grad` and `gr`
# (the module is imported as `gradio`) and misspelled `Label` as `Lable`,
# so building the interface crashed at import time.
demo = gradio.Interface(
    fn=processAudio,
    # type="filepath" so processAudio receives a path it can pass to
    # librosa; the default hands over a (sample_rate, ndarray) tuple.
    inputs=gradio.Audio(type="filepath"),
    outputs=gradio.Label(),
    examples=[
        [os.path.join(os.path.dirname(__file__), "files/03-01-01-01-02-02-01.wav")],
        [os.path.join(os.path.dirname(__file__), "files/03-01-07-01-02-02-01.wav")],
        [os.path.join(os.path.dirname(__file__), "files/03-01-08-02-02-02-01.wav")],
    ],
)

if __name__ == '__main__':
    demo.launch()
|