Spaces:
Build error
Build error
| import os | |
| import io | |
| import PIL | |
| import torch | |
| import librosa | |
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from transformers import BertConfig, BertTokenizer, XLMRobertaForSequenceClassification, BertForTokenClassification | |
| from keras.models import load_model | |
| def text_clf_ori(text): | |
| vocab_file = "vocab.txt" # 词汇表 | |
| tokenizer = BertTokenizer(vocab_file) | |
| # 加载模型 | |
| config = BertConfig.from_pretrained("nanaaaa/emotion_chinese_english") | |
| model = BertForTokenClassification.from_pretrained("nanaaaa/emotion_chinese_english", config=config) | |
| inputs = tokenizer(text, return_tensors="pt") | |
| # 模型推断 | |
| outputs = model(**inputs) | |
| probs = torch.nn.functional.softmax(outputs.logits, dim=1) | |
| # 创建标签和概率列表 | |
| labels = ["害怕", "高兴喵", "惊喜", "伤心", "生气"] | |
| probabilities = probs.detach().cpu().numpy()[0].tolist() | |
| # 返回标签和概率列表 | |
| return {labels[i]: float(probabilities[0][i]) for i in range(len(labels))} | |
| def text_clf(text): | |
| vocab_file = "vocab.txt" # 词汇表 | |
| tokenizer = BertTokenizer(vocab_file) | |
| # 加载模型 | |
| config = BertConfig.from_pretrained("nanaaaa/emotion_chinese_english") | |
| model = XLMRobertaForSequenceClassification.from_pretrained("nanaaaa/emotion_chinese_english", config=config) | |
| inputs = tokenizer(text, return_tensors="pt") | |
| # 模型推断 | |
| outputs = model(**inputs) | |
| probs = torch.nn.functional.softmax(outputs.logits, dim=1) | |
| # 创建标签和概率列表 | |
| labels = ["害怕", "高兴喵", "惊喜", "伤心", "生气"] | |
| probabilities = probs.detach().cpu().numpy()[0].tolist() | |
| # 返回标签和概率列表 | |
| return {labels[i]: float(probabilities[i]) for i in range(len(labels))} | |
| def audio_clf(aud): | |
| my_model = load_model('speech_mfcc_model.h5') | |
| def normalizeVoiceLen(y, normalizedLen): | |
| nframes = len(y) | |
| y = np.reshape(y, [nframes, 1]).T | |
| # 归一化音频长度为2s,32000数据点 | |
| if (nframes < normalizedLen): | |
| res = normalizedLen - nframes | |
| res_data = np.zeros([1, res], dtype=np.float32) | |
| y = np.reshape(y, [nframes, 1]).T | |
| y = np.c_[y, res_data] | |
| else: | |
| y = y[:, 0:normalizedLen] | |
| return y[0] | |
| def getNearestLen(framelength, sr): | |
| framesize = framelength * sr | |
| # 找到与当前framesize最接近的2的正整数次方 | |
| nfftdict = {} | |
| lists = [32, 64, 128, 256, 512, 1024] | |
| for i in lists: | |
| nfftdict[i] = abs(framesize - i) | |
| print(nfftdict) | |
| sortlist = sorted(nfftdict.items(), key=lambda x: x[1]) | |
| print(sortlist) | |
| framesize = int(sortlist[0][0]) # 取最接近当前framesize的那个2的正整数次方值为新的framesize | |
| return framesize | |
| VOICE_LEN = 35000 | |
| sr, y = aud | |
| N_FFT = getNearestLen(0.5, sr) | |
| y = normalizeVoiceLen(y, VOICE_LEN) # 归一化长度 | |
| mfcc_data = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=N_FFT, hop_length=int(N_FFT / 4)) | |
| feature = np.mean(mfcc_data, axis=0) | |
| # 数据标准化 | |
| data = feature.tolist() | |
| DATA_MEAN = np.mean(feature.tolist(), axis=0) | |
| DATA_STD = np.std(feature.tolist(), axis=0) | |
| data -= DATA_MEAN | |
| data /= DATA_STD | |
| data = np.array(data) | |
| data = data.reshape((1, data.shape[0], 1)) | |
| pred = my_model.predict(data) | |
| labels1 = ["angry", "fear", "joy", "neutral", "sadness", "surprise"] | |
| probabilities1 = pred[0].tolist() | |
| return {labels1[i]: float(probabilities1[i]) for i in range(len(labels1))} | |
| def cir_clf(L, R): | |
| df_4 = pd.read_csv(r'./df_4.csv', encoding="gbk") | |
| fig, ax = plt.subplots() | |
| r = df_4["R_nor"][int(L):int(R)] | |
| theta = (2 * np.pi * df_4["Theta_nor"])[int(L):int(R)] | |
| def clf_col(x): | |
| if -1.5 * np.pi > x > -2 * np.pi: | |
| return 5 | |
| if -1.5 * np.pi < x < -1.1 * np.pi: | |
| return 2 | |
| if -1.1 * np.pi < x < -1 * np.pi: | |
| return 3 | |
| if 1.04 * np.pi > x > 1 * np.pi: | |
| return 3 | |
| if 1.1 * np.pi < x < 1.375 * np.pi: | |
| return 4 | |
| if 1.625 * np.pi > x > 1.375 * np.pi: | |
| return 1 | |
| if 1.625 * np.pi < x < 2 * np.pi: | |
| return 0 | |
| theta1 = theta.copy() | |
| colors = theta1.apply(lambda x: clf_col(x)) | |
| ax = plt.subplot(111, projection="polar") | |
| c = ax.scatter(theta, r, c=colors, cmap="hsv", alpha=0.6) | |
| fig.set_size_inches(10, 10) | |
| def fig2data(fig): | |
| import PIL.Image as Image | |
| fig.canvas.draw() | |
| w, h = fig.canvas.get_width_height() | |
| buf = np.fromstring(fig.canvas.tostring_argb(), dtype=np.uint8) | |
| buf.shape = (w, h, 4) | |
| buf = np.roll(buf, 3, axis=2) | |
| image = Image.frombytes("RGBA", (w, h), buf.tostring()) | |
| image = np.asarray(image) | |
| return image | |
| return fig2data(fig) | |
| with gr.Blocks() as demo: | |
| with gr.Tab("Flip Text"): | |
| text = gr.Textbox(label="文本哟") | |
| text_output = gr.outputs.Label(label="情感呢") | |
| text_output1 = gr.outputs.Label(label="情感呢") | |
| text_button = gr.Button("确认") | |
| text_button1 = gr.Button("确认对比") | |
| with gr.Tab("Flip Audio"): | |
| audio = gr.Audio(label="音频捏") | |
| audio_output = gr.outputs.Label(label="情感哟") | |
| audio_button = gr.Button("确认") | |
| with gr.Tab("Flip Circle"): | |
| cir_l = gr.Slider(0, 30000, step=1) | |
| cir_r = gr.Slider(0, 30000, step=1) | |
| cir_output = gr.outputs.Image(type='numpy', label="情感圈") | |
| cir_button = gr.Button("确认") | |
| text_button.click(fn=text_clf, inputs=text, outputs=text_output) | |
| text_button1.click(fn=text_clf_ori, inputs=text, outputs=text_output1) | |
| audio_button.click(fn=audio_clf, inputs=audio, outputs=audio_output) | |
| cir_button.click(fn=cir_clf, inputs=[cir_l, cir_r], outputs=cir_output) | |
| demo.launch() | |