Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """demo_0113.ipynb | |
| Automatically generated by Colaboratory. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1ge4fiA7yDzLAH4vl1LN4_3NxkbLGdKhz | |
| """ | |
| pip install -qq transformers | |
| import pandas as pd | |
| # from catboost import CatBoostClassifier | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from wordcloud import WordCloud | |
| from tqdm import tqdm | |
| import nltk | |
| from nltk.stem import WordNetLemmatizer | |
| from nltk.corpus import stopwords | |
| import re | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.svm import SVC,LinearSVC | |
| from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier | |
| from xgboost import XGBClassifier | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.metrics import accuracy_score | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.experimental import enable_hist_gradient_boosting | |
| from sklearn.ensemble import HistGradientBoostingClassifier | |
| from imblearn.over_sampling import SMOTE | |
| import plotly.express as px | |
| import warnings | |
| import torch | |
| torch.backends.cudnn.benchmark = True | |
| from torchvision import transforms, utils | |
| import math | |
| import random | |
| import numpy as np | |
| from torch import nn, autograd, optim | |
| import numpy as np | |
| import random | |
| warnings.filterwarnings('ignore') | |
| !pip install openai | |
| !pip install gradio | |
| import os | |
| import openai | |
# Commented out IPython magic to ensure Python compatibility.
from google.colab import drive

# Mount Google Drive into the Colab runtime (force_remount re-mounts even if
# a previous mount exists).
drive.mount("/content/drive", force_remount=True)

# NOTE(review): the backslash escapes the space for the shell `%cd` magic
# below, not for Python — as a Python string this contains a literal
# backslash. Harmless while the %cd line stays commented out.
FOLDERNAME="Colab\ Notebooks/finalproject_test"
# %cd drive/MyDrive/$FOLDERNAME
| import time | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import f1_score, accuracy_score | |
| import os,re | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.stem import PorterStemmer | |
| from wordcloud import WordCloud | |
| from tqdm import tqdm, trange | |
| import torch | |
| from torch.nn import BCEWithLogitsLoss | |
| from torch.utils.data import TensorDataset, DataLoader, RandomSampler | |
| from transformers import BertTokenizer, BertForSequenceClassification | |
# Select GPU when available, otherwise fall back to CPU.
# (The original immediately overwrote this with the hard-coded string
# 'cuda', which crashes on CPU-only runtimes.)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the fine-tuned multi-label BERT classifier. `map_location` keeps the
# load working on machines without the GPU the checkpoint was saved from.
model = torch.load('mbti_model.pt', map_location=device)

max_length = 512   # BERT's maximum input sequence length
threshold = 0.50   # sigmoid cutoff per binary MBTI axis

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
# Example input used for the smoke-test prediction below.
# (A commented-out `clean_text` preprocessing helper previously lived here;
# it was dead code and has been removed.)
sentence = "Share some fun facts to break the ice"
def data_preprocess(sentence):
    """Tokenize a single sentence into batched BERT input tensors.

    Parameters
    ----------
    sentence : str
        Raw text to classify.

    Returns
    -------
    (torch.Tensor, torch.Tensor)
        Input-id and attention-mask tensors, each of shape (1, max_length).
    """
    # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
    encodings = tokenizer.encode_plus(
        sentence,
        max_length=max_length,
        padding='max_length',
        truncation=True,
    )
    # unsqueeze(0) adds the batch dimension without hard-coding the sequence
    # length the way the original `reshape(512, 1).T` did.
    inputs = torch.tensor(encodings['input_ids']).unsqueeze(0)
    masks = torch.tensor(encodings['attention_mask']).unsqueeze(0)
    # token_type_ids were computed but never returned/used, so they are no
    # longer materialized.
    return inputs, masks


a, b = data_preprocess(sentence)
def predict(test_inputs, test_masks):
    """Run the MBTI classifier on one encoded sentence.

    Parameters
    ----------
    test_inputs, test_masks : torch.Tensor
        Batched input ids and attention mask, shape (1, max_length).

    Returns
    -------
    str
        Four-letter MBTI type, e.g. 'INTP'.
    """
    model.eval()
    with torch.no_grad():
        test_inputs = test_inputs.to(device)
        test_masks = test_masks.to(device)
        outs = model(test_inputs, token_type_ids=None, attention_mask=test_masks)
        # Four independent sigmoid probabilities, one per MBTI axis.
        # (The original also built unused `logit_preds`/`pred_labels` lists
        # and an unused detached-logits array; both removed.)
        probs = torch.sigmoid(outs[0]).to('cpu').numpy()[0]

    # Axis i maps to a letter pair: probability above threshold selects the
    # first letter, otherwise the second.
    axis_letters = [('E', 'I'), ('S', 'N'), ('T', 'F'), ('J', 'P')]
    return ''.join(
        pos if probs[i] > threshold else neg
        for i, (pos, neg) in enumerate(axis_letters)
    )


predict(a, b)
import os
import openai
import gradio as gr
import random

# SECURITY: the original hard-coded an OpenAI API key here. A key committed
# to source control must be treated as leaked and revoked. Read it from the
# environment instead (set OPENAI_API_KEY before running).
openai.api_key = os.getenv("OPENAI_API_KEY")
def translation(text):
    """Translate *text* to English ("中翻英") via the OpenAI completion API.

    Returns the completion text with surrounding whitespace stripped.
    """
    request_params = dict(
        model="text-davinci-003",
        prompt=f"中翻英{text}",
        max_tokens=500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    response = openai.Completion.create(**request_params)
    first_choice = response['choices'][0]
    return first_choice['text'].strip()
def predict_mbti(description):
    """Translate *description* to English, then predict its MBTI type.

    Pipeline: OpenAI translation -> BERT tokenization -> classifier.
    """
    english_text = translation(description)
    inputs, masks = data_preprocess(english_text)
    return predict(inputs, masks)
# Build the Gradio UI: a description textbox, two survey radio groups, and
# two buttons. Only the MBTI button is wired up; the movie button is a
# placeholder. (A commented-out gr.Interface variant previously lived here;
# removed as dead code.)
blocks = gr.Blocks()
with blocks as demo:
    desc = gr.Textbox(
        label='讓我來分析你最近的人格📝',
        placeholder='個性描述、自己的故事或是曾經發過的文章',
    )
    survey = gr.Radio(
        ["⭕️有聽過👂16型人格測驗(16pernalities)", "❌沒有聽過👂16型人格測驗(16pernalities)"],
        label='民意調查中...',
    )
    survey2 = gr.Radio(
        ["✅曾經做過✏️16型人格測驗(16pernalities)", "❎沒有做過✏️16型人格測驗(16pernalities)"],
        label='搜集民意中...',
    )
    # Renamed from `object`, which shadowed the Python builtin.
    object_box = gr.Textbox(placeholder="object")
    with gr.Row():
        type_btn = gr.Button("16型人格類型👨👧👦")
        movie_btn = gr.Button("推薦專屬電影🍿")
    output1 = gr.Textbox(label="👉根據這段描述,你的16型人格類型🪢會是...")
    output2 = gr.Textbox(label="👉由你的描述與人格特質,適合你的電影🎦有...")
    # Clicking the type button runs the full translate->tokenize->classify
    # pipeline on the description and shows the resulting MBTI type.
    type_btn.click(predict_mbti, desc, output1)

# share=True exposes a public link; debug=True blocks and streams errors.
demo.launch(share=True, debug=True)