Spaces:
Runtime error
Runtime error
| from flask import render_template,redirect,url_for,flash,request | |
| from wtforms.validators import ValidationError | |
| from app import app | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from keras.layers import Input, Dense, LSTM, GRU, Embedding | |
| from keras.layers import Activation, Bidirectional, GlobalMaxPool1D, GlobalMaxPool2D, Dropout | |
| from keras.models import Model | |
| from keras.preprocessing import text, sequence | |
| import transformers | |
| from transformers import AutoTokenizer | |
| from tokenizers import BertWordPieceTokenizer | |
| from keras.initializers import Constant | |
| import numpy as np | |
| import re | |
| import tensorflow as tf | |
| import os | |
def home_page():
    """Render the landing page from the ``index.html`` template."""
    # NOTE(review): no route decorator is visible on this view in this
    # file -- confirm that it is registered with the Flask app elsewhere.
    page = render_template('index.html')
    return page
# Hugging Face tokenizer for the "distilbert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# WordPiece tokenizer built from a vocabulary file; the path is relative,
# so it is resolved against the process working directory.
fast_tokenizer = BertWordPieceTokenizer(
    'distilbert_base_uncased/vocab.txt',
    lowercase=True,
)
def fast_encode_sentence(text, tokenizer, maxlen=128):
    """Encode a single sentence into a 2-D array of token ids.

    Args:
        text: The sentence to encode.
        tokenizer: Object exposing ``enable_truncation``,
            ``enable_padding`` and ``encode``; ``encode`` must return an
            object with an ``ids`` attribute.
        maxlen: Length passed to both the truncation and the padding
            settings of ``tokenizer``.

    Returns:
        ``numpy.ndarray`` holding one row: the ids of the encoded sentence.
        Presumably the row has ``maxlen`` entries once truncation and
        padding are enabled -- that is up to the tokenizer.
    """
    # These calls reconfigure the tokenizer that was passed in.
    tokenizer.enable_truncation(max_length=maxlen)
    tokenizer.enable_padding(length=maxlen)

    encoding = tokenizer.encode(text)
    # Wrap the ids in a list so the result has a leading axis of size 1.
    return np.array([encoding.ids])
# Pretrained DistilBERT; only its first weight tensor is used below.
transformer_layer = transformers.TFDistilBertModel.from_pretrained('distilbert-base-uncased')
embedding_size = 128

# Fixed-length input of 128 values per sample.
inp = Input(shape=(128, ))

# weights[0] is presumably the word-embedding table -- TODO confirm.
embedding_matrix = transformer_layer.weights[0].numpy()

# Frozen embedding layer initialised from the matrix extracted above.
embedded = Embedding(
    embedding_matrix.shape[0],
    embedding_matrix.shape[1],
    embeddings_initializer=Constant(embedding_matrix),
    trainable=False,
)(inp)

# Bidirectional LSTM over the sequence, then a max over the time axis.
encoded = Bidirectional(
    LSTM(25, return_sequences=True, recurrent_regularizer='L1L2')
)(embedded)
pooled = GlobalMaxPool1D()(encoded)
pooled = Dropout(0.9)(pooled)

# Dense head ending in a single sigmoid unit.
hidden = Dense(
    50,
    activation='relu',
    kernel_initializer='he_normal',
    kernel_regularizer="L1L2",
)(pooled)
hidden = Dropout(0.9)(hidden)
x = Dense(1, activation='sigmoid')(hidden)

model = Model(inputs=[inp], outputs=x)
# Relative path: resolved against the process working directory.
model.load_weights('distilbert_model_weights.best.hdf5')
# Matches http:// and https:// URLs. Written as a raw string so that the
# escaped parentheses are not treated as (invalid) string escapes.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)

# Contraction expansions, applied in order to lowercased text. They must run
# before the noise pass below, which strips apostrophes.
# NOTE(review): "didn't" and "haven't" were previously misspelled as
# "did't" / "have't" and therefore never expanded -- confirm that the
# training-time cleaning agrees with the corrected spellings.
_CONTRACTIONS = (
    ("i'm", "i am"),
    ("he's", "he is"),
    ("she's", "she is"),
    ("that's", "that is"),
    ("what's", "what is"),
    ("where's", "where is"),
    ("'ll", " will"),
    ("'ve", " have"),
    ("'re", " are"),
    ("'d", " would"),
    ("won't", "will not"),
    ("don't", "do not"),
    ("didn't", "did not"),
    ("can't", "can not"),
    ("it's", "it is"),
    ("couldn't", "could not"),
    ("haven't", "have not"),
)

# Removes @handles, every character outside [0-9A-Za-z \t], scheme://...
# tokens, a leading "rt", and "http" together with the character after it.
_NOISE_PATTERN = re.compile(
    r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"
)

# A character followed by three or more copies of itself.
_REPEATED_CHAR_PATTERN = re.compile(r'(.)\1{3,}')


def _clean_text(text):
    """Lowercase ``text`` and strip URLs, contractions, noise and long runs."""
    text = _URL_PATTERN.sub('', text.lower())
    # The keys are plain literals, so str.replace is equivalent to re.sub.
    for contraction, expansion in _CONTRACTIONS:
        text = text.replace(contraction, expansion)
    # This pass removes all punctuation, so no separate punctuation pass is
    # needed afterwards.
    text = _NOISE_PATTERN.sub('', text)
    # Collapse runs of four or more identical characters to one character.
    return _REPEATED_CHAR_PATTERN.sub(r'\1', text)


def predict_on_sentence(model, text):
    """Clean and encode one sentence, then return the rounded model output.

    Args:
        model: Object whose ``predict`` method accepts the array returned
            by ``fast_encode_sentence``.
        text: Raw input sentence.

    Returns:
        The result of ``tf.squeeze(tf.round(prediction))``: the model's
        prediction rounded to the nearest integer with size-1 axes removed.
    """
    encoded = fast_encode_sentence(_clean_text(text), fast_tokenizer)
    prediction = model.predict(encoded)
    return tf.squeeze(tf.round(prediction))
def predict():
    """Classify the submitted sentence and render the verdict.

    Reads the ``sentence`` form field, runs it through
    ``predict_on_sentence`` with the module-level ``model`` and renders
    ``index.html`` with ``prediction_text`` describing the comment as
    'good' when the prediction equals 1 and 'bad' otherwise.

    NOTE(review): the field value is passed through ``str()``, so a request
    without a ``sentence`` field is classified as the literal text "None".
    """
    sentence = str(request.form.get("sentence"))
    label = predict_on_sentence(model, sentence)
    result = 'good' if label == 1 else 'bad'
    return render_template(
        'index.html',
        prediction_text='This is a {} comment'.format(result),
    )