Spaces:
Runtime error
Runtime error
File size: 3,104 Bytes
d940ac5 a0e9c41 d940ac5 3d00c67 d940ac5 4049660 d940ac5 05ab62f d940ac5 a0e9c41 d940ac5 2aff23c d940ac5 ab9ad12 d940ac5 56553fa d940ac5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
import tensorflow_text as text
from tensorflow.train import Checkpoint
import pandas as pd
import numpy as np
import gradio as gr
from Model import Transformer
# Load the WordPiece vocabulary (one token per line) for the tokenizer.
with open("vocab.txt", mode="r", encoding="utf-8") as file:
    # splitlines() strips the trailing newline from every token in one pass,
    # replacing the manual append + replace("\n", "") loop.
    vocab = file.read().splitlines()
# Special control tokens; their positions in this list define their ids.
reserved_tokens=["[START]", "[END]", "[PAD]", "[UNK]"]
# tf.argmax over the equality mask yields the index of the matching token as an
# int64 scalar tensor. NOTE(review): the int64 dtype appears to be load-bearing —
# these ids are concatenated with the tokenizer's token ids in evaluate(), so
# they must share a dtype; confirm before replacing with plain Python ints.
START = tf.argmax(tf.constant(reserved_tokens) == "[START]")
END = tf.argmax(tf.constant(reserved_tokens) == "[END]")
PAD = tf.argmax(tf.constant(reserved_tokens) == "[PAD]")
VOCAB_SIZE = len(vocab)
# Model hyperparameters. NOTE(review): presumably these must match the values
# used at training time for the checkpoint restore below to line up — confirm.
D_MODEL = 256          # embedding / model width
NB_LAYERS = 6          # number of encoder/decoder layers
FFN_UNITS = 2048       # feed-forward hidden units
NB_PROJ = 8            # attention projections (heads)
DROPOUT_RATE = 0.1
MAX_LENGTH = 50        # max tokens kept per sentence (before [START]/[END])
# WordPiece tokenizer over the loaded vocab; trimmer caps sequence length.
tokenizer = text.FastBertTokenizer(vocab, support_detokenization = True)
trimer = text.WaterfallTrimmer(max_seq_length = MAX_LENGTH)
# Instantiate the project-local Transformer (see Model.py).
# NOTE(review): vocab_size_dec = 1 suggests a single-unit decoder head, i.e. a
# binary sentiment score rather than token generation — confirm against
# Model.Transformer before changing.
transformer = Transformer(vocab_size_enc = VOCAB_SIZE,
                          vocab_size_dec = 1,
                          d_model = D_MODEL,
                          nb_layers = NB_LAYERS,
                          FFN_units = FFN_UNITS,
                          nb_proj = NB_PROJ,
                          dropout_rate = DROPOUT_RATE)
class CustomSchedule(LearningRateSchedule):
    """Learning-rate schedule from "Attention Is All You Need".

    lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)

    Linear warmup for the first ``warmup_steps`` steps, then inverse
    square-root decay.
    """

    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        # Stored as float32 so rsqrt below operates on a float tensor.
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # Bug fix: the optimizer passes `step` as an integer tensor, and
        # tf.math.rsqrt requires a float dtype — cast before using it.
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

    def get_config(self):
        # Enables serialization of the schedule with the optimizer.
        return {"d_model": float(self.d_model.numpy()),
                "warmup_steps": self.warmup_steps}
# Adam with the warmup schedule and the beta/epsilon values from the original
# Transformer paper. (Fixed typo: variable was previously "leaning_rate".)
learning_rate = CustomSchedule(D_MODEL)
optimizer = Adam(learning_rate,
                 beta_1=0.9,
                 beta_2=0.98,
                 epsilon=1e-9)
# Restore trained weights for inference. expect_partial() silences the
# "unresolved object" warnings for training-only state (optimizer slots)
# that this inference-only app never uses.
ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)
ckpt.restore("ckpt-10").expect_partial()
print("Checkpoint Restaurado")
def evaluate(sentence):
    """Classify the sentiment of a sentence with the restored transformer.

    The text is tokenized, trimmed to MAX_LENGTH tokens, wrapped with the
    [START]/[END] ids, padded to a fixed window of MAX_LENGTH + 2, and run
    through the model in inference mode; the output is rounded to 0 or 1.

    Args:
        sentence: raw input text (coerced to str, so any type is accepted).

    Returns:
        "Negative" if the rounded prediction is 0, otherwise "Positive".
    """
    sentence = str(sentence)
    ragged = tokenizer.tokenize([sentence])
    # Trim to at most MAX_LENGTH tokens so [START]/[END] still fit in the
    # padded window below.
    ragged = trimer.trim([ragged])[0]
    count = ragged.bounding_shape()[0]
    starts = tf.fill([count, 1], START)
    ends = tf.fill([count, 1], END)
    inputs = tf.concat([starts, ragged, ends], axis=1)
    inputs, _ = text.pad_model_inputs(inputs,
                                      max_seq_length=MAX_LENGTH + 2,
                                      pad_value=PAD)
    # Second positional arg is the training flag: False disables dropout.
    prediction = transformer(inputs, False)
    # Bug fix: the original compared a tensor with `== 0` inside `if`, which
    # is an elementwise comparison and only happens to work for a scalar
    # output; extract a plain Python int instead. Debug prints removed.
    label = int(tf.round(prediction).numpy().item())
    return "Negative" if label == 0 else "Positive"
# Gradio UI: one text box in, one text box out, served by evaluate().
app = gr.Interface(
    fn=evaluate,
    inputs="text",
    outputs="text",
    title="IMDb Sentiment Classifier",
    description="Write a sentence with a positive or negative sentiment",
)
app.launch(share=True)
|