import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
import tensorflow_text as text
import gradio as gr

from Model import Transformer

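# Load the WordPiece vocabulary, one token per line.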
vocab = []
with open("vocab.txt", mode = "r", encoding = "utf-8") as file:
    for token in file:
        vocab.append(token.replace("\n", ""))
        
reserved_tokens=["[START]", "[END]", "[PAD]", "[UNK]"]
        
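# Ids of the special tokens. Using argmax over reserved_tokens assumes
# these four tokens sit at the start of vocab.txt in this same order,
# so their positions double as vocab ids.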
START = tf.argmax(tf.constant(reserved_tokens) == "[START]")
END = tf.argmax(tf.constant(reserved_tokens) == "[END]")
PAD = tf.argmax(tf.constant(reserved_tokens) == "[PAD]")        
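# Model hyperparameters; these must match the configuration the
# checkpoint was trained with.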
VOCAB_SIZE = len(vocab)
D_MODEL = 256
NB_LAYERS = 6
FFN_UNITS = 2048
NB_PROJ = 8
DROPOUT_RATE = 0.1
MAX_LENGTH = 50

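# FastBertTokenizer maps raw text to WordPiece ids; WaterfallTrimmer
# caps each tokenized sequence at MAX_LENGTH tokens.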
tokenizer = text.FastBertTokenizer(vocab, support_detokenization = True)
trimmer = text.WaterfallTrimmer(max_seq_length = MAX_LENGTH)

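# Rebuild the Transformer so the checkpointed weights can be loaded into it.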
transformer = Transformer(vocab_size_enc = VOCAB_SIZE, 
                          vocab_size_dec = 1,
                          d_model = D_MODEL,
                          nb_layers = NB_LAYERS,
                          FFN_units = FFN_UNITS,
                          nb_proj = NB_PROJ,
                          dropout_rate = DROPOUT_RATE)

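# Warmup learning-rate schedule from "Attention Is All You Need":
# lr = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5).
# It is only defined here so the optimizer matches the checkpoint.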
class CustomSchedule(LearningRateSchedule):
    def __init__(self, d_model, warmup_steps = 4000):
        super(CustomSchedule, self).__init__()
        
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps
    
    def __call__(self, step):
        # Cast the step counter to float so tf.math.rsqrt is well defined.
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps**-1.5)
        
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

learning_rate = CustomSchedule(D_MODEL)

optimizer = Adam(learning_rate,
                 beta_1=0.9,
                 beta_2=0.98,
                 epsilon=1e-9)
                                                    
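# Recreate the checkpointed objects and restore the trained weights.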
ckpt = tf.train.Checkpoint(transformer = transformer,
                           optimizer = optimizer)  
ckpt.restore("ckpt-10")
print("Checkpoint restored")

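# Inference function wired to the Gradio app: takes the textbox string,
# returns "Positive" or "Negative".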
def evaluate(sentence):
    # Tokenize the sentence and trim it to at most MAX_LENGTH tokens.
    sentence = str(sentence)
    ragged = tokenizer.tokenize([sentence])
    ragged = trimmer.trim([ragged])[0]
    
    # Wrap the sequence in [START]/[END] markers and pad to a fixed length.
    count = ragged.bounding_shape()[0]
    starts = tf.fill([count, 1], START)
    ends = tf.fill([count, 1], END)
    inputs = tf.concat([starts, ragged, ends], axis=1)
    inputs, _ = text.pad_model_inputs(inputs, max_seq_length = MAX_LENGTH + 2, pad_value = PAD)
    
    # Run the model in inference mode (training=False) and round the
    # sigmoid output to a hard 0/1 label.
    prediction = transformer(inputs, False)
    prediction = tf.round(prediction)
    
    return "Negative" if prediction == 0 else "Positive"
        
app = gr.Interface(fn = evaluate,
                   title = "IMDb Sentiment Classifier",
                   description = "Write a sentence with a positive or negative sentiment",
                   inputs = "text",
                   outputs = "text")
app.launch(share = True)