Spaces:

SergioMtz
/

IMDb_Sentiment_Classifier

Runtime error

App Files Files Community

IMDb_Sentiment_Classifier / app.py

SergioMtz

Update app.py

a0e9c41 over 3 years ago

raw

history blame contribute delete

3.1 kB

	import tensorflow as tf
	from tensorflow.keras.optimizers import Adam
	from tensorflow.keras.optimizers.schedules import LearningRateSchedule
	import tensorflow_text as text
	from tensorflow.train import Checkpoint
	import pandas as pd
	import numpy as np
	import gradio as gr
	from Model import Transformer

	# Read the vocabulary file into a list: one token per line, with the
	# newline character removed from each entry.
	with open("vocab.txt", mode="r", encoding="utf-8") as file:
	    vocab = [line.replace("\n", "") for line in file]

	# Special tokens. Their positions in this list are used below as their ids
	# (presumably vocab.txt starts with these tokens in the same order -- confirm).
	reserved_tokens = ["[START]", "[END]", "[PAD]", "[UNK]"]
	_reserved = tf.constant(reserved_tokens)

	# Index of each special token within `reserved_tokens`, kept as tensors so
	# they can be used directly as fill / pad values in TensorFlow ops.
	START = tf.argmax(_reserved == "[START]")
	END = tf.argmax(_reserved == "[END]")
	PAD = tf.argmax(_reserved == "[PAD]")

	# Model hyperparameters passed to the Transformer constructor.
	VOCAB_SIZE = len(vocab)
	D_MODEL = 256
	NB_LAYERS = 6
	FFN_UNITS = 2048
	NB_PROJ = 8
	DROPOUT_RATE = 0.1

	# Maximum number of tokens kept per input, before [START]/[END] are added.
	MAX_LENGTH = 50

	# Tokenizer built from the loaded vocabulary, and the trimmer that caps
	# tokenized inputs at MAX_LENGTH tokens.
	tokenizer = text.FastBertTokenizer(vocab, support_detokenization=True)
	trimer = text.WaterfallTrimmer(max_seq_length=MAX_LENGTH)

	# Hyperparameters forwarded to the project-local Transformer constructor.
	# vocab_size_dec is 1: the model is used as a single-output classifier
	# (see how its prediction is rounded in evaluate()).
	_transformer_kwargs = dict(
	    vocab_size_enc=VOCAB_SIZE,
	    vocab_size_dec=1,
	    d_model=D_MODEL,
	    nb_layers=NB_LAYERS,
	    FFN_units=FFN_UNITS,
	    nb_proj=NB_PROJ,
	    dropout_rate=DROPOUT_RATE,
	)
	transformer = Transformer(**_transformer_kwargs)

	class CustomSchedule(LearningRateSchedule):
	    """Warm-up / inverse-square-root learning-rate schedule.

	    The learning rate at a given step is::

	        rsqrt(d_model) * min(rsqrt(step), step * warmup_steps ** -1.5)

	    so it grows linearly for the first ``warmup_steps`` steps and then decays
	    proportionally to the inverse square root of the step number.

	    Args:
	        d_model: Model dimensionality; stored as a float32 tensor.
	        warmup_steps: Number of steps over which the rate increases linearly.
	    """

	    def __init__(self, d_model, warmup_steps=4000):
	        super().__init__()

	        self.d_model = tf.cast(d_model, tf.float32)
	        self.warmup_steps = warmup_steps

	    def __call__(self, step):
	        """Return the learning rate for ``step`` as a float32 tensor."""
	        # Optimizers pass their integer iteration counter here; rsqrt and the
	        # float multiplication below only accept floating-point inputs.
	        step = tf.cast(step, tf.float32)
	        arg1 = tf.math.rsqrt(step)
	        arg2 = step * (self.warmup_steps ** -1.5)

	        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

	    def get_config(self):
	        """Return the constructor arguments so the schedule can be serialized."""
	        return {
	            # d_model is held as a tensor; convert back to a plain number.
	            "d_model": float(self.d_model),
	            "warmup_steps": self.warmup_steps,
	        }

	# Recreate the schedule and optimizer so they can be attached to the
	# checkpoint object together with the model.
	# NOTE: the misspelt name `leaning_rate` is kept as-is (module-level name).
	leaning_rate = CustomSchedule(D_MODEL)
	optimizer = Adam(
	    learning_rate=leaning_rate,
	    beta_1=0.9,
	    beta_2=0.98,
	    epsilon=1e-9,
	)

	# Restore model and optimizer state from the "ckpt-10" checkpoint prefix.
	# NOTE(review): the status object returned by restore() is discarded, so the
	# message below is printed whether or not anything was actually matched.
	ckpt = tf.train.Checkpoint(transformer=transformer, optimizer=optimizer)
	ckpt.restore("ckpt-10")
	print("Checkpoint Restaurado")

	def evaluate(sentence):
	    """Classify ``sentence`` and return ``"Negative"`` or ``"Positive"``.

	    The input is converted to ``str``, tokenized, trimmed to ``MAX_LENGTH``
	    tokens, wrapped with the START/END ids and padded with PAD up to
	    ``MAX_LENGTH + 2`` before being passed to the transformer. The raw and
	    the rounded model outputs are both printed.

	    Args:
	        sentence: Text to classify (any object; ``str()`` is applied to it).

	    Returns:
	        ``"Negative"`` when the rounded prediction equals 0, otherwise
	        ``"Positive"``.
	    """
	    token_ids = tokenizer.tokenize([str(sentence)])
	    token_ids = trimer.trim([token_ids])[0]

	    # One START and one END id per row of the tokenized batch.
	    rows = token_ids.bounding_shape()[0]
	    framed = tf.concat(
	        [tf.fill([rows, 1], START), token_ids, tf.fill([rows, 1], END)],
	        axis=1,
	    )
	    # pad_model_inputs also returns a mask, which is not used here.
	    model_inputs, _ = text.pad_model_inputs(
	        framed,
	        max_seq_length=MAX_LENGTH + 2,
	        pad_value=PAD,
	    )

	    # Second positional argument is False -- presumably the training flag;
	    # confirm against Model.Transformer.
	    raw_prediction = transformer(model_inputs, False)
	    print(raw_prediction)

	    rounded = tf.round(raw_prediction)
	    print(rounded)
	    return "Negative" if rounded == 0 else "Positive"

	# Gradio UI: a single text box in, the predicted label as text out.
	app = gr.Interface(
	    fn=evaluate,
	    inputs="text",
	    outputs="text",
	    title="IMDb Sentiment Classifier",
	    description="Write a sentence with a positive or negative sentiment",
	)
	# share=True asks Gradio to also create a public share link.
	app.launch(share=True)