Rizqi
/

Emotion-Bert

Model card Files Files and versions

Emotion-Bert / Formatting

Rizqi's picture

Update Formatting

672738d almost 4 years ago

history blame contribute delete

1.71 kB

	# Fixed token length of every encoded sentence: the tokenizer is asked to pad
	# shorter inputs up to this length and truncate longer ones down to it.
	MAX_LEN = 70

	# Encode one sample sentence to inspect the tokenizer output. It uses MAX_LEN
	# (not a separate hard-coded length) so the preview has the same shape as the
	# features produced by convert_example_to_feature().
	bertInput = bert_tokenizer.encode_plus(
		data.Text[id_data],
		add_special_tokens=True,
		padding='max_length',
		truncation='longest_first',
		max_length=MAX_LEN,
		return_attention_mask=True,
		return_token_type_ids=True,
	)

	# Bare expression: it only shows the available fields when run interactively
	# (e.g. as the last line of a notebook cell); it has no effect in a script.
	bertInput.keys()

	def convert_example_to_feature(sentence):
		"""Tokenize one sentence into fixed-length BERT inputs.

		The tokenizer is asked to add the special tokens, pad to ``MAX_LEN``
		tokens and truncate anything longer, and to return the attention mask
		and token type ids alongside the input ids.

		Args:
			sentence: The text to encode.

		Returns:
			The result of ``bert_tokenizer.encode_plus``; ``encode()`` reads its
			``'input_ids'``, ``'token_type_ids'`` and ``'attention_mask'``
			entries.
		"""
		return bert_tokenizer.encode_plus(
			sentence,
			add_special_tokens=True,
			padding='max_length',
			truncation='longest_first',
			max_length=MAX_LEN,
			return_attention_mask=True,
			return_token_type_ids=True,
		)


	def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
		"""Pack one encoded example into a ``(features, label)`` pair.

		Args:
			input_ids: Token ids of the example.
			attention_masks: Attention mask of the example.
			token_type_ids: Token type (segment) ids of the example.
			label: Label of the example; returned unchanged.

		Returns:
			A 2-tuple ``(features, label)`` where ``features`` is a dict with the
			keys ``"input_ids"``, ``"token_type_ids"`` and ``"attention_mask"``.
		"""
		features = {
			"input_ids": input_ids,  # used as the token embedding input
			"token_type_ids": token_type_ids,  # used as the segment embedding input
			"attention_mask": attention_masks,  # filters which positions the model computes on
		}
		return features, label


	def encode(data):
		"""Turn a table of labelled sentences into a ``tf.data.Dataset``.

		Args:
			data: Object exposing ``to_numpy()`` (presumably a pandas DataFrame;
				confirm against the caller). Each row must unpack into exactly
				two values, the label first and the sentence second.

		Returns:
			A dataset built with ``tf.data.Dataset.from_tensor_slices`` and
			mapped through ``map_example_to_dict``, so each element is a
			``(features_dict, label)`` pair.
		"""
		input_ids_list = []
		token_type_ids_list = []
		attention_mask_list = []
		label_list = []

		# Column order matters here: rows are unpacked positionally.
		for label, sentence in data.to_numpy():
			bert_input = convert_example_to_feature(sentence)
			input_ids_list.append(bert_input['input_ids'])
			token_type_ids_list.append(bert_input['token_type_ids'])
			attention_mask_list.append(bert_input['attention_mask'])
			# Each label is wrapped in a one-element list.
			label_list.append([label])

		# The tuple order must match map_example_to_dict's parameter order:
		# (input_ids, attention_masks, token_type_ids, label).
		return tf.data.Dataset.from_tensor_slices(
			(input_ids_list, attention_mask_list, token_type_ids_list, label_list)
		).map(map_example_to_dict)


	# Hyperparameters. Only BATCH_SIZE is consumed in this section; EPOCHS and
	# LEARNING_RATE are defined here for use elsewhere.
	LEARNING_RATE = 5e-5
	BATCH_SIZE = 64
	EPOCHS = 2

	# Encode every split and group it into batches of BATCH_SIZE examples.
	# Evaluation order is train, test, val.
	train_encode, test_encode, val_encode = [
		encode(split).batch(BATCH_SIZE)
		for split in (df_train, df_test, df_val)
	]