Emotion-Bert / Formatting
Rizqi's picture
Update Formatting
672738d
# Sequence length (in tokens) that convert_example_to_feature pads/truncates to.
MAX_LEN = 70

# Encode one sample text so the tokenizer output can be inspected.
# NOTE(review): max_length below is the literal 50, not MAX_LEN (70) --
# confirm whether this mismatch with the training pipeline is intentional.
bertInput = bert_tokenizer.encode_plus(
    data.Text[id_data],
    add_special_tokens=True,
    padding='max_length',
    truncation='longest_first',
    max_length=50,
    return_attention_mask=True,
    return_token_type_ids=True,
)
# Bare expression: only displays the available keys when run interactively.
bertInput.keys()
def convert_example_to_feature(sentence):
    """Tokenize a single sentence with ``bert_tokenizer.encode_plus``.

    The sentence is encoded with special tokens added, padded to the
    maximum length and truncated (``longest_first``) so the result is
    ``MAX_LEN`` tokens long. Attention mask and token type ids are
    requested alongside the input ids.

    Args:
        sentence: The text to encode (presumably a ``str`` -- confirm
            against the caller).

    Returns:
        Whatever ``bert_tokenizer.encode_plus`` returns for these options.
    """
    encoding_options = {
        'add_special_tokens': True,
        'padding': 'max_length',
        'truncation': 'longest_first',
        'max_length': MAX_LEN,
        'return_attention_mask': True,
        'return_token_type_ids': True,
    }
    return bert_tokenizer.encode_plus(sentence, **encoding_options)
def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
    """Repack one example into a ``(features, label)`` pair.

    Note the positional order of the parameters: the attention mask comes
    *before* the token type ids.

    Args:
        input_ids: Token ids of the example.
        attention_masks: Attention mask of the example.
        token_type_ids: Token type (segment) ids of the example.
        label: Target value, passed through unchanged.

    Returns:
        A 2-tuple ``(features, label)`` where ``features`` is a dict with
        the keys ``input_ids``, ``token_type_ids`` and ``attention_mask``.
    """
    features = dict(
        input_ids=input_ids,  # serves as the token embedding input
        token_type_ids=token_type_ids,  # serves as the segment embedding input
        attention_mask=attention_masks,  # filters which information the model computes on
    )
    return features, label
def encode(data):
    """Tokenize every row of ``data`` and wrap the result in a tf.data.Dataset.

    Args:
        data: Object exposing ``to_numpy()`` whose rows unpack into exactly
            ``(label, sentence)`` -- assumes a two-column frame in that
            order; TODO confirm against the caller.

    Returns:
        A ``tf.data.Dataset`` built with ``from_tensor_slices`` and mapped
        through ``map_example_to_dict``, i.e. yielding
        ``(features_dict, label)`` elements. Each label is wrapped in a
        one-element list before slicing.
    """
    encodings = []
    labels = []
    for label, sentence in data.to_numpy():
        encodings.append(convert_example_to_feature(sentence))
        labels.append([label])

    # Tuple order must match map_example_to_dict's positional parameters:
    # (input_ids, attention_masks, token_type_ids, label).
    tensors = (
        [enc['input_ids'] for enc in encodings],
        [enc['attention_mask'] for enc in encodings],
        [enc['token_type_ids'] for enc in encodings],
        labels,
    )
    dataset = tf.data.Dataset.from_tensor_slices(tensors)
    return dataset.map(map_example_to_dict)
# Hyperparameters; only BATCH_SIZE is consumed in this section.
EPOCHS = 2
BATCH_SIZE = 64
LEARNING_RATE = 5e-5

# Encode and batch each split, in the order train -> test -> val.
# NOTE(review): no shuffle step is applied to the training split here --
# confirm the data is shuffled elsewhere if that matters for training.
train_encode, test_encode, val_encode = (
    encode(split).batch(BATCH_SIZE)
    for split in (df_train, df_test, df_val)
)