# Token length that every sentence fed to the model is padded/truncated to.
MAX_LEN = 70


def convert_example_to_feature(sentence, max_length=None):
    """Tokenize one sentence into fixed-length BERT inputs.

    Args:
        sentence: Text handed to ``bert_tokenizer.encode_plus``.
        max_length: Length to pad/truncate to. ``None`` (the default) means
            "use the module-level ``MAX_LEN``", looked up at call time so a
            later reassignment of ``MAX_LEN`` is still honoured.

    Returns:
        The tokenizer's encoding, requested with special tokens, an
        attention mask and token type ids. ``encode`` reads its
        ``input_ids``, ``token_type_ids`` and ``attention_mask`` entries.
    """
    if max_length is None:
        max_length = MAX_LEN
    return bert_tokenizer.encode_plus(
        sentence,
        add_special_tokens=True,
        padding='max_length',
        truncation='longest_first',
        max_length=max_length,
        return_attention_mask=True,
        return_token_type_ids=True,
    )


# Preview the encoding of a single sample. This deliberately keeps the
# original preview length of 50 instead of MAX_LEN.
bertInput = convert_example_to_feature(data.Text[id_data], max_length=50)
bertInput.keys()  # Only displays something in an interactive session.


def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
    """Pack one dataset element into a ``(features_dict, label)`` pair.

    The parameter order matches the tuple that ``encode`` passes to
    ``tf.data.Dataset.from_tensor_slices``.
    """
    return {
        "input_ids": input_ids,  # Used as the token embedding.
        "token_type_ids": token_type_ids,  # Used as the segment embedding.
        # Filters which information the model computes over.
        "attention_mask": attention_masks,
    }, label


def encode(data):
    """Tokenize every row of ``data`` and build a ``tf.data.Dataset``.

    Args:
        data: Object exposing ``to_numpy()`` (presumably a pandas DataFrame)
            whose rows unpack as exactly ``(label, sentence)``, in that
            order. Rows of any other width raise ``ValueError`` on unpacking.

    Returns:
        An unbatched dataset whose elements are the ``(features_dict, label)``
        pairs produced by ``map_example_to_dict``. Each label is wrapped in a
        one-element list before the dataset is built.
    """
    rows = data.to_numpy()
    encodings = [convert_example_to_feature(sentence) for _, sentence in rows]
    labels = [[label] for label, _ in rows]

    # The tuple order must match map_example_to_dict's positional parameters.
    columns = (
        [encoding['input_ids'] for encoding in encodings],
        [encoding['attention_mask'] for encoding in encodings],
        [encoding['token_type_ids'] for encoding in encodings],
        labels,
    )
    return tf.data.Dataset.from_tensor_slices(columns).map(map_example_to_dict)


# Training hyper-parameters. EPOCHS and LEARNING_RATE are not used in this
# section; presumably the training code further down consumes them.
EPOCHS = 2
BATCH_SIZE = 64
LEARNING_RATE = 5e-5

# Batched datasets for each split.
train_encode = encode(df_train).batch(BATCH_SIZE)
test_encode = encode(df_test).batch(BATCH_SIZE)
val_encode = encode(df_val).batch(BATCH_SIZE)