Commit
·
711ea00
1
Parent(s):
440cfd5
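Update DistilBERT.py: create the DistilBERT tokenizer at module level and assign the passed-in tokenizer in BinaryLabel.__init__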
Browse files
- DistilBERT.py +2 -2
DistilBERT.py
CHANGED
|
@@ -17,7 +17,7 @@ TRAIN_BATCH_SIZE = 4
|
|
| 17 |
VALID_BATCH_SIZE = 4
|
| 18 |
EPOCHS = 1
|
| 19 |
LEARNING_RATE = 1e-05
|
| 20 |
-
|
| 21 |
|
| 22 |
# Tạo dataframe
|
| 23 |
train_df_DB = pd.read_csv('./data/train.csv')
|
|
@@ -30,7 +30,7 @@ test_df_DB['label'] = test_df_DB.iloc[:, 2:].values.tolist()
|
|
| 30 |
class BinaryLabel(Dataset):
|
| 31 |
|
| 32 |
def __init__(self, dataframe, tokenizer, max_len):
|
| 33 |
-
self.tokenizer =
|
| 34 |
self.data = dataframe
|
| 35 |
self.text = dataframe.text
|
| 36 |
self.targets = self.data.label
|
|
|
|
| 17 |
VALID_BATCH_SIZE = 4
|
| 18 |
EPOCHS = 1
|
| 19 |
LEARNING_RATE = 1e-05
|
| 20 |
+
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', truncation=True, do_lower_case=True)
|
| 21 |
|
| 22 |
# Tạo dataframe
|
| 23 |
train_df_DB = pd.read_csv('./data/train.csv')
|
|
|
|
| 30 |
class BinaryLabel(Dataset):
|
| 31 |
|
| 32 |
def __init__(self, dataframe, tokenizer, max_len):
|
| 33 |
+
self.tokenizer = tokenizer
|
| 34 |
self.data = dataframe
|
| 35 |
self.text = dataframe.text
|
| 36 |
self.targets = self.data.label
|