Update trainer.py
Browse files
- trainer.py +4 -4
trainer.py
CHANGED
|
@@ -213,14 +213,14 @@ if __name__ == "__main__":
|
|
Before (old side; lines marked `-` were removed — their text is truncated in this view, only the leading characters survive):

| 213 |   | # 🧩 Tokenizer + data loading
| 214 |   | # ------------------------------
| 215 |   | tokenizer = AutoTokenizer.from_pretrained(replace_encoder_with or bert_name)
| 216 | - | print(f"
| 217 |   |
| 218 | - | print(f"
| 219 |   | train_dataset, label2id, id2label = data_processing_for_loader_from_conll(
| 220 |   |     conll_path=conll_train_path,
| 221 |   |     tokenizer=tokenizer,
| 222 | - |     word_col_idx=
| 223 | - |     srl_first_col_idx=
| 224 |   | )
| 225 |   |
| 226 |   | pad_token_id = tokenizer.pad_token_id or tokenizer.eos_token_id
|
|
|
|
After (new side; lines marked `+` were added):

| 213 |   | # 🧩 Tokenizer + data loading
| 214 |   | # ------------------------------
| 215 |   | tokenizer = AutoTokenizer.from_pretrained(replace_encoder_with or bert_name)
| 216 | + | print(f"Using tokenizer: {replace_encoder_with or bert_name}")
| 217 |   |
| 218 | + | print(f"Loading multilingual CoNLL data: {conll_train_path}")
| 219 |   | train_dataset, label2id, id2label = data_processing_for_loader_from_conll(
| 220 |   |     conll_path=conll_train_path,
| 221 |   |     tokenizer=tokenizer,
| 222 | + |     word_col_idx=word_col_idx,
| 223 | + |     srl_first_col_idx=srl_first_col_idx,
| 224 |   | )
| 225 |   |
| 226 |   | pad_token_id = tokenizer.pad_token_id or tokenizer.eos_token_id
|