yeomtong committed on
Commit
5b38540
·
verified ·
1 Parent(s): f23ec2d

Update trainer.py

Browse files
Files changed (1) hide show
  1. trainer.py +4 -4
trainer.py CHANGED
@@ -213,14 +213,14 @@ if __name__ == "__main__":
213
  # 🧩 Tokenizer + data loading
214
  # ------------------------------
215
  tokenizer = AutoTokenizer.from_pretrained(replace_encoder_with or bert_name)
216
- print(f"🔤 Using tokenizer: {replace_encoder_with or bert_name}")
217
 
218
- print(f"📂 Loading French CoNLL data: {conll_train_path}")
219
  train_dataset, label2id, id2label = data_processing_for_loader_from_conll(
220
  conll_path=conll_train_path,
221
  tokenizer=tokenizer,
222
- word_col_idx=3,
223
- srl_first_col_idx=11,
224
  )
225
 
226
  pad_token_id = tokenizer.pad_token_id or tokenizer.eos_token_id
 
213
  # 🧩 Tokenizer + data loading
214
  # ------------------------------
215
  tokenizer = AutoTokenizer.from_pretrained(replace_encoder_with or bert_name)
216
+ print(f"Using tokenizer: {replace_encoder_with or bert_name}")
217
 
218
+ print(f"Loading multilingual CoNLL data: {conll_train_path}")
219
  train_dataset, label2id, id2label = data_processing_for_loader_from_conll(
220
  conll_path=conll_train_path,
221
  tokenizer=tokenizer,
222
+ word_col_idx=word_col_idx,
223
+ srl_first_col_idx=srl_first_col_idx,
224
  )
225
 
226
  pad_token_id = tokenizer.pad_token_id or tokenizer.eos_token_id