| _n_gpu: 1 |
| adafactor: false |
| adam_beta1: 0.9 |
| adam_beta2: 0.999 |
| adam_epsilon: 1.0e-08 |
| cache_dir: null |
| dataloader_drop_last: false |
| dataloader_num_workers: 0 |
| dataloader_pin_memory: true |
| ddp_find_unused_parameters: null |
| debug: [] |
| deepspeed: null |
| disable_tqdm: false |
| do_eval: true |
| do_predict: false |
| do_train: true |
| eval_accumulation_steps: 1 |
| eval_dataset_list: |
| - tquad2-valid |
| - xquad.tr |
| eval_steps: 300 |
| evaluation_strategy: &id001 !!python/object/apply:transformers.trainer_utils.IntervalStrategy |
| - steps |
| fp16: false |
| fp16_backend: auto |
| fp16_full_eval: false |
| fp16_opt_level: O1 |
| freeze_embeddings: false |
| gradient_accumulation_steps: 4 |
| greater_is_better: null |
| group_by_length: false |
| ignore_data_skip: false |
| label_names: null |
| label_smoothing_factor: 0 |
| learning_rate: 0.001 |
| length_column_name: length |
| load_best_model_at_end: false |
| local_rank: -1 |
| log_level: -1 |
| log_level_replica: -1 |
| log_on_each_node: true |
| logging_dir: null |
| logging_first_step: false |
| logging_steps: 500 |
| logging_strategy: *id001 |
| lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType |
| - linear |
| max_grad_norm: 1.0 |
| max_source_length: 512 |
| max_steps: -1 |
| max_target_length: 64 |
| metric_for_best_model: null |
| model_name_or_path: google/mt5-small |
| model_type: mt5 |
| mp_parameters: '' |
| mt5_qg_format: highlight |
| mt5_task_list: |
| - qa |
| - qg |
| - ans_ext |
| neptune_api_token: null |
| neptune_project: null |
| neptune_run: null |
| no_cuda: false |
| num_train_epochs: 15 |
| output_dir: runs/mt5-small/3task/adamw-1e3-15ep-highlight-tquad2train |
| overwrite_output_dir: false |
| past_index: -1 |
| per_device_eval_batch_size: 64 |
| per_device_train_batch_size: 64 |
| per_gpu_eval_batch_size: null |
| per_gpu_train_batch_size: null |
| prediction_loss_only: false |
| prepare_data: true |
| push_to_hub: false |
| push_to_hub_model_id: adamw-1e3-15ep-highlight-tquad2train |
| push_to_hub_organization: null |
| push_to_hub_token: null |
| remove_unused_columns: false |
| report_to: |
| - wandb |
| - neptune |
| resume_from_checkpoint: null |
| run_name: turque-mt5small-adamw-1e3-15ep-tquad2train |
| save_on_each_node: false |
| save_steps: 500 |
| save_strategy: *id001 |
| save_total_limit: 1 |
| seed: 42 |
| sharded_ddp: [] |
| skip_memory_metrics: true |
| tokenizer_path: tokenizers/mt5-small |
| tpu_metrics_debug: false |
| tpu_num_cores: null |
| train_dataset_list: |
| - tquad2-train |
| train_file_path: data/train_data.pt |
| use_legacy_prediction_loop: false |
| valid_dataset_list: |
| - tquad2-valid |
| valid_file_path: data/valid_data.pt |
| wandb_id: null |
| wandb_project: turkish-qa-qg |
| warmup_ratio: 0.0 |
| warmup_steps: 0 |
| weight_decay: 0.0 |
|
|