# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Token Classification tasks (for example, Named Entity Recognition) with pretrained BERT-like models

# Pretrained TokenClassification model: either a name from list_available_models()
# or a path to a .nemo file, for example: ner_en_bert or your_model.nemo
pretrained_model: null
# Trainer settings. Checkpointing and logging are switched off here because,
# as noted on those keys, they are provided by exp_manager instead.
trainer:
  # Number of devices to train on (GPUs, given `accelerator: gpu` below).
  # NOTE(review): the earlier hint "0 for CPU" conflicts with `accelerator: gpu`;
  # a CPU run presumably needs `accelerator` changed as well - confirm.
  devices: 1
  num_nodes: 1
  max_epochs: 5
  max_steps: -1  # takes precedence over max_epochs
  accumulate_grad_batches: 1  # accumulates grads every k batches
  gradient_clip_val: 0.0
  # Should be set to 16 for O1 and O2; the default is 16 as PT ignores it
  # when amp_level is O0.
  precision: 16
  accelerator: gpu
  enable_checkpointing: false  # Provided by exp_manager
  logger: false  # Provided by exp_manager
  log_every_n_steps: 1  # Interval of logging.
  # Set to 0.25 to check 4 times per epoch, or an int for number of iterations.
  val_check_interval: 1.0
  # Path to a checkpoint file to continue the training from; restores the whole
  # state including the epoch, step, LR schedulers, apex, etc.
  resume_from_checkpoint: null

# Experiment manager settings: where experiment output goes and which
# logger / checkpoint callback it creates.
exp_manager:
  exp_dir: null  # exp_dir for your experiment; if null, defaults to "./nemo_experiments"
  name: token_classification_model  # The name of your model
  create_tensorboard_logger: true  # Whether exp_manager should create a TensorBoard logger
  create_checkpoint_callback: true  # Whether exp_manager should create a model checkpoint callback

# Model, data, tokenizer and optimization settings for token classification.
model:
  label_ids: null  # will be filled during training
  class_labels:
    class_labels_file: label_ids.csv  # will be generated during training and saved in .nemo file

  # Settings common to all datasets below.
  dataset:
    data_dir: ???  # /path/to/data (no default; must be set by the user)
    # Choose from [null, weighted_loss]. weighted_loss enables weighted class
    # balancing of the loss; may be used for handling unbalanced classes.
    class_balancing: null
    max_seq_length: 128
    pad_label: 'O'
    ignore_extra_tokens: false
    ignore_start_end: false
    use_cache: false
    # shared among dataloaders
    num_workers: 2
    pin_memory: false
    drop_last: false

  # Training split: text and label files.
  train_ds:
    text_file: text_train.txt
    labels_file: labels_train.txt
    shuffle: true
    num_samples: -1
    batch_size: 64

  # Validation split.
  validation_ds:
    text_file: text_dev.txt
    labels_file: labels_dev.txt
    shuffle: false
    num_samples: -1
    batch_size: 64

  # Test split. By default it points at the same dev files as validation_ds;
  # override text_file / labels_file to evaluate on a separate test set.
  test_ds:
    text_file: text_dev.txt
    labels_file: labels_dev.txt
    shuffle: false
    num_samples: -1
    batch_size: 64

  tokenizer:
    # Follows the language model name below by interpolation; or sentencepiece.
    tokenizer_name: ${model.language_model.pretrained_model_name}
    vocab_file: null  # path to vocab file
    tokenizer_model: null  # only used if tokenizer is sentencepiece
    special_tokens: null

  language_model:
    pretrained_model_name: bert-base-uncased
    lm_checkpoint: null
    config_file: null  # json file, precedence over config
    config: null

  # Classification head on top of the language model.
  head:
    num_fc_layers: 2
    fc_dropout: 0.5
    activation: 'relu'
    use_transformer_init: true

  optim:
    name: adam
    # Written with an explicit decimal point so that YAML 1.1 loaders
    # (e.g. PyYAML) resolve it as a float rather than the string "5e-5".
    lr: 5.0e-5
    weight_decay: 0.00

    sched:
      name: WarmupAnnealing
      # Scheduler params
      # NOTE(review): presumably only one of warmup_steps / warmup_ratio is
      # meant to be set at a time - confirm against the scheduler.
      warmup_steps: null
      warmup_ratio: 0.1
      last_epoch: -1

      # pytorch lightning args
      monitor: val_loss
      reduce_on_plateau: false

# Hydra runtime overrides.
hydra:
  run:
    # Run directory is set to the current directory.
    # NOTE(review): presumably to avoid Hydra's per-run output directories - confirm.
    dir: .
  job_logging:
    root:
      # No handlers on Hydra's root job logger.
      # NOTE(review): presumably so the application's own logging is used - confirm.
      handlers: null