File size: 4,550 Bytes
7934b29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Config file for text classification with pre-trained BERT models

trainer:
  # Number of GPUs (0 for CPU), or a list of the GPUs to use, e.g. [0, 1].
  devices: 1
  num_nodes: 1
  max_epochs: 100
  # Takes precedence over max_epochs.
  max_steps: -1
  # Accumulates gradients every k batches.
  accumulate_grad_batches: 1
  gradient_clip_val: 0.0
  # Should be set to 16 for O1 and O2 to enable the AMP.
  precision: 32
  accelerator: gpu
  # Interval of logging.
  log_every_n_steps: 1
  # Set to 0.25 to check 4 times per epoch, or an int for number of iterations.
  val_check_interval: 1.0
  # Path to a checkpoint file to continue the training from; restores the
  # whole state including the epoch, step, LR schedulers, apex, etc.
  resume_from_checkpoint: null
  # Number of validation steps to run as a sanity check of the validation
  # process before starting the training; setting to 0 disables it.
  num_sanity_val_steps: 0

  # Provided by exp_manager.
  enable_checkpointing: false
  # Provided by exp_manager.
  logger: false

model:
  # Filename to save the model and associated artifacts to (.nemo file).
  nemo_path: text_classification_model.nemo

  tokenizer:
    # Defaults to the language model's pretrained name via interpolation;
    # may also be set to sentencepiece.
    tokenizer_name: ${model.language_model.pretrained_model_name}
    # Path to vocab file.
    vocab_file: null
    # Only used if tokenizer is sentencepiece.
    tokenizer_model: null
    special_tokens: null

  language_model:
    pretrained_model_name: bert-base-uncased
    lm_checkpoint: null
    # JSON file; takes precedence over config.
    config_file: null
    config: null

  classifier_head:
    num_output_layers: 2
    fc_dropout: 0.1

  class_labels:
    # Optional: a file containing the list of the labels.
    class_labels_file: null

  dataset:
    # The number of classes; `???` is the OmegaConf marker for a mandatory
    # value that has to be supplied by the user.
    # Original note: 0 < Label < num_classes.
    # NOTE(review): the lower bound is presumably inclusive (0 <= label) if
    # labels are 0-indexed -- confirm against the dataset loader.
    num_classes: ???
    # true for uncased models, false for cased models; will be set
    # automatically if a pre-trained tokenizer model is used.
    do_lower_case: false
    # The maximum length BERT supports is 512.
    max_seq_length: 256
    # null or 'weighted_loss'. 'weighted_loss' enables the weighted class
    # balancing of the loss; may be used for handling unbalanced classes.
    class_balancing: null
    # Uses a cache to store the processed dataset; may be used to speed up
    # large datasets.
    use_cache: false

  train_ds:
    file_path: null
    batch_size: 64
    shuffle: true
    # Number of samples to be considered; -1 means all the dataset.
    num_samples: -1
    num_workers: 3
    drop_last: false
    pin_memory: false

  validation_ds:
    file_path: null
    batch_size: 64
    shuffle: false
    # Number of samples to be considered; -1 means all the dataset.
    num_samples: -1
    num_workers: 3
    drop_last: false
    pin_memory: false

  test_ds:
    file_path: null
    batch_size: 64
    shuffle: false
    # Number of samples to be considered; -1 means all the dataset.
    num_samples: -1
    num_workers: 3
    drop_last: false
    pin_memory: false

  optim:
    name: adam
    # Written with an explicit decimal point: YAML 1.1 loaders (e.g. plain
    # PyYAML) resolve `2e-5` as a string, whereas `2.0e-5` is always a float.
    lr: 2.0e-5
    # optimizer arguments
    betas: [0.9, 0.999]
    weight_decay: 0.01

    # scheduler setup
    sched:
      name: WarmupAnnealing
      # Scheduler params
      warmup_steps: null
      warmup_ratio: 0.1
      last_epoch: -1
      # pytorch lightning args
      monitor: val_loss
      reduce_on_plateau: false

  # List of some sample queries for inference after training is done.
  infer_samples:
    - 'by the end of no such thing the audience , like beatrice , has a watchful affection for the monster .'
    - 'director rob marshall went out gunning to make a great one .'
    - 'uneasy mishmash of styles and genres .'

exp_manager:
  # exp_dir for your experiment; if None, defaults to "./nemo_experiments".
  exp_dir: null
  # The name of your model.
  name: 'TextClassification'
  # Whether you want exp_manager to create a tb logger.
  create_tensorboard_logger: true
  # Whether you want exp_manager to create a modelcheckpoint callback.
  create_checkpoint_callback: true