# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Config file for P-tuning text classification on top of a pre-trained
# language model (prompt encoder + Megatron GPT2 BPE tokenizer, see `model`).

# Trainer settings: devices, epoch/step budget, precision, validation cadence.
trainer:
  devices: 1  # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1]
  num_nodes: 1
  max_epochs: 100
  max_steps: -1  # precedence over max_epochs
  accumulate_grad_batches: 1  # accumulates grads every k batches
  gradient_clip_val: 0.0
  precision: 32  # Should be set to 16 for O1 and O2 to enable the AMP.
  accelerator: gpu
  log_every_n_steps: 1  # Interval of logging.
  val_check_interval: 1.0  # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
  # The path to a checkpoint file to continue the training; restores the whole
  # state including the epoch, step, LR schedulers, apex, etc.
  resume_from_checkpoint: null
  # Number of validation steps to run as a sanity check of the validation
  # process before training starts; setting to 0 disables it.
  num_sanity_val_steps: 0
  enable_checkpointing: false  # Provided by exp_manager
  logger: false  # Provided by exp_manager

# Model settings: tokenizer, language model, prompt encoder, data and optimizer.
model:
  tensor_model_parallel_size: 1  # tensor model parallel size used in the LM model
  seed: 1234
  nemo_path: null  # filename to save the model and associated artifacts to .nemo file
  use_lm_finetune: false  # whether to fine-tune the language model
  pseudo_token: '[PROMPT]'  # pseudo prompt token

  tokenizer:
    library: 'megatron'
    type: 'GPT2BPETokenizer'
    model: null
    vocab_file: null
    merge_file: null

  language_model:
    nemo_file: null

  prompt_encoder:
    template: [3, 3, 0]
    dropout: 0.0
    num_layers: 2

  dataset:
    # The class labels, e.g. ['positive', 'neutral', 'negative'].
    # NOTE(review): `???` is presumably the OmegaConf "mandatory value" marker,
    # i.e. this must be supplied by the user - confirm against the loader.
    classes: ???

  train_ds:
    file_path: null
    batch_size: 64
    shuffle: true
    num_samples: -1  # number of samples to be considered, -1 means all the dataset
    num_workers: 3
    drop_last: false
    pin_memory: false

  validation_ds:
    file_path: null
    batch_size: 64
    shuffle: false
    num_samples: -1  # number of samples to be considered, -1 means all the dataset
    num_workers: 3
    drop_last: false
    pin_memory: false

  test_ds:
    file_path: null
    batch_size: 64
    shuffle: false
    num_samples: -1  # number of samples to be considered, -1 means all the dataset
    num_workers: 3
    drop_last: false
    pin_memory: false

  optim:
    name: adam
    # Written with an explicit ".0" so YAML 1.1 loaders (e.g. PyYAML) resolve it
    # as a float; a bare `1e-5` is read as a string by those loaders.
    lr: 1.0e-5
    # optimizer arguments
    betas: [0.9, 0.999]
    weight_decay: 0.0005

    # scheduler setup
    sched:
      name: WarmupAnnealing
      # Scheduler params
      warmup_steps: null
      warmup_ratio: 0.1
      last_epoch: -1

      # pytorch lightning args
      monitor: val_loss
      reduce_on_plateau: false

  # List of some sample queries for inference after training is done.
  # Single-quoted so the " : " inside the sentences stays literal text.
  infer_samples:
    - 'For example , net sales increased by 5.9 % from the first quarter , and EBITDA increased from a negative EUR 0.2 mn in the first quarter of 2009 .'
    - '8 May 2009 - Finnish liquid handling products and diagnostic test systems maker Biohit Oyj ( HEL : BIOBV ) said today ( 8 May 2009 ) its net loss narrowed to EUR0 .1 m ( USD0 .14 m ) for the first quarter of 2009 from EUR0 .4 m for the same period of 2008 .'
    - 'CHS Expo Freight is a major Finnish fair , exhibition and culture logistics company that provides logistics services to various events by land , air and sea .'

# Experiment manager settings: output directory, run name, logger/checkpoint callbacks.
exp_manager:
  exp_dir: null  # exp_dir for your experiment, if null, defaults to "./nemo_experiments"
  name: "PTuneTextClassification"  # The name of your model
  create_tensorboard_logger: true  # Whether you want exp_manager to create a TensorBoard logger
  create_checkpoint_callback: true  # Whether you want exp_manager to create a ModelCheckpoint callback