import argparse
import logging.config

import numpy as np
import torch
import torch.nn as nn
from datasets import DatasetDict, load_from_disk
from sklearn.metrics import classification_report, precision_recall_fscore_support
from transformers import (
    DebertaV2Config,
    DebertaV2Model,
    DebertaV2PreTrainedModel,
    DebertaV2TokenizerFast,
    Trainer,
    TrainingArguments,
)

from utils import default_logging_config, get_uniq_training_labels, show_examples

logger = logging.getLogger(__name__)

arg_parser = argparse.ArgumentParser(description="Train multi-task model.")
arg_parser.add_argument("-A", "--accumulation-steps",
                        help="Gradient accumulation steps.",
                        action="store", type=int, default=8)
arg_parser.add_argument("--data-only",
                        help="Show training data info and exit.",
                        action="store_true", default=False)
arg_parser.add_argument("--data-path",
                        help="Load training dataset from specified path.",
                        action="store", default="./training_data")
arg_parser.add_argument("-E", "--train-epochs",
                        help="Number of epochs to train for.",
                        action="store", type=int, default=3)
arg_parser.add_argument("-V", "--eval-batch-size",
                        help="Per device eval batch size.",
                        action="store", type=int, default=2)
arg_parser.add_argument("--from-base",
                        help="Load a base model.",
                        action="store", default=None,
                        choices=[
                            "microsoft/deberta-v3-base",   # Requires --deberta-v3
                            "microsoft/deberta-v3-large",  # Requires --deberta-v3
                            # More?
                        ])
arg_parser.add_argument("-L", "--learning-rate",
                        help="Learning rate.",
                        action="store", type=float, default=5e-5)
arg_parser.add_argument("--mini",
                        help="Train model using small subset of examples for pipeline testing.",
                        action="store_true", default=False)
arg_parser.add_argument("--save-path",
                        help="Save final model to specified path.",
                        action="store", default="./final")
arg_parser.add_argument("--show", help="Show examples: //