import argparse
import ast
import logging
import os
import random
import re
from enum import Enum

import torch
import torch.nn as nn

from stanza.models.common import loss
from stanza.models.common import utils
from stanza.models.pos.vocab import CharVocab
import stanza.models.classifiers.data as data
from stanza.models.classifiers.trainer import Trainer
from stanza.models.classifiers.utils import WVType, ExtraVectors, ModelType
from stanza.models.common.peft_config import add_peft_args, resolve_peft_args
from stanza.utils.confusion import format_confusion, confusion_to_accuracy, confusion_to_macro_f1

class Loss(Enum):
    CROSS = 1
    WEIGHTED_CROSS = 2
    LOG_CROSS = 3
    FOCAL = 4

class DevScoring(Enum):
    ACCURACY = 'ACC'
    WEIGHTED_F1 = 'WF'

logger = logging.getLogger('stanza')
tlogger = logging.getLogger('stanza.classifiers.trainer')

logging.getLogger('elmoformanylangs').setLevel(logging.WARNING)

DEFAULT_TRAIN='data/sentiment/en_sstplus.train.txt'
DEFAULT_DEV='data/sentiment/en_sst3roots.dev.txt'
DEFAULT_TEST='data/sentiment/en_sst3roots.test.txt'

"""A script for training and testing classifier models, especially on the SST.

If you run the script with no arguments, it will start trying to train
a sentiment model.

python3 -m stanza.models.classifier

This requires the sentiment dataset to be in an `extern_data`
directory, such as by symlinking it from somewhere else.

The default model is a CNN where the word vectors are first mapped to
channels with filters of a few different widths, those channels are
maxpooled over the entire sentence, and then the resulting pools have
fully connected layers until they reach the number of classes in the
training data.  You can see the defaults in the options below.

https://arxiv.org/abs/1408.5882

(Currently the CNN is the only sentence classifier implemented.)
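As a rough illustration of that architecture, here is a minimal,
self-contained sketch.  The class and parameter names below are
illustrative only, not the actual classes used by this package:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class TinyKimCNN(nn.Module):
        def __init__(self, emb_dim=100, channels=1000, widths=(3, 4, 5),
                     fc_shapes=(400, 100), num_classes=3):
            super().__init__()
            # one convolution per filter width, each producing `channels` maps
            self.convs = nn.ModuleList(nn.Conv2d(1, channels, (w, emb_dim))
                                       for w in widths)
            sizes = (channels * len(widths),) + fc_shapes + (num_classes,)
            layers = []
            for n_in, n_out in zip(sizes[:-1], sizes[1:]):
                layers += [nn.Linear(n_in, n_out), nn.ReLU()]
            self.fc = nn.Sequential(*layers[:-1])  # no ReLU after the last layer

        def forward(self, x):             # x: (batch, seq_len, emb_dim)
            x = x.unsqueeze(1)            # add a channel dim for Conv2d
            # convolve, then maxpool each channel over the entire sentence
            pools = [F.relu(conv(x)).squeeze(3).max(dim=2).values
                     for conv in self.convs]
            return self.fc(torch.cat(pools, dim=1))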
To train with a more complicated CNN architecture:

nohup python3 -u -m stanza.models.classifier --max_epochs 400 --filter_channels 1000 --fc_shapes 400,100 > FC41.out 2>&1 &

You can train models with word vectors other than the default word2vec.  For example:

nohup python3 -u -m stanza.models.classifier --wordvec_type google --wordvec_dir extern_data/google --max_epochs 200 --filter_channels 1000 --fc_shapes 200,100 --base_name FC21_google > FC21_google.out 2>&1 &

A model trained on the 5 class dataset can be tested on the 2 class dataset with a command line like this:

python3 -u -m stanza.models.classifier --no_train --load_name saved_models/classifier/sst_en_ewt_FS_3_4_5_C_1000_FC_400_100_classifier.E0165-ACC41.87.pt --test_file data/sentiment/en_sst2roots.test.txt --test_remap_labels "{0:0, 1:0, 3:1, 4:1}"

python3 -u -m stanza.models.classifier --wordvec_type google --wordvec_dir extern_data/google --no_train --load_name saved_models/classifier/FC21_google_en_ewt_FS_3_4_5_C_1000_FC_200_100_classifier.E0189-ACC45.87.pt --test_file data/sentiment/en_sst2roots.test.txt --test_remap_labels "{0:0, 1:0, 3:1, 4:1}"

A model trained on the 3 class dataset can be tested on the 2 class dataset with a command line like this:

python3 -u -m stanza.models.classifier --wordvec_type google --wordvec_dir extern_data/google --no_train --load_name saved_models/classifier/FC21_3C_google_en_ewt_FS_3_4_5_C_1000_FC_200_100_classifier.E0101-ACC68.94.pt --test_file data/sentiment/en_sst2roots.test.txt --test_remap_labels "{0:0, 2:1}"

To train models on combined 3 class datasets:

nohup python3 -u -m stanza.models.classifier --max_epochs 400 --filter_channels 1000 --fc_shapes 400,100 --base_name FC41_3class --extra_wordvec_method CONCAT --extra_wordvec_dim 200 --train_file data/sentiment/en_sstplus.train.txt --dev_file data/sentiment/en_sst3roots.dev.txt --test_file data/sentiment/en_sst3roots.test.txt > FC41_3class.out 2>&1 &

This tests that model:

python3 -u -m stanza.models.classifier --no_train --load_name en_sstplus.pt --test_file data/sentiment/en_sst3roots.test.txt

Here is an example for training a model in a different language:

nohup python3 -u -m stanza.models.classifier --max_epochs 400 --filter_channels 1000 --fc_shapes 400,100 --base_name FC41_german --train_file data/sentiment/de_sb10k.train.txt --dev_file data/sentiment/de_sb10k.dev.txt --test_file data/sentiment/de_sb10k.test.txt --shorthand de_sb10k --min_train_len 3 --extra_wordvec_method CONCAT --extra_wordvec_dim 100 > de_sb10k.out 2>&1 &

This uses more data, although that wound up being worse for the German model:

nohup python3 -u -m stanza.models.classifier --max_epochs 400 --filter_channels 1000 --fc_shapes 400,100 --base_name FC41_german --train_file data/sentiment/de_sb10k.train.txt,data/sentiment/de_scare.train.txt,data/sentiment/de_usage.train.txt --dev_file data/sentiment/de_sb10k.dev.txt --test_file data/sentiment/de_sb10k.test.txt --shorthand de_sb10k --min_train_len 3 --extra_wordvec_method CONCAT --extra_wordvec_dim 100 > de_sb10k.out 2>&1 &

An example for Chinese:

nohup python3 -u -m stanza.models.classifier --max_epochs 400 --filter_channels 1000 --fc_shapes 400,100 --base_name FC41_chinese --train_file data/sentiment/zh_ren.train.txt --dev_file data/sentiment/zh_ren.dev.txt --test_file data/sentiment/zh_ren.test.txt --shorthand zh_ren --wordvec_type fasttext --extra_wordvec_method SUM --wordvec_pretrain_file ../stanza_resources/zh-hans/pretrain/gsdsimp.pt > zh_ren.out 2>&1 &

An example for Vietnamese, training and then testing:

nohup python3 -u -m stanza.models.classifier --max_epochs 400 --filter_channels 1000 --fc_shapes 400,100 --save_name vi_vsfc.pt --train_file data/sentiment/vi_vsfc.train.json --dev_file data/sentiment/vi_vsfc.dev.json --test_file data/sentiment/vi_vsfc.test.json --shorthand vi_vsfc --wordvec_pretrain_file ../stanza_resources/vi/pretrain/vtb.pt --wordvec_type word2vec --extra_wordvec_method SUM --dev_eval_scoring WEIGHTED_F1 > vi_vsfc.out 2>&1 &

python3 -u -m stanza.models.classifier --no_train --test_file extern_data/sentiment/vietnamese/_UIT-VSFC/test.txt --shorthand vi_vsfc --wordvec_pretrain_file ../stanza_resources/vi/pretrain/vtb.pt --wordvec_type word2vec --load_name vi_vsfc.pt
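A minimal sketch of how a --test_remap_labels mapping could be parsed
and applied, assuming examples whose label has no entry in the map are
dropped.  This is an illustration only; the real handling lives
elsewhere in the classifier code:

    import ast

    def remap_labels(labels, remap_arg):
        mapping = ast.literal_eval(remap_arg)    # e.g. "{0:0, 1:0, 3:1, 4:1}"
        # examples whose label has no entry in the map are dropped
        return [mapping[label] for label in labels if label in mapping]

    remap_labels([0, 1, 2, 3, 4], "{0:0, 1:0, 3:1, 4:1}")   # -> [0, 0, 1, 1]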
"""

def convert_fc_shapes(arg):
    """
    Returns a tuple of sizes to use in FC layers.

    For example, converts
    "100" -> (100,)
    "100,200" -> (100,200)
    """
    arg = arg.strip()
    if not arg:
        return ()

    arg = ast.literal_eval(arg)
    if isinstance(arg, int):
        return (arg,)
    if isinstance(arg, tuple):
        return arg
    return tuple(arg)

# For the most part, these values are for the constituency parser.
# Only the WD for adadelta is originally for sentiment.
# Also LR for adadelta and madgrad.
#
# madgrad learning rate experiment on sstplus
# note that the hyperparameters are not cross-validated in tandem, so
# later changes may make some earlier experiments slightly out of date
#
# LR
# 0.01       failed to converge
# 0.004      failed to converge
# 0.003      0.5572
# 0.002      failed to converge
# 0.001      0.6857
# 0.0008     0.6799
# 0.0005     0.6849
# 0.00025    0.6749
# 0.0001     0.6746
# 0.00001    0.6536
# 0.000001   0.6267
#
# LR 0.001 produced the best model, but it does occasionally fail to
# converge to a working model, so we set the default to 0.0005 instead
DEFAULT_LEARNING_RATES = { "adamw": 0.0002, "adadelta": 1.0, "sgd": 0.001, "adabelief": 0.00005, "madgrad": 0.0005 }
DEFAULT_LEARNING_EPS = { "adabelief": 1e-12, "adadelta": 1e-6, "adamw": 1e-8 }
DEFAULT_LEARNING_RHO = 0.9
DEFAULT_MOMENTUM = { "madgrad": 0.9, "sgd": 0.9 }
DEFAULT_WEIGHT_DECAY = { "adamw": 0.05, "adadelta": 0.0001, "sgd": 0.01, "adabelief": 1.2e-6, "madgrad": 2e-6 }

def build_argparse():
    """
    Build the argparse for the classifier.

    Refactored so that other utility scripts can use the same parser if needed.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--train', dest='train', default=True, action='store_true', help='Train the model (default)')
    parser.add_argument('--no_train', dest='train', action='store_false', help="Don't train the model")
    parser.add_argument('--shorthand', type=str, default='en_ewt', help="Treebank shorthand, e.g. 'en_ewt' for English")
    parser.add_argument('--load_name', type=str, default=None, help='Name for loading an existing model')

    parser.add_argument('--save_dir', type=str, default='saved_models/classifier', help='Root dir for saving models.')
    parser.add_argument('--save_name', type=str, default="{shorthand}_{embedding}_{bert_finetuning}_{classifier_type}_classifier.pt", help='Name for saving the model')
    parser.add_argument('--checkpoint_save_name', type=str, default=None, help="File name to save the most recent checkpoint")
    parser.add_argument('--no_checkpoint', dest='checkpoint', action='store_false', help="Don't save checkpoints")
    parser.add_argument('--save_intermediate_models', default=False, action='store_true', help='Save all intermediate models - this can be a lot!')

    parser.add_argument('--train_file', type=str, default=DEFAULT_TRAIN, help='Input file(s) to train a model from. Each line is an example.')
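    # A hedged usage sketch, assuming the parser is consumed in the usual
    # argparse way (the real entry point may construct and use it differently):
    #
    #   parser = build_argparse()
    #   args = parser.parse_args(['--no_train', '--load_name', 'en_sstplus.pt'])
    #   if args.train: ...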