# qe-language-rotation-ldd / hparams.yaml
# Uploaded by era28: "Upload era28/qe-language-rotation-ldd checkpoint"
# (commit 37830a9)
# Model and training hyperparameters. Keys are kept in alphabetical order.
activations: Tanh
batch_size: 16
class_identifier: referenceless_regression_metric
dropout: 0.1
# The encoder has its own, smaller rate than `learning_rate` further down.
# Presumably `learning_rate` covers the non-encoder layers - TODO confirm.
encoder_learning_rate: 1.0e-06
encoder_model: XLM-RoBERTa
final_activation: null
hidden_sizes: [2048, 1024]
keep_embeddings_frozen: true
layer: mix
layer_norm: false
layer_transformation: sparsemax
layerwise_decay: 0.95
learning_rate: 1.5e-05
load_pretrained_weights: true
local_files_only: false
loss: mse
# Fractional value; presumably a share of the first epoch - TODO confirm
# against the training code.
nr_frozen_epochs: 0.3
optimizer: AdamW
pool: avg
pretrained_model: xlm-roberta-large
# Training reads a single combined CSV. All data paths are relative.
train_data:
  - data/lowdiv_dir/combined_training_data.csv
# 24 per-language CSVs, each named xcomet_<Language>_test.csv.
# NOTE(review): these files carry a `_test` suffix but are listed as
# validation data - confirm they are not also used as a held-out test set.
validation_data:
  - data/lowdiv_dir/xcomet_Czech_test.csv
  - data/lowdiv_dir/xcomet_German_test.csv
  - data/lowdiv_dir/xcomet_Finnish_test.csv
  - data/lowdiv_dir/xcomet_Latvian_test.csv
  - data/lowdiv_dir/xcomet_Turkish_test.csv
  - data/lowdiv_dir/xcomet_Russian_test.csv
  - data/lowdiv_dir/xcomet_Chinese_test.csv
  - data/lowdiv_dir/xcomet_Estonian_test.csv
  - data/lowdiv_dir/xcomet_Lithuanian_test.csv
  - data/lowdiv_dir/xcomet_Gujarati_test.csv
  - data/lowdiv_dir/xcomet_Kazakh_test.csv
  - data/lowdiv_dir/xcomet_French_test.csv
  - data/lowdiv_dir/xcomet_Pashto_test.csv
  - data/lowdiv_dir/xcomet_Japanese_test.csv
  # The space in "Central Khmer" is part of the file name; quoted so that
  # it is not mistaken for a typo.
  - 'data/lowdiv_dir/xcomet_Central Khmer_test.csv'
  - data/lowdiv_dir/xcomet_Polish_test.csv
  - data/lowdiv_dir/xcomet_Tamil_test.csv
  - data/lowdiv_dir/xcomet_Hausa_test.csv
  - data/lowdiv_dir/xcomet_Icelandic_test.csv
  - data/lowdiv_dir/xcomet_Hindi_test.csv
  - data/lowdiv_dir/xcomet_Bengali_test.csv
  - data/lowdiv_dir/xcomet_Zulu_test.csv
  - data/lowdiv_dir/xcomet_Xhosa_test.csv
  - data/lowdiv_dir/xcomet_Ukrainian_test.csv
warmup_steps: 0