Upload model checkpoint
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .DS_Store +0 -0
- .gitattributes +13 -0
- mic/.DS_Store +0 -0
- mic/flan-t5-base_Q_A_TARGET_rot/.DS_Store +0 -0
- mic/flan-t5-base_Q_A_TARGET_rot/format_string.txt +1 -0
- mic/flan-t5-base_Q_A_TARGET_rot/log.txt +62 -0
- mic/flan-t5-base_Q_A_TARGET_rot/model/config.json +59 -0
- mic/flan-t5-base_Q_A_TARGET_rot/model/pytorch_model.bin +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/model/special_tokens_map.json +1 -0
- mic/flan-t5-base_Q_A_TARGET_rot/model/tokenizer.json +0 -0
- mic/flan-t5-base_Q_A_TARGET_rot/model/tokenizer_config.json +1 -0
- mic/flan-t5-base_Q_A_TARGET_rot/model/training_args.bin +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv +0 -0
- mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1_results.json +16 -0
- mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1.csv +0 -0
- mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1_results.json +16 -0
- mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1.csv +0 -0
- mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1_results.json +16 -0
- mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_10.json +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_5.json +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_10.json +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_5.json +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/tmp/dev.csv +0 -0
- mic/flan-t5-base_Q_A_TARGET_rot/tmp/test.csv +0 -0
- mic/flan-t5-base_Q_A_TARGET_rot/tmp/train.csv +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_d-PM_5.json +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_soft_5.json +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/train_sample_5.json +3 -0
- mic/flan-t5-base_Q_A_TARGET_rot/training_args.bin +3 -0
- mic/t5-small_Q_A_TARGET_rot/.DS_Store +0 -0
- mic/t5-small_Q_A_TARGET_rot/format_string.txt +1 -0
- mic/t5-small_Q_A_TARGET_rot/log.txt +62 -0
- mic/t5-small_Q_A_TARGET_rot/model/config.json +57 -0
- mic/t5-small_Q_A_TARGET_rot/model/pytorch_model.bin +3 -0
- mic/t5-small_Q_A_TARGET_rot/model/special_tokens_map.json +1 -0
- mic/t5-small_Q_A_TARGET_rot/model/tokenizer.json +0 -0
- mic/t5-small_Q_A_TARGET_rot/model/tokenizer_config.json +1 -0
- mic/t5-small_Q_A_TARGET_rot/model/training_args.bin +3 -0
- mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv +0 -0
- mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1_results.json +16 -0
- mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1.csv +0 -0
- mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1_results.json +16 -0
- mic/t5-small_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1.csv +0 -0
- mic/t5-small_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1_results.json +16 -0
- mic/t5-small_Q_A_TARGET_rot/tmp/dev.csv +0 -0
- mic/t5-small_Q_A_TARGET_rot/tmp/test.csv +0 -0
- mic/t5-small_Q_A_TARGET_rot/tmp/test_train.csv +0 -0
- mic/t5-small_Q_A_TARGET_rot/tmp/train.csv +0 -0
- mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_10.json +3 -0
- mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_16.json +3 -0
.DS_Store
ADDED
|
Binary file (10.2 kB). View file
|
|
|
.gitattributes
CHANGED
|
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_10.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_5.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_10.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_5.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
mic/flan-t5-base_Q_A_TARGET_rot/tmp/train.csv filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_d-PM_5.json filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_soft_5.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
mic/flan-t5-base_Q_A_TARGET_rot/train_sample_5.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_10.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_16.json filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_5.json filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
mic/t5-small_Q_A_TARGET_rot/tmp/update_mle_10.json filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
mic/t5-small_Q_A_TARGET_rot/tmp/update_mle_5.json filter=lfs diff=lfs merge=lfs -text
|
mic/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
mic/flan-t5-base_Q_A_TARGET_rot/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
mic/flan-t5-base_Q_A_TARGET_rot/format_string.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Q [answ] A [rot] ~ rot
|
mic/flan-t5-base_Q_A_TARGET_rot/log.txt
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"loss": 2.6849, "learning_rate": 2.9472573839662445e-05, "epoch": 0.09, "step": 500}
|
| 2 |
+
{"loss": 1.8348, "learning_rate": 2.8945147679324896e-05, "epoch": 0.18, "step": 1000}
|
| 3 |
+
{"loss": 1.7561, "learning_rate": 2.841772151898734e-05, "epoch": 0.26, "step": 1500}
|
| 4 |
+
{"loss": 1.7158, "learning_rate": 2.7890295358649792e-05, "epoch": 0.35, "step": 2000}
|
| 5 |
+
{"loss": 1.6876, "learning_rate": 2.7362869198312236e-05, "epoch": 0.44, "step": 2500}
|
| 6 |
+
{"loss": 1.6535, "learning_rate": 2.6835443037974684e-05, "epoch": 0.53, "step": 3000}
|
| 7 |
+
{"loss": 1.6553, "learning_rate": 2.6308016877637132e-05, "epoch": 0.62, "step": 3500}
|
| 8 |
+
{"loss": 1.6279, "learning_rate": 2.578059071729958e-05, "epoch": 0.7, "step": 4000}
|
| 9 |
+
{"loss": 1.6317, "learning_rate": 2.5253164556962027e-05, "epoch": 0.79, "step": 4500}
|
| 10 |
+
{"loss": 1.616, "learning_rate": 2.4725738396624472e-05, "epoch": 0.88, "step": 5000}
|
| 11 |
+
{"loss": 1.5978, "learning_rate": 2.419831223628692e-05, "epoch": 0.97, "step": 5500}
|
| 12 |
+
{"eval_loss": 1.4766592979431152, "eval_runtime": 37.3341, "eval_samples_per_second": 306.047, "eval_steps_per_second": 19.151, "epoch": 1.0, "step": 5688}
|
| 13 |
+
{"loss": 1.5798, "learning_rate": 2.3670886075949367e-05, "epoch": 1.05, "step": 6000}
|
| 14 |
+
{"loss": 1.5592, "learning_rate": 2.3143459915611815e-05, "epoch": 1.14, "step": 6500}
|
| 15 |
+
{"loss": 1.5583, "learning_rate": 2.2616033755274263e-05, "epoch": 1.23, "step": 7000}
|
| 16 |
+
{"loss": 1.5656, "learning_rate": 2.208860759493671e-05, "epoch": 1.32, "step": 7500}
|
| 17 |
+
{"loss": 1.5566, "learning_rate": 2.1561181434599155e-05, "epoch": 1.41, "step": 8000}
|
| 18 |
+
{"loss": 1.5281, "learning_rate": 2.1033755274261603e-05, "epoch": 1.49, "step": 8500}
|
| 19 |
+
{"loss": 1.5308, "learning_rate": 2.050632911392405e-05, "epoch": 1.58, "step": 9000}
|
| 20 |
+
{"loss": 1.5254, "learning_rate": 1.99789029535865e-05, "epoch": 1.67, "step": 9500}
|
| 21 |
+
{"loss": 1.5199, "learning_rate": 1.9451476793248946e-05, "epoch": 1.76, "step": 10000}
|
| 22 |
+
{"loss": 1.5313, "learning_rate": 1.892405063291139e-05, "epoch": 1.85, "step": 10500}
|
| 23 |
+
{"loss": 1.5262, "learning_rate": 1.8396624472573842e-05, "epoch": 1.93, "step": 11000}
|
| 24 |
+
{"eval_loss": 1.4375780820846558, "eval_runtime": 37.255, "eval_samples_per_second": 306.697, "eval_steps_per_second": 19.192, "epoch": 2.0, "step": 11376}
|
| 25 |
+
{"loss": 1.513, "learning_rate": 1.7869198312236286e-05, "epoch": 2.02, "step": 11500}
|
| 26 |
+
{"loss": 1.4815, "learning_rate": 1.7341772151898738e-05, "epoch": 2.11, "step": 12000}
|
| 27 |
+
{"loss": 1.4919, "learning_rate": 1.6814345991561182e-05, "epoch": 2.2, "step": 12500}
|
| 28 |
+
{"loss": 1.4872, "learning_rate": 1.6286919831223626e-05, "epoch": 2.29, "step": 13000}
|
| 29 |
+
{"loss": 1.4861, "learning_rate": 1.5759493670886078e-05, "epoch": 2.37, "step": 13500}
|
| 30 |
+
{"loss": 1.4832, "learning_rate": 1.5232067510548524e-05, "epoch": 2.46, "step": 14000}
|
| 31 |
+
{"loss": 1.458, "learning_rate": 1.470464135021097e-05, "epoch": 2.55, "step": 14500}
|
| 32 |
+
{"loss": 1.4824, "learning_rate": 1.4177215189873418e-05, "epoch": 2.64, "step": 15000}
|
| 33 |
+
{"loss": 1.4965, "learning_rate": 1.3649789029535865e-05, "epoch": 2.73, "step": 15500}
|
| 34 |
+
{"loss": 1.472, "learning_rate": 1.3122362869198313e-05, "epoch": 2.81, "step": 16000}
|
| 35 |
+
{"loss": 1.4915, "learning_rate": 1.259493670886076e-05, "epoch": 2.9, "step": 16500}
|
| 36 |
+
{"loss": 1.4871, "learning_rate": 1.2067510548523207e-05, "epoch": 2.99, "step": 17000}
|
| 37 |
+
{"eval_loss": 1.4210575819015503, "eval_runtime": 37.3541, "eval_samples_per_second": 305.883, "eval_steps_per_second": 19.141, "epoch": 3.0, "step": 17064}
|
| 38 |
+
{"loss": 1.4511, "learning_rate": 1.1540084388185655e-05, "epoch": 3.08, "step": 17500}
|
| 39 |
+
{"loss": 1.4494, "learning_rate": 1.1012658227848103e-05, "epoch": 3.16, "step": 18000}
|
| 40 |
+
{"loss": 1.4517, "learning_rate": 1.048523206751055e-05, "epoch": 3.25, "step": 18500}
|
| 41 |
+
{"loss": 1.444, "learning_rate": 9.957805907172995e-06, "epoch": 3.34, "step": 19000}
|
| 42 |
+
{"loss": 1.4565, "learning_rate": 9.430379746835443e-06, "epoch": 3.43, "step": 19500}
|
| 43 |
+
{"loss": 1.4412, "learning_rate": 8.90295358649789e-06, "epoch": 3.52, "step": 20000}
|
| 44 |
+
{"loss": 1.4589, "learning_rate": 8.375527426160338e-06, "epoch": 3.6, "step": 20500}
|
| 45 |
+
{"loss": 1.4603, "learning_rate": 7.848101265822786e-06, "epoch": 3.69, "step": 21000}
|
| 46 |
+
{"loss": 1.4542, "learning_rate": 7.320675105485233e-06, "epoch": 3.78, "step": 21500}
|
| 47 |
+
{"loss": 1.4565, "learning_rate": 6.793248945147679e-06, "epoch": 3.87, "step": 22000}
|
| 48 |
+
{"loss": 1.4463, "learning_rate": 6.265822784810127e-06, "epoch": 3.96, "step": 22500}
|
| 49 |
+
{"eval_loss": 1.4135338068008423, "eval_runtime": 37.4259, "eval_samples_per_second": 305.297, "eval_steps_per_second": 19.104, "epoch": 4.0, "step": 22752}
|
| 50 |
+
{"loss": 1.456, "learning_rate": 5.738396624472574e-06, "epoch": 4.04, "step": 23000}
|
| 51 |
+
{"loss": 1.4418, "learning_rate": 5.2109704641350215e-06, "epoch": 4.13, "step": 23500}
|
| 52 |
+
{"loss": 1.4338, "learning_rate": 4.683544303797468e-06, "epoch": 4.22, "step": 24000}
|
| 53 |
+
{"loss": 1.4337, "learning_rate": 4.156118143459915e-06, "epoch": 4.31, "step": 24500}
|
| 54 |
+
{"loss": 1.4358, "learning_rate": 3.628691983122363e-06, "epoch": 4.4, "step": 25000}
|
| 55 |
+
{"loss": 1.4284, "learning_rate": 3.10126582278481e-06, "epoch": 4.48, "step": 25500}
|
| 56 |
+
{"loss": 1.4267, "learning_rate": 2.5738396624472574e-06, "epoch": 4.57, "step": 26000}
|
| 57 |
+
{"loss": 1.4251, "learning_rate": 2.0464135021097044e-06, "epoch": 4.66, "step": 26500}
|
| 58 |
+
{"loss": 1.4427, "learning_rate": 1.518987341772152e-06, "epoch": 4.75, "step": 27000}
|
| 59 |
+
{"loss": 1.4318, "learning_rate": 9.915611814345993e-07, "epoch": 4.83, "step": 27500}
|
| 60 |
+
{"loss": 1.4344, "learning_rate": 4.6413502109704643e-07, "epoch": 4.92, "step": 28000}
|
| 61 |
+
{"eval_loss": 1.4122037887573242, "eval_runtime": 37.375, "eval_samples_per_second": 305.713, "eval_steps_per_second": 19.13, "epoch": 5.0, "step": 28440}
|
| 62 |
+
{"train_runtime": 5676.8026, "train_samples_per_second": 80.158, "train_steps_per_second": 5.01, "total_flos": 8.832548854812672e+16, "train_loss": 1.5344804573327344, "epoch": 5.0, "step": 28440}
|
mic/flan-t5-base_Q_A_TARGET_rot/model/config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "google/flan-t5-base",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"T5ForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"d_ff": 2048,
|
| 7 |
+
"d_kv": 64,
|
| 8 |
+
"d_model": 768,
|
| 9 |
+
"decoder_start_token_id": 0,
|
| 10 |
+
"dropout_rate": 0.1,
|
| 11 |
+
"eos_token_id": 1,
|
| 12 |
+
"feed_forward_proj": "gated-gelu",
|
| 13 |
+
"initializer_factor": 1.0,
|
| 14 |
+
"is_encoder_decoder": true,
|
| 15 |
+
"layer_norm_epsilon": 1e-06,
|
| 16 |
+
"model_type": "t5",
|
| 17 |
+
"n_positions": 512,
|
| 18 |
+
"num_decoder_layers": 12,
|
| 19 |
+
"num_heads": 12,
|
| 20 |
+
"num_layers": 12,
|
| 21 |
+
"output_past": true,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"relative_attention_max_distance": 128,
|
| 24 |
+
"relative_attention_num_buckets": 32,
|
| 25 |
+
"task_specific_params": {
|
| 26 |
+
"summarization": {
|
| 27 |
+
"early_stopping": true,
|
| 28 |
+
"length_penalty": 2.0,
|
| 29 |
+
"max_length": 200,
|
| 30 |
+
"min_length": 30,
|
| 31 |
+
"no_repeat_ngram_size": 3,
|
| 32 |
+
"num_beams": 4,
|
| 33 |
+
"prefix": "summarize: "
|
| 34 |
+
},
|
| 35 |
+
"translation_en_to_de": {
|
| 36 |
+
"early_stopping": true,
|
| 37 |
+
"max_length": 300,
|
| 38 |
+
"num_beams": 4,
|
| 39 |
+
"prefix": "translate English to German: "
|
| 40 |
+
},
|
| 41 |
+
"translation_en_to_fr": {
|
| 42 |
+
"early_stopping": true,
|
| 43 |
+
"max_length": 300,
|
| 44 |
+
"num_beams": 4,
|
| 45 |
+
"prefix": "translate English to French: "
|
| 46 |
+
},
|
| 47 |
+
"translation_en_to_ro": {
|
| 48 |
+
"early_stopping": true,
|
| 49 |
+
"max_length": 300,
|
| 50 |
+
"num_beams": 4,
|
| 51 |
+
"prefix": "translate English to Romanian: "
|
| 52 |
+
}
|
| 53 |
+
},
|
| 54 |
+
"tie_word_embeddings": false,
|
| 55 |
+
"torch_dtype": "float32",
|
| 56 |
+
"transformers_version": "4.16.2",
|
| 57 |
+
"use_cache": true,
|
| 58 |
+
"vocab_size": 32125
|
| 59 |
+
}
|
mic/flan-t5-base_Q_A_TARGET_rot/model/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37211da66f5804d1aba878b22d6d0e65c5d8af2c50c6ed3afe19a89f1dd1e660
|
| 3 |
+
size 990419917
|
mic/flan-t5-base_Q_A_TARGET_rot/model/special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
|
mic/flan-t5-base_Q_A_TARGET_rot/model/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/flan-t5-base_Q_A_TARGET_rot/model/tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "name_or_path": "google/flan-t5-base", "sp_model_kwargs": {}, "special_tokens_map_file": 
"/home/younes_huggingface_co/.cache/huggingface/hub/models--google--t5-v1_1-base/snapshots/650d7745bf1e502d6949b22cc19155cd656d3d4e/special_tokens_map.json", "tokenizer_class": "T5Tokenizer"}
|
mic/flan-t5-base_Q_A_TARGET_rot/model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cb0256e0f5d6f2fc1ea002259e87848c97e8385553df007cf201e472bd3d63c
|
| 3 |
+
size 3311
|
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1_results.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rouge1": 0.41405274871333886,
|
| 3 |
+
"rouge2": 0.20410173251757238,
|
| 4 |
+
"rougeL": 0.3969533843177795,
|
| 5 |
+
"rougeLsum": 0.3969533843177795,
|
| 6 |
+
"BERTScore_Precision": 0.9219520688056946,
|
| 7 |
+
"BERTScore_Recall": 0.9207043051719666,
|
| 8 |
+
"BERTScore": 0.9202384352684021,
|
| 9 |
+
"sacrebleu": 18.017794937736255,
|
| 10 |
+
"bleu": 0,
|
| 11 |
+
"mean_length": 9.300923076923077,
|
| 12 |
+
"decoding": "p=0.9",
|
| 13 |
+
"train_size": "full",
|
| 14 |
+
"model": "t5",
|
| 15 |
+
"fn": "rot_generation/output/flan-t5-base_Q_A_TARGET_rot_epochs5_batch16_seed1/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv"
|
| 16 |
+
}
|
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1_results.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rouge1": 0.37943165867066264,
|
| 3 |
+
"rouge2": 0.1722986392769147,
|
| 4 |
+
"rougeL": 0.36126971991333545,
|
| 5 |
+
"rougeLsum": 0.36126971991333545,
|
| 6 |
+
"BERTScore_Precision": 0.9139366149902344,
|
| 7 |
+
"BERTScore_Recall": 0.9160142540931702,
|
| 8 |
+
"BERTScore": 0.9138739109039307,
|
| 9 |
+
"sacrebleu": 15.362114235644004,
|
| 10 |
+
"bleu": 0,
|
| 11 |
+
"mean_length": 9.775296703296704,
|
| 12 |
+
"decoding": "greedy",
|
| 13 |
+
"train_size": "full",
|
| 14 |
+
"model": "t5",
|
| 15 |
+
"fn": "rot_generation/output/flan-t5-base_Q_A_TARGET_rot_epochs5_batch16_seed1/test_generations_beams0_p0_k0_temp1.0_seed1.csv"
|
| 16 |
+
}
|
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1_results.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rouge1": 0.5507494024510283,
|
| 3 |
+
"rouge2": 0.3496228495311189,
|
| 4 |
+
"rougeL": 0.5374227474345985,
|
| 5 |
+
"rougeLsum": 0.5374227474345985,
|
| 6 |
+
"BERTScore_Precision": 0.9413657188415527,
|
| 7 |
+
"BERTScore_Recall": 0.9361890554428101,
|
| 8 |
+
"BERTScore": 0.9377807378768921,
|
| 9 |
+
"sacrebleu": 30.679671337078904,
|
| 10 |
+
"bleu": 0,
|
| 11 |
+
"mean_length": 9.008527472527472,
|
| 12 |
+
"decoding": "beam",
|
| 13 |
+
"train_size": "full",
|
| 14 |
+
"model": "t5",
|
| 15 |
+
"fn": "rot_generation/output/flan-t5-base_Q_A_TARGET_rot_epochs5_batch16_seed1/test_generations_beams3_p0_k0_temp1.0_seed1.csv"
|
| 16 |
+
}
|
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_10.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c7d30371f10339a2655d965a525a3aa31d6e23c56a1d4c2b77ffe83b01a80eb
|
| 3 |
+
size 130504776
|
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_5.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d4611991d8d818d5f99200e8f55fccfef57d7070d1d528696ed6deff35d4bd8
|
| 3 |
+
size 109800806
|
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_10.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9ffb3aa194458aaeaf01646be609aeb89c5482e3a73ebb4109cc152885160ec
|
| 3 |
+
size 131855393
|
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_5.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:132268b3e2e040f1380afbe6372b51ed91cc3ee537eb19a1d6684fb9abcf01e2
|
| 3 |
+
size 110940132
|
mic/flan-t5-base_Q_A_TARGET_rot/tmp/dev.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/flan-t5-base_Q_A_TARGET_rot/tmp/test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/flan-t5-base_Q_A_TARGET_rot/tmp/train.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a85c124cabb5e27843113888b7f98135abf56fff95012d8b4d8ffe02f0294bd
|
| 3 |
+
size 60631233
|
mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_d-PM_5.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ceb2d25bfdec04ac5fbc6fd3d558a92613c198aa06007974ba6210149f97fed5
|
| 3 |
+
size 92687899
|
mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_soft_5.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c033019631e6e87cd89960115456fa9b7a0a6af6d913750f10b4f9b640235c00
|
| 3 |
+
size 94743133
|
mic/flan-t5-base_Q_A_TARGET_rot/train_sample_5.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b62f84e59d3eb2762edf1d02359ad72344266084436b027358f25207712fccf
|
| 3 |
+
size 18092031
|
mic/flan-t5-base_Q_A_TARGET_rot/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fc5b14543083eb0661a5f3575d692d3d641d542ed2f4f56922e12c3b6fce717
|
| 3 |
+
size 1007
|
mic/t5-small_Q_A_TARGET_rot/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
mic/t5-small_Q_A_TARGET_rot/format_string.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Q [answ] A [rot] ~ rot
|
mic/t5-small_Q_A_TARGET_rot/log.txt
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"loss": 4.035, "learning_rate": 2.9475738396624472e-05, "epoch": 0.09, "step": 500}
|
| 2 |
+
{"loss": 2.9369, "learning_rate": 2.894831223628692e-05, "epoch": 0.18, "step": 1000}
|
| 3 |
+
{"loss": 2.7696, "learning_rate": 2.8420886075949368e-05, "epoch": 0.26, "step": 1500}
|
| 4 |
+
{"loss": 2.6421, "learning_rate": 2.7893459915611816e-05, "epoch": 0.35, "step": 2000}
|
| 5 |
+
{"loss": 2.5651, "learning_rate": 2.7366033755274263e-05, "epoch": 0.44, "step": 2500}
|
| 6 |
+
{"loss": 2.4838, "learning_rate": 2.6839662447257383e-05, "epoch": 0.53, "step": 3000}
|
| 7 |
+
{"loss": 2.4457, "learning_rate": 2.631223628691983e-05, "epoch": 0.62, "step": 3500}
|
| 8 |
+
{"loss": 2.4027, "learning_rate": 2.5785864978902955e-05, "epoch": 0.7, "step": 4000}
|
| 9 |
+
{"loss": 2.3666, "learning_rate": 2.5258438818565402e-05, "epoch": 0.79, "step": 4500}
|
| 10 |
+
{"loss": 2.3564, "learning_rate": 2.4731012658227847e-05, "epoch": 0.88, "step": 5000}
|
| 11 |
+
{"loss": 2.3104, "learning_rate": 2.4203586497890298e-05, "epoch": 0.97, "step": 5500}
|
| 12 |
+
{"eval_loss": 2.0744245052337646, "eval_runtime": 10.6369, "eval_samples_per_second": 1074.183, "eval_steps_per_second": 67.219, "epoch": 1.0, "step": 5688}
|
| 13 |
+
{"loss": 2.2971, "learning_rate": 2.3676160337552742e-05, "epoch": 1.05, "step": 6000}
|
| 14 |
+
{"loss": 2.2592, "learning_rate": 2.314873417721519e-05, "epoch": 1.14, "step": 6500}
|
| 15 |
+
{"loss": 2.2456, "learning_rate": 2.2621308016877638e-05, "epoch": 1.23, "step": 7000}
|
| 16 |
+
{"loss": 2.2567, "learning_rate": 2.2094936708860758e-05, "epoch": 1.32, "step": 7500}
|
| 17 |
+
{"loss": 2.2198, "learning_rate": 2.156751054852321e-05, "epoch": 1.41, "step": 8000}
|
| 18 |
+
{"loss": 2.1795, "learning_rate": 2.1040084388185654e-05, "epoch": 1.49, "step": 8500}
|
| 19 |
+
{"loss": 2.1834, "learning_rate": 2.0512658227848105e-05, "epoch": 1.58, "step": 9000}
|
| 20 |
+
{"loss": 2.1675, "learning_rate": 1.998523206751055e-05, "epoch": 1.67, "step": 9500}
|
| 21 |
+
{"loss": 2.1539, "learning_rate": 1.9457805907172994e-05, "epoch": 1.76, "step": 10000}
|
| 22 |
+
{"loss": 2.1553, "learning_rate": 1.8930379746835445e-05, "epoch": 1.85, "step": 10500}
|
| 23 |
+
{"loss": 2.1493, "learning_rate": 1.8404008438818565e-05, "epoch": 1.93, "step": 11000}
|
| 24 |
+
{"eval_loss": 1.9509127140045166, "eval_runtime": 10.4535, "eval_samples_per_second": 1093.031, "eval_steps_per_second": 68.398, "epoch": 2.0, "step": 11376}
|
| 25 |
+
{"loss": 2.1474, "learning_rate": 1.7876582278481013e-05, "epoch": 2.02, "step": 11500}
|
| 26 |
+
{"loss": 2.1227, "learning_rate": 1.734915611814346e-05, "epoch": 2.11, "step": 12000}
|
| 27 |
+
{"loss": 2.1167, "learning_rate": 1.6821729957805908e-05, "epoch": 2.2, "step": 12500}
|
| 28 |
+
{"loss": 2.1139, "learning_rate": 1.6294303797468356e-05, "epoch": 2.29, "step": 13000}
|
| 29 |
+
{"loss": 2.1019, "learning_rate": 1.5766877637130804e-05, "epoch": 2.37, "step": 13500}
|
| 30 |
+
{"loss": 2.093, "learning_rate": 1.5239451476793248e-05, "epoch": 2.46, "step": 14000}
|
| 31 |
+
{"loss": 2.0748, "learning_rate": 1.4712025316455698e-05, "epoch": 2.55, "step": 14500}
|
| 32 |
+
{"loss": 2.0969, "learning_rate": 1.4184599156118144e-05, "epoch": 2.64, "step": 15000}
|
| 33 |
+
{"loss": 2.107, "learning_rate": 1.3657172995780592e-05, "epoch": 2.73, "step": 15500}
|
| 34 |
+
{"loss": 2.083, "learning_rate": 1.3129746835443038e-05, "epoch": 2.81, "step": 16000}
|
| 35 |
+
{"loss": 2.0988, "learning_rate": 1.2602320675105485e-05, "epoch": 2.9, "step": 16500}
|
| 36 |
+
{"loss": 2.0887, "learning_rate": 1.2074894514767933e-05, "epoch": 2.99, "step": 17000}
|
| 37 |
+
{"eval_loss": 1.9086660146713257, "eval_runtime": 10.548, "eval_samples_per_second": 1083.243, "eval_steps_per_second": 67.786, "epoch": 3.0, "step": 17064}
|
| 38 |
+
{"loss": 2.0637, "learning_rate": 1.1548523206751055e-05, "epoch": 3.08, "step": 17500}
|
| 39 |
+
{"loss": 2.0555, "learning_rate": 1.1021097046413503e-05, "epoch": 3.16, "step": 18000}
|
| 40 |
+
{"loss": 2.0547, "learning_rate": 1.0494725738396626e-05, "epoch": 3.25, "step": 18500}
|
| 41 |
+
{"loss": 2.0452, "learning_rate": 9.96729957805907e-06, "epoch": 3.34, "step": 19000}
|
| 42 |
+
{"loss": 2.0674, "learning_rate": 9.439873417721518e-06, "epoch": 3.43, "step": 19500}
|
| 43 |
+
{"loss": 2.044, "learning_rate": 8.912447257383966e-06, "epoch": 3.52, "step": 20000}
|
| 44 |
+
{"loss": 2.0607, "learning_rate": 8.385021097046414e-06, "epoch": 3.6, "step": 20500}
|
| 45 |
+
{"loss": 2.0626, "learning_rate": 7.857594936708862e-06, "epoch": 3.69, "step": 21000}
|
| 46 |
+
{"loss": 2.0536, "learning_rate": 7.330168776371309e-06, "epoch": 3.78, "step": 21500}
|
| 47 |
+
{"loss": 2.0569, "learning_rate": 6.802742616033756e-06, "epoch": 3.87, "step": 22000}
|
| 48 |
+
{"loss": 2.0488, "learning_rate": 6.276371308016877e-06, "epoch": 3.96, "step": 22500}
|
| 49 |
+
{"eval_loss": 1.8879072666168213, "eval_runtime": 10.5748, "eval_samples_per_second": 1080.492, "eval_steps_per_second": 67.613, "epoch": 4.0, "step": 22752}
|
| 50 |
+
{"loss": 2.0564, "learning_rate": 5.750000000000001e-06, "epoch": 4.04, "step": 23000}
|
| 51 |
+
{"loss": 2.0471, "learning_rate": 5.222573839662447e-06, "epoch": 4.13, "step": 23500}
|
| 52 |
+
{"loss": 2.0343, "learning_rate": 4.695147679324895e-06, "epoch": 4.22, "step": 24000}
|
| 53 |
+
{"loss": 2.0357, "learning_rate": 4.167721518987342e-06, "epoch": 4.31, "step": 24500}
|
| 54 |
+
{"loss": 2.031, "learning_rate": 3.640295358649789e-06, "epoch": 4.4, "step": 25000}
|
| 55 |
+
{"loss": 2.0332, "learning_rate": 3.1139240506329112e-06, "epoch": 4.48, "step": 25500}
|
| 56 |
+
{"loss": 2.0257, "learning_rate": 2.5864978902953586e-06, "epoch": 4.57, "step": 26000}
|
| 57 |
+
{"loss": 2.0251, "learning_rate": 2.059071729957806e-06, "epoch": 4.66, "step": 26500}
|
| 58 |
+
{"loss": 2.0403, "learning_rate": 1.5316455696202531e-06, "epoch": 4.75, "step": 27000}
|
| 59 |
+
{"loss": 2.0305, "learning_rate": 1.0042194092827005e-06, "epoch": 4.83, "step": 27500}
|
| 60 |
+
{"loss": 2.0299, "learning_rate": 4.778481012658228e-07, "epoch": 4.92, "step": 28000}
|
| 61 |
+
{"eval_loss": 1.8821401596069336, "eval_runtime": 11.142, "eval_samples_per_second": 1025.492, "eval_steps_per_second": 64.172, "epoch": 5.0, "step": 28440}
|
| 62 |
+
{"train_runtime": 1789.4171, "train_samples_per_second": 254.295, "train_steps_per_second": 15.893, "total_flos": 1.7217080101699584e+16, "train_loss": 2.2082438644645253, "epoch": 5.0, "step": 28440}
|
mic/t5-small_Q_A_TARGET_rot/model/config.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "t5-small",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"T5ForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"d_ff": 2048,
|
| 7 |
+
"d_kv": 64,
|
| 8 |
+
"d_model": 512,
|
| 9 |
+
"decoder_start_token_id": 0,
|
| 10 |
+
"dropout_rate": 0.1,
|
| 11 |
+
"eos_token_id": 1,
|
| 12 |
+
"feed_forward_proj": "relu",
|
| 13 |
+
"initializer_factor": 1.0,
|
| 14 |
+
"is_encoder_decoder": true,
|
| 15 |
+
"layer_norm_epsilon": 1e-06,
|
| 16 |
+
"model_type": "t5",
|
| 17 |
+
"n_positions": 512,
|
| 18 |
+
"num_decoder_layers": 6,
|
| 19 |
+
"num_heads": 8,
|
| 20 |
+
"num_layers": 6,
|
| 21 |
+
"output_past": true,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"relative_attention_num_buckets": 32,
|
| 24 |
+
"task_specific_params": {
|
| 25 |
+
"summarization": {
|
| 26 |
+
"early_stopping": true,
|
| 27 |
+
"length_penalty": 2.0,
|
| 28 |
+
"max_length": 200,
|
| 29 |
+
"min_length": 30,
|
| 30 |
+
"no_repeat_ngram_size": 3,
|
| 31 |
+
"num_beams": 4,
|
| 32 |
+
"prefix": "summarize: "
|
| 33 |
+
},
|
| 34 |
+
"translation_en_to_de": {
|
| 35 |
+
"early_stopping": true,
|
| 36 |
+
"max_length": 300,
|
| 37 |
+
"num_beams": 4,
|
| 38 |
+
"prefix": "translate English to German: "
|
| 39 |
+
},
|
| 40 |
+
"translation_en_to_fr": {
|
| 41 |
+
"early_stopping": true,
|
| 42 |
+
"max_length": 300,
|
| 43 |
+
"num_beams": 4,
|
| 44 |
+
"prefix": "translate English to French: "
|
| 45 |
+
},
|
| 46 |
+
"translation_en_to_ro": {
|
| 47 |
+
"early_stopping": true,
|
| 48 |
+
"max_length": 300,
|
| 49 |
+
"num_beams": 4,
|
| 50 |
+
"prefix": "translate English to Romanian: "
|
| 51 |
+
}
|
| 52 |
+
},
|
| 53 |
+
"torch_dtype": "float32",
|
| 54 |
+
"transformers_version": "4.16.2",
|
| 55 |
+
"use_cache": true,
|
| 56 |
+
"vocab_size": 32125
|
| 57 |
+
}
|
mic/t5-small_Q_A_TARGET_rot/model/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c465507866aa7674b50b2a82849cc80aef6fa50cad40f13706a572f07dd0516
|
| 3 |
+
size 532480
|
mic/t5-small_Q_A_TARGET_rot/model/special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"eos_token": "<eos>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
|
mic/t5-small_Q_A_TARGET_rot/model/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/t5-small_Q_A_TARGET_rot/model/tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "t5-small", "tokenizer_class": "T5Tokenizer"}
|
mic/t5-small_Q_A_TARGET_rot/model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b572aa89af159bcd67bf4a8e958d164e0085ba4d1099f1d9852dcf8834c368a4
|
| 3 |
+
size 3311
|
mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1_results.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rouge1": 0.4022840192368789,
|
| 3 |
+
"rouge2": 0.19232970454852946,
|
| 4 |
+
"rougeL": 0.385595060978265,
|
| 5 |
+
"rougeLsum": 0.3855983071573733,
|
| 6 |
+
"BERTScore_Precision": 0.9169009327888489,
|
| 7 |
+
"BERTScore_Recall": 0.9170882701873779,
|
| 8 |
+
"BERTScore": 0.9158875346183777,
|
| 9 |
+
"sacrebleu": 16.713080596394033,
|
| 10 |
+
"bleu": 0,
|
| 11 |
+
"mean_length": 9.85178021978022,
|
| 12 |
+
"decoding": "p=0.9",
|
| 13 |
+
"train_size": "full",
|
| 14 |
+
"model": "t5",
|
| 15 |
+
"fn": "./output/t5-small_Q_A_TARGET_rot_epochs5_batch16/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv"
|
| 16 |
+
}
|
mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1_results.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rouge1": 0.37043166966379115,
|
| 3 |
+
"rouge2": 0.16314706876754545,
|
| 4 |
+
"rougeL": 0.3526899761055531,
|
| 5 |
+
"rougeLsum": 0.3527425259487391,
|
| 6 |
+
"BERTScore_Precision": 0.9086111187934875,
|
| 7 |
+
"BERTScore_Recall": 0.91246497631073,
|
| 8 |
+
"BERTScore": 0.9093813300132751,
|
| 9 |
+
"sacrebleu": 14.267250605439102,
|
| 10 |
+
"bleu": 0,
|
| 11 |
+
"mean_length": 10.465142857142856,
|
| 12 |
+
"decoding": "greedy",
|
| 13 |
+
"train_size": "full",
|
| 14 |
+
"model": "t5",
|
| 15 |
+
"fn": "./output/t5-small_Q_A_TARGET_rot_epochs5_batch16/test_generations_beams0_p0_k0_temp1.0_seed1.csv"
|
| 16 |
+
}
|
mic/t5-small_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mic/t5-small_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1_results.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rouge1": 0.5343929184448398,
|
| 3 |
+
"rouge2": 0.32966635784127496,
|
| 4 |
+
"rougeL": 0.5217162957331931,
|
| 5 |
+
"rougeLsum": 0.5217162957331931,
|
| 6 |
+
"BERTScore_Precision": 0.938477098941803,
|
| 7 |
+
"BERTScore_Recall": 0.9323572516441345,
|
| 8 |
+
"BERTScore": 0.9344239234924316,
|
| 9 |
+
"sacrebleu": 29.04899883650727,
|
| 10 |
+
"bleu": 0,
|
| 11 |
+
"mean_length": 8.944615384615384,
|
| 12 |
+
"decoding": "beam",
|
| 13 |
+
"train_size": "full",
|
| 14 |
+
"model": "t5",
|
| 15 |
+
"fn": "./output/t5-small_Q_A_TARGET_rot_epochs5_batch16/test_generations_beams3_p0_k0_temp1.0_seed1.csv"
|
| 16 |
+
}
|
mic/t5-small_Q_A_TARGET_rot/tmp/dev.csv
ADDED
|
File without changes
|
mic/t5-small_Q_A_TARGET_rot/tmp/test.csv
ADDED
|
File without changes
|
mic/t5-small_Q_A_TARGET_rot/tmp/test_train.csv
ADDED
|
File without changes
|
mic/t5-small_Q_A_TARGET_rot/tmp/train.csv
ADDED
|
File without changes
|
mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_10.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5408e36cc2ef9e26fca6a8d6a5ce75e091bbdd20bef9c2fc19a0a797767b5f97
|
| 3 |
+
size 103183561
|
mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_16.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34a96fff58c80d33ff530cc4304b4b34c24a4f2b4bf779e54d7e51c799934c42
|
| 3 |
+
size 104441746
|