Jessie09 commited on
Commit
31be5c4
·
verified ·
1 Parent(s): 0e5bb60

Upload model checkpoint

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +13 -0
  3. mic/.DS_Store +0 -0
  4. mic/flan-t5-base_Q_A_TARGET_rot/.DS_Store +0 -0
  5. mic/flan-t5-base_Q_A_TARGET_rot/format_string.txt +1 -0
  6. mic/flan-t5-base_Q_A_TARGET_rot/log.txt +62 -0
  7. mic/flan-t5-base_Q_A_TARGET_rot/model/config.json +59 -0
  8. mic/flan-t5-base_Q_A_TARGET_rot/model/pytorch_model.bin +3 -0
  9. mic/flan-t5-base_Q_A_TARGET_rot/model/special_tokens_map.json +1 -0
  10. mic/flan-t5-base_Q_A_TARGET_rot/model/tokenizer.json +0 -0
  11. mic/flan-t5-base_Q_A_TARGET_rot/model/tokenizer_config.json +1 -0
  12. mic/flan-t5-base_Q_A_TARGET_rot/model/training_args.bin +3 -0
  13. mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv +0 -0
  14. mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1_results.json +16 -0
  15. mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1.csv +0 -0
  16. mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1_results.json +16 -0
  17. mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1.csv +0 -0
  18. mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1_results.json +16 -0
  19. mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_10.json +3 -0
  20. mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_5.json +3 -0
  21. mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_10.json +3 -0
  22. mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_5.json +3 -0
  23. mic/flan-t5-base_Q_A_TARGET_rot/tmp/dev.csv +0 -0
  24. mic/flan-t5-base_Q_A_TARGET_rot/tmp/test.csv +0 -0
  25. mic/flan-t5-base_Q_A_TARGET_rot/tmp/train.csv +3 -0
  26. mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_d-PM_5.json +3 -0
  27. mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_soft_5.json +3 -0
  28. mic/flan-t5-base_Q_A_TARGET_rot/train_sample_5.json +3 -0
  29. mic/flan-t5-base_Q_A_TARGET_rot/training_args.bin +3 -0
  30. mic/t5-small_Q_A_TARGET_rot/.DS_Store +0 -0
  31. mic/t5-small_Q_A_TARGET_rot/format_string.txt +1 -0
  32. mic/t5-small_Q_A_TARGET_rot/log.txt +62 -0
  33. mic/t5-small_Q_A_TARGET_rot/model/config.json +57 -0
  34. mic/t5-small_Q_A_TARGET_rot/model/pytorch_model.bin +3 -0
  35. mic/t5-small_Q_A_TARGET_rot/model/special_tokens_map.json +1 -0
  36. mic/t5-small_Q_A_TARGET_rot/model/tokenizer.json +0 -0
  37. mic/t5-small_Q_A_TARGET_rot/model/tokenizer_config.json +1 -0
  38. mic/t5-small_Q_A_TARGET_rot/model/training_args.bin +3 -0
  39. mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv +0 -0
  40. mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1_results.json +16 -0
  41. mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1.csv +0 -0
  42. mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1_results.json +16 -0
  43. mic/t5-small_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1.csv +0 -0
  44. mic/t5-small_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1_results.json +16 -0
  45. mic/t5-small_Q_A_TARGET_rot/tmp/dev.csv +0 -0
  46. mic/t5-small_Q_A_TARGET_rot/tmp/test.csv +0 -0
  47. mic/t5-small_Q_A_TARGET_rot/tmp/test_train.csv +0 -0
  48. mic/t5-small_Q_A_TARGET_rot/tmp/train.csv +0 -0
  49. mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_10.json +3 -0
  50. mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_16.json +3 -0
.DS_Store ADDED
Binary file (10.2 kB). View file
 
.gitattributes CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_10.json filter=lfs diff=lfs merge=lfs -text
37
+ mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_5.json filter=lfs diff=lfs merge=lfs -text
38
+ mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_10.json filter=lfs diff=lfs merge=lfs -text
39
+ mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_5.json filter=lfs diff=lfs merge=lfs -text
40
+ mic/flan-t5-base_Q_A_TARGET_rot/tmp/train.csv filter=lfs diff=lfs merge=lfs -text
41
+ mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_d-PM_5.json filter=lfs diff=lfs merge=lfs -text
42
+ mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_soft_5.json filter=lfs diff=lfs merge=lfs -text
43
+ mic/flan-t5-base_Q_A_TARGET_rot/train_sample_5.json filter=lfs diff=lfs merge=lfs -text
44
+ mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_10.json filter=lfs diff=lfs merge=lfs -text
45
+ mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_16.json filter=lfs diff=lfs merge=lfs -text
46
+ mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_5.json filter=lfs diff=lfs merge=lfs -text
47
+ mic/t5-small_Q_A_TARGET_rot/tmp/update_mle_10.json filter=lfs diff=lfs merge=lfs -text
48
+ mic/t5-small_Q_A_TARGET_rot/tmp/update_mle_5.json filter=lfs diff=lfs merge=lfs -text
mic/.DS_Store ADDED
Binary file (6.15 kB). View file
 
mic/flan-t5-base_Q_A_TARGET_rot/.DS_Store ADDED
Binary file (6.15 kB). View file
 
mic/flan-t5-base_Q_A_TARGET_rot/format_string.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Q [answ] A [rot] ~ rot
mic/flan-t5-base_Q_A_TARGET_rot/log.txt ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"loss": 2.6849, "learning_rate": 2.9472573839662445e-05, "epoch": 0.09, "step": 500}
2
+ {"loss": 1.8348, "learning_rate": 2.8945147679324896e-05, "epoch": 0.18, "step": 1000}
3
+ {"loss": 1.7561, "learning_rate": 2.841772151898734e-05, "epoch": 0.26, "step": 1500}
4
+ {"loss": 1.7158, "learning_rate": 2.7890295358649792e-05, "epoch": 0.35, "step": 2000}
5
+ {"loss": 1.6876, "learning_rate": 2.7362869198312236e-05, "epoch": 0.44, "step": 2500}
6
+ {"loss": 1.6535, "learning_rate": 2.6835443037974684e-05, "epoch": 0.53, "step": 3000}
7
+ {"loss": 1.6553, "learning_rate": 2.6308016877637132e-05, "epoch": 0.62, "step": 3500}
8
+ {"loss": 1.6279, "learning_rate": 2.578059071729958e-05, "epoch": 0.7, "step": 4000}
9
+ {"loss": 1.6317, "learning_rate": 2.5253164556962027e-05, "epoch": 0.79, "step": 4500}
10
+ {"loss": 1.616, "learning_rate": 2.4725738396624472e-05, "epoch": 0.88, "step": 5000}
11
+ {"loss": 1.5978, "learning_rate": 2.419831223628692e-05, "epoch": 0.97, "step": 5500}
12
+ {"eval_loss": 1.4766592979431152, "eval_runtime": 37.3341, "eval_samples_per_second": 306.047, "eval_steps_per_second": 19.151, "epoch": 1.0, "step": 5688}
13
+ {"loss": 1.5798, "learning_rate": 2.3670886075949367e-05, "epoch": 1.05, "step": 6000}
14
+ {"loss": 1.5592, "learning_rate": 2.3143459915611815e-05, "epoch": 1.14, "step": 6500}
15
+ {"loss": 1.5583, "learning_rate": 2.2616033755274263e-05, "epoch": 1.23, "step": 7000}
16
+ {"loss": 1.5656, "learning_rate": 2.208860759493671e-05, "epoch": 1.32, "step": 7500}
17
+ {"loss": 1.5566, "learning_rate": 2.1561181434599155e-05, "epoch": 1.41, "step": 8000}
18
+ {"loss": 1.5281, "learning_rate": 2.1033755274261603e-05, "epoch": 1.49, "step": 8500}
19
+ {"loss": 1.5308, "learning_rate": 2.050632911392405e-05, "epoch": 1.58, "step": 9000}
20
+ {"loss": 1.5254, "learning_rate": 1.99789029535865e-05, "epoch": 1.67, "step": 9500}
21
+ {"loss": 1.5199, "learning_rate": 1.9451476793248946e-05, "epoch": 1.76, "step": 10000}
22
+ {"loss": 1.5313, "learning_rate": 1.892405063291139e-05, "epoch": 1.85, "step": 10500}
23
+ {"loss": 1.5262, "learning_rate": 1.8396624472573842e-05, "epoch": 1.93, "step": 11000}
24
+ {"eval_loss": 1.4375780820846558, "eval_runtime": 37.255, "eval_samples_per_second": 306.697, "eval_steps_per_second": 19.192, "epoch": 2.0, "step": 11376}
25
+ {"loss": 1.513, "learning_rate": 1.7869198312236286e-05, "epoch": 2.02, "step": 11500}
26
+ {"loss": 1.4815, "learning_rate": 1.7341772151898738e-05, "epoch": 2.11, "step": 12000}
27
+ {"loss": 1.4919, "learning_rate": 1.6814345991561182e-05, "epoch": 2.2, "step": 12500}
28
+ {"loss": 1.4872, "learning_rate": 1.6286919831223626e-05, "epoch": 2.29, "step": 13000}
29
+ {"loss": 1.4861, "learning_rate": 1.5759493670886078e-05, "epoch": 2.37, "step": 13500}
30
+ {"loss": 1.4832, "learning_rate": 1.5232067510548524e-05, "epoch": 2.46, "step": 14000}
31
+ {"loss": 1.458, "learning_rate": 1.470464135021097e-05, "epoch": 2.55, "step": 14500}
32
+ {"loss": 1.4824, "learning_rate": 1.4177215189873418e-05, "epoch": 2.64, "step": 15000}
33
+ {"loss": 1.4965, "learning_rate": 1.3649789029535865e-05, "epoch": 2.73, "step": 15500}
34
+ {"loss": 1.472, "learning_rate": 1.3122362869198313e-05, "epoch": 2.81, "step": 16000}
35
+ {"loss": 1.4915, "learning_rate": 1.259493670886076e-05, "epoch": 2.9, "step": 16500}
36
+ {"loss": 1.4871, "learning_rate": 1.2067510548523207e-05, "epoch": 2.99, "step": 17000}
37
+ {"eval_loss": 1.4210575819015503, "eval_runtime": 37.3541, "eval_samples_per_second": 305.883, "eval_steps_per_second": 19.141, "epoch": 3.0, "step": 17064}
38
+ {"loss": 1.4511, "learning_rate": 1.1540084388185655e-05, "epoch": 3.08, "step": 17500}
39
+ {"loss": 1.4494, "learning_rate": 1.1012658227848103e-05, "epoch": 3.16, "step": 18000}
40
+ {"loss": 1.4517, "learning_rate": 1.048523206751055e-05, "epoch": 3.25, "step": 18500}
41
+ {"loss": 1.444, "learning_rate": 9.957805907172995e-06, "epoch": 3.34, "step": 19000}
42
+ {"loss": 1.4565, "learning_rate": 9.430379746835443e-06, "epoch": 3.43, "step": 19500}
43
+ {"loss": 1.4412, "learning_rate": 8.90295358649789e-06, "epoch": 3.52, "step": 20000}
44
+ {"loss": 1.4589, "learning_rate": 8.375527426160338e-06, "epoch": 3.6, "step": 20500}
45
+ {"loss": 1.4603, "learning_rate": 7.848101265822786e-06, "epoch": 3.69, "step": 21000}
46
+ {"loss": 1.4542, "learning_rate": 7.320675105485233e-06, "epoch": 3.78, "step": 21500}
47
+ {"loss": 1.4565, "learning_rate": 6.793248945147679e-06, "epoch": 3.87, "step": 22000}
48
+ {"loss": 1.4463, "learning_rate": 6.265822784810127e-06, "epoch": 3.96, "step": 22500}
49
+ {"eval_loss": 1.4135338068008423, "eval_runtime": 37.4259, "eval_samples_per_second": 305.297, "eval_steps_per_second": 19.104, "epoch": 4.0, "step": 22752}
50
+ {"loss": 1.456, "learning_rate": 5.738396624472574e-06, "epoch": 4.04, "step": 23000}
51
+ {"loss": 1.4418, "learning_rate": 5.2109704641350215e-06, "epoch": 4.13, "step": 23500}
52
+ {"loss": 1.4338, "learning_rate": 4.683544303797468e-06, "epoch": 4.22, "step": 24000}
53
+ {"loss": 1.4337, "learning_rate": 4.156118143459915e-06, "epoch": 4.31, "step": 24500}
54
+ {"loss": 1.4358, "learning_rate": 3.628691983122363e-06, "epoch": 4.4, "step": 25000}
55
+ {"loss": 1.4284, "learning_rate": 3.10126582278481e-06, "epoch": 4.48, "step": 25500}
56
+ {"loss": 1.4267, "learning_rate": 2.5738396624472574e-06, "epoch": 4.57, "step": 26000}
57
+ {"loss": 1.4251, "learning_rate": 2.0464135021097044e-06, "epoch": 4.66, "step": 26500}
58
+ {"loss": 1.4427, "learning_rate": 1.518987341772152e-06, "epoch": 4.75, "step": 27000}
59
+ {"loss": 1.4318, "learning_rate": 9.915611814345993e-07, "epoch": 4.83, "step": 27500}
60
+ {"loss": 1.4344, "learning_rate": 4.6413502109704643e-07, "epoch": 4.92, "step": 28000}
61
+ {"eval_loss": 1.4122037887573242, "eval_runtime": 37.375, "eval_samples_per_second": 305.713, "eval_steps_per_second": 19.13, "epoch": 5.0, "step": 28440}
62
+ {"train_runtime": 5676.8026, "train_samples_per_second": 80.158, "train_steps_per_second": 5.01, "total_flos": 8.832548854812672e+16, "train_loss": 1.5344804573327344, "epoch": 5.0, "step": 28440}
mic/flan-t5-base_Q_A_TARGET_rot/model/config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 768,
9
+ "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.1,
11
+ "eos_token_id": 1,
12
+ "feed_forward_proj": "gated-gelu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "layer_norm_epsilon": 1e-06,
16
+ "model_type": "t5",
17
+ "n_positions": 512,
18
+ "num_decoder_layers": 12,
19
+ "num_heads": 12,
20
+ "num_layers": 12,
21
+ "output_past": true,
22
+ "pad_token_id": 0,
23
+ "relative_attention_max_distance": 128,
24
+ "relative_attention_num_buckets": 32,
25
+ "task_specific_params": {
26
+ "summarization": {
27
+ "early_stopping": true,
28
+ "length_penalty": 2.0,
29
+ "max_length": 200,
30
+ "min_length": 30,
31
+ "no_repeat_ngram_size": 3,
32
+ "num_beams": 4,
33
+ "prefix": "summarize: "
34
+ },
35
+ "translation_en_to_de": {
36
+ "early_stopping": true,
37
+ "max_length": 300,
38
+ "num_beams": 4,
39
+ "prefix": "translate English to German: "
40
+ },
41
+ "translation_en_to_fr": {
42
+ "early_stopping": true,
43
+ "max_length": 300,
44
+ "num_beams": 4,
45
+ "prefix": "translate English to French: "
46
+ },
47
+ "translation_en_to_ro": {
48
+ "early_stopping": true,
49
+ "max_length": 300,
50
+ "num_beams": 4,
51
+ "prefix": "translate English to Romanian: "
52
+ }
53
+ },
54
+ "tie_word_embeddings": false,
55
+ "torch_dtype": "float32",
56
+ "transformers_version": "4.16.2",
57
+ "use_cache": true,
58
+ "vocab_size": 32125
59
+ }
mic/flan-t5-base_Q_A_TARGET_rot/model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37211da66f5804d1aba878b22d6d0e65c5d8af2c50c6ed3afe19a89f1dd1e660
3
+ size 990419917
mic/flan-t5-base_Q_A_TARGET_rot/model/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "<eos>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
mic/flan-t5-base_Q_A_TARGET_rot/model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
mic/flan-t5-base_Q_A_TARGET_rot/model/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "name_or_path": "google/flan-t5-base", "sp_model_kwargs": {}, "special_tokens_map_file": "/home/younes_huggingface_co/.cache/huggingface/hub/models--google--t5-v1_1-base/snapshots/650d7745bf1e502d6949b22cc19155cd656d3d4e/special_tokens_map.json", "tokenizer_class": "T5Tokenizer"}
mic/flan-t5-base_Q_A_TARGET_rot/model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb0256e0f5d6f2fc1ea002259e87848c97e8385553df007cf201e472bd3d63c
3
+ size 3311
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv ADDED
The diff for this file is too large to render. See raw diff
 
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "rouge1": 0.41405274871333886,
3
+ "rouge2": 0.20410173251757238,
4
+ "rougeL": 0.3969533843177795,
5
+ "rougeLsum": 0.3969533843177795,
6
+ "BERTScore_Precision": 0.9219520688056946,
7
+ "BERTScore_Recall": 0.9207043051719666,
8
+ "BERTScore": 0.9202384352684021,
9
+ "sacrebleu": 18.017794937736255,
10
+ "bleu": 0,
11
+ "mean_length": 9.300923076923077,
12
+ "decoding": "p=0.9",
13
+ "train_size": "full",
14
+ "model": "t5",
15
+ "fn": "rot_generation/output/flan-t5-base_Q_A_TARGET_rot_epochs5_batch16_seed1/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv"
16
+ }
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1.csv ADDED
The diff for this file is too large to render. See raw diff
 
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "rouge1": 0.37943165867066264,
3
+ "rouge2": 0.1722986392769147,
4
+ "rougeL": 0.36126971991333545,
5
+ "rougeLsum": 0.36126971991333545,
6
+ "BERTScore_Precision": 0.9139366149902344,
7
+ "BERTScore_Recall": 0.9160142540931702,
8
+ "BERTScore": 0.9138739109039307,
9
+ "sacrebleu": 15.362114235644004,
10
+ "bleu": 0,
11
+ "mean_length": 9.775296703296704,
12
+ "decoding": "greedy",
13
+ "train_size": "full",
14
+ "model": "t5",
15
+ "fn": "rot_generation/output/flan-t5-base_Q_A_TARGET_rot_epochs5_batch16_seed1/test_generations_beams0_p0_k0_temp1.0_seed1.csv"
16
+ }
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1.csv ADDED
The diff for this file is too large to render. See raw diff
 
mic/flan-t5-base_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "rouge1": 0.5507494024510283,
3
+ "rouge2": 0.3496228495311189,
4
+ "rougeL": 0.5374227474345985,
5
+ "rougeLsum": 0.5374227474345985,
6
+ "BERTScore_Precision": 0.9413657188415527,
7
+ "BERTScore_Recall": 0.9361890554428101,
8
+ "BERTScore": 0.9377807378768921,
9
+ "sacrebleu": 30.679671337078904,
10
+ "bleu": 0,
11
+ "mean_length": 9.008527472527472,
12
+ "decoding": "beam",
13
+ "train_size": "full",
14
+ "model": "t5",
15
+ "fn": "rot_generation/output/flan-t5-base_Q_A_TARGET_rot_epochs5_batch16_seed1/test_generations_beams3_p0_k0_temp1.0_seed1.csv"
16
+ }
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_10.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c7d30371f10339a2655d965a525a3aa31d6e23c56a1d4c2b77ffe83b01a80eb
3
+ size 130504776
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_d-PM_5.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d4611991d8d818d5f99200e8f55fccfef57d7070d1d528696ed6deff35d4bd8
3
+ size 109800806
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_10.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9ffb3aa194458aaeaf01646be609aeb89c5482e3a73ebb4109cc152885160ec
3
+ size 131855393
mic/flan-t5-base_Q_A_TARGET_rot/tmp/aligned_soft_5.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:132268b3e2e040f1380afbe6372b51ed91cc3ee537eb19a1d6684fb9abcf01e2
3
+ size 110940132
mic/flan-t5-base_Q_A_TARGET_rot/tmp/dev.csv ADDED
The diff for this file is too large to render. See raw diff
 
mic/flan-t5-base_Q_A_TARGET_rot/tmp/test.csv ADDED
The diff for this file is too large to render. See raw diff
 
mic/flan-t5-base_Q_A_TARGET_rot/tmp/train.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a85c124cabb5e27843113888b7f98135abf56fff95012d8b4d8ffe02f0294bd
3
+ size 60631233
mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_d-PM_5.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceb2d25bfdec04ac5fbc6fd3d558a92613c198aa06007974ba6210149f97fed5
3
+ size 92687899
mic/flan-t5-base_Q_A_TARGET_rot/train_candidates_soft_5.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c033019631e6e87cd89960115456fa9b7a0a6af6d913750f10b4f9b640235c00
3
+ size 94743133
mic/flan-t5-base_Q_A_TARGET_rot/train_sample_5.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b62f84e59d3eb2762edf1d02359ad72344266084436b027358f25207712fccf
3
+ size 18092031
mic/flan-t5-base_Q_A_TARGET_rot/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc5b14543083eb0661a5f3575d692d3d641d542ed2f4f56922e12c3b6fce717
3
+ size 1007
mic/t5-small_Q_A_TARGET_rot/.DS_Store ADDED
Binary file (6.15 kB). View file
 
mic/t5-small_Q_A_TARGET_rot/format_string.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Q [answ] A [rot] ~ rot
mic/t5-small_Q_A_TARGET_rot/log.txt ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"loss": 4.035, "learning_rate": 2.9475738396624472e-05, "epoch": 0.09, "step": 500}
2
+ {"loss": 2.9369, "learning_rate": 2.894831223628692e-05, "epoch": 0.18, "step": 1000}
3
+ {"loss": 2.7696, "learning_rate": 2.8420886075949368e-05, "epoch": 0.26, "step": 1500}
4
+ {"loss": 2.6421, "learning_rate": 2.7893459915611816e-05, "epoch": 0.35, "step": 2000}
5
+ {"loss": 2.5651, "learning_rate": 2.7366033755274263e-05, "epoch": 0.44, "step": 2500}
6
+ {"loss": 2.4838, "learning_rate": 2.6839662447257383e-05, "epoch": 0.53, "step": 3000}
7
+ {"loss": 2.4457, "learning_rate": 2.631223628691983e-05, "epoch": 0.62, "step": 3500}
8
+ {"loss": 2.4027, "learning_rate": 2.5785864978902955e-05, "epoch": 0.7, "step": 4000}
9
+ {"loss": 2.3666, "learning_rate": 2.5258438818565402e-05, "epoch": 0.79, "step": 4500}
10
+ {"loss": 2.3564, "learning_rate": 2.4731012658227847e-05, "epoch": 0.88, "step": 5000}
11
+ {"loss": 2.3104, "learning_rate": 2.4203586497890298e-05, "epoch": 0.97, "step": 5500}
12
+ {"eval_loss": 2.0744245052337646, "eval_runtime": 10.6369, "eval_samples_per_second": 1074.183, "eval_steps_per_second": 67.219, "epoch": 1.0, "step": 5688}
13
+ {"loss": 2.2971, "learning_rate": 2.3676160337552742e-05, "epoch": 1.05, "step": 6000}
14
+ {"loss": 2.2592, "learning_rate": 2.314873417721519e-05, "epoch": 1.14, "step": 6500}
15
+ {"loss": 2.2456, "learning_rate": 2.2621308016877638e-05, "epoch": 1.23, "step": 7000}
16
+ {"loss": 2.2567, "learning_rate": 2.2094936708860758e-05, "epoch": 1.32, "step": 7500}
17
+ {"loss": 2.2198, "learning_rate": 2.156751054852321e-05, "epoch": 1.41, "step": 8000}
18
+ {"loss": 2.1795, "learning_rate": 2.1040084388185654e-05, "epoch": 1.49, "step": 8500}
19
+ {"loss": 2.1834, "learning_rate": 2.0512658227848105e-05, "epoch": 1.58, "step": 9000}
20
+ {"loss": 2.1675, "learning_rate": 1.998523206751055e-05, "epoch": 1.67, "step": 9500}
21
+ {"loss": 2.1539, "learning_rate": 1.9457805907172994e-05, "epoch": 1.76, "step": 10000}
22
+ {"loss": 2.1553, "learning_rate": 1.8930379746835445e-05, "epoch": 1.85, "step": 10500}
23
+ {"loss": 2.1493, "learning_rate": 1.8404008438818565e-05, "epoch": 1.93, "step": 11000}
24
+ {"eval_loss": 1.9509127140045166, "eval_runtime": 10.4535, "eval_samples_per_second": 1093.031, "eval_steps_per_second": 68.398, "epoch": 2.0, "step": 11376}
25
+ {"loss": 2.1474, "learning_rate": 1.7876582278481013e-05, "epoch": 2.02, "step": 11500}
26
+ {"loss": 2.1227, "learning_rate": 1.734915611814346e-05, "epoch": 2.11, "step": 12000}
27
+ {"loss": 2.1167, "learning_rate": 1.6821729957805908e-05, "epoch": 2.2, "step": 12500}
28
+ {"loss": 2.1139, "learning_rate": 1.6294303797468356e-05, "epoch": 2.29, "step": 13000}
29
+ {"loss": 2.1019, "learning_rate": 1.5766877637130804e-05, "epoch": 2.37, "step": 13500}
30
+ {"loss": 2.093, "learning_rate": 1.5239451476793248e-05, "epoch": 2.46, "step": 14000}
31
+ {"loss": 2.0748, "learning_rate": 1.4712025316455698e-05, "epoch": 2.55, "step": 14500}
32
+ {"loss": 2.0969, "learning_rate": 1.4184599156118144e-05, "epoch": 2.64, "step": 15000}
33
+ {"loss": 2.107, "learning_rate": 1.3657172995780592e-05, "epoch": 2.73, "step": 15500}
34
+ {"loss": 2.083, "learning_rate": 1.3129746835443038e-05, "epoch": 2.81, "step": 16000}
35
+ {"loss": 2.0988, "learning_rate": 1.2602320675105485e-05, "epoch": 2.9, "step": 16500}
36
+ {"loss": 2.0887, "learning_rate": 1.2074894514767933e-05, "epoch": 2.99, "step": 17000}
37
+ {"eval_loss": 1.9086660146713257, "eval_runtime": 10.548, "eval_samples_per_second": 1083.243, "eval_steps_per_second": 67.786, "epoch": 3.0, "step": 17064}
38
+ {"loss": 2.0637, "learning_rate": 1.1548523206751055e-05, "epoch": 3.08, "step": 17500}
39
+ {"loss": 2.0555, "learning_rate": 1.1021097046413503e-05, "epoch": 3.16, "step": 18000}
40
+ {"loss": 2.0547, "learning_rate": 1.0494725738396626e-05, "epoch": 3.25, "step": 18500}
41
+ {"loss": 2.0452, "learning_rate": 9.96729957805907e-06, "epoch": 3.34, "step": 19000}
42
+ {"loss": 2.0674, "learning_rate": 9.439873417721518e-06, "epoch": 3.43, "step": 19500}
43
+ {"loss": 2.044, "learning_rate": 8.912447257383966e-06, "epoch": 3.52, "step": 20000}
44
+ {"loss": 2.0607, "learning_rate": 8.385021097046414e-06, "epoch": 3.6, "step": 20500}
45
+ {"loss": 2.0626, "learning_rate": 7.857594936708862e-06, "epoch": 3.69, "step": 21000}
46
+ {"loss": 2.0536, "learning_rate": 7.330168776371309e-06, "epoch": 3.78, "step": 21500}
47
+ {"loss": 2.0569, "learning_rate": 6.802742616033756e-06, "epoch": 3.87, "step": 22000}
48
+ {"loss": 2.0488, "learning_rate": 6.276371308016877e-06, "epoch": 3.96, "step": 22500}
49
+ {"eval_loss": 1.8879072666168213, "eval_runtime": 10.5748, "eval_samples_per_second": 1080.492, "eval_steps_per_second": 67.613, "epoch": 4.0, "step": 22752}
50
+ {"loss": 2.0564, "learning_rate": 5.750000000000001e-06, "epoch": 4.04, "step": 23000}
51
+ {"loss": 2.0471, "learning_rate": 5.222573839662447e-06, "epoch": 4.13, "step": 23500}
52
+ {"loss": 2.0343, "learning_rate": 4.695147679324895e-06, "epoch": 4.22, "step": 24000}
53
+ {"loss": 2.0357, "learning_rate": 4.167721518987342e-06, "epoch": 4.31, "step": 24500}
54
+ {"loss": 2.031, "learning_rate": 3.640295358649789e-06, "epoch": 4.4, "step": 25000}
55
+ {"loss": 2.0332, "learning_rate": 3.1139240506329112e-06, "epoch": 4.48, "step": 25500}
56
+ {"loss": 2.0257, "learning_rate": 2.5864978902953586e-06, "epoch": 4.57, "step": 26000}
57
+ {"loss": 2.0251, "learning_rate": 2.059071729957806e-06, "epoch": 4.66, "step": 26500}
58
+ {"loss": 2.0403, "learning_rate": 1.5316455696202531e-06, "epoch": 4.75, "step": 27000}
59
+ {"loss": 2.0305, "learning_rate": 1.0042194092827005e-06, "epoch": 4.83, "step": 27500}
60
+ {"loss": 2.0299, "learning_rate": 4.778481012658228e-07, "epoch": 4.92, "step": 28000}
61
+ {"eval_loss": 1.8821401596069336, "eval_runtime": 11.142, "eval_samples_per_second": 1025.492, "eval_steps_per_second": 64.172, "epoch": 5.0, "step": 28440}
62
+ {"train_runtime": 1789.4171, "train_samples_per_second": 254.295, "train_steps_per_second": 15.893, "total_flos": 1.7217080101699584e+16, "train_loss": 2.2082438644645253, "epoch": 5.0, "step": 28440}
mic/t5-small_Q_A_TARGET_rot/model/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "t5-small",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 512,
9
+ "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.1,
11
+ "eos_token_id": 1,
12
+ "feed_forward_proj": "relu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "layer_norm_epsilon": 1e-06,
16
+ "model_type": "t5",
17
+ "n_positions": 512,
18
+ "num_decoder_layers": 6,
19
+ "num_heads": 8,
20
+ "num_layers": 6,
21
+ "output_past": true,
22
+ "pad_token_id": 0,
23
+ "relative_attention_num_buckets": 32,
24
+ "task_specific_params": {
25
+ "summarization": {
26
+ "early_stopping": true,
27
+ "length_penalty": 2.0,
28
+ "max_length": 200,
29
+ "min_length": 30,
30
+ "no_repeat_ngram_size": 3,
31
+ "num_beams": 4,
32
+ "prefix": "summarize: "
33
+ },
34
+ "translation_en_to_de": {
35
+ "early_stopping": true,
36
+ "max_length": 300,
37
+ "num_beams": 4,
38
+ "prefix": "translate English to German: "
39
+ },
40
+ "translation_en_to_fr": {
41
+ "early_stopping": true,
42
+ "max_length": 300,
43
+ "num_beams": 4,
44
+ "prefix": "translate English to French: "
45
+ },
46
+ "translation_en_to_ro": {
47
+ "early_stopping": true,
48
+ "max_length": 300,
49
+ "num_beams": 4,
50
+ "prefix": "translate English to Romanian: "
51
+ }
52
+ },
53
+ "torch_dtype": "float32",
54
+ "transformers_version": "4.16.2",
55
+ "use_cache": true,
56
+ "vocab_size": 32125
57
+ }
mic/t5-small_Q_A_TARGET_rot/model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c465507866aa7674b50b2a82849cc80aef6fa50cad40f13706a572f07dd0516
3
+ size 532480
mic/t5-small_Q_A_TARGET_rot/model/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "<eos>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
mic/t5-small_Q_A_TARGET_rot/model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
mic/t5-small_Q_A_TARGET_rot/model/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "t5-small", "tokenizer_class": "T5Tokenizer"}
mic/t5-small_Q_A_TARGET_rot/model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b572aa89af159bcd67bf4a8e958d164e0085ba4d1099f1d9852dcf8834c368a4
3
+ size 3311
mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv ADDED
The diff for this file is too large to render. See raw diff
 
mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0.9_k0_temp1.0_seed1_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "rouge1": 0.4022840192368789,
3
+ "rouge2": 0.19232970454852946,
4
+ "rougeL": 0.385595060978265,
5
+ "rougeLsum": 0.3855983071573733,
6
+ "BERTScore_Precision": 0.9169009327888489,
7
+ "BERTScore_Recall": 0.9170882701873779,
8
+ "BERTScore": 0.9158875346183777,
9
+ "sacrebleu": 16.713080596394033,
10
+ "bleu": 0,
11
+ "mean_length": 9.85178021978022,
12
+ "decoding": "p=0.9",
13
+ "train_size": "full",
14
+ "model": "t5",
15
+ "fn": "./output/t5-small_Q_A_TARGET_rot_epochs5_batch16/test_generations_beams0_p0.9_k0_temp1.0_seed1.csv"
16
+ }
mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1.csv ADDED
The diff for this file is too large to render. See raw diff
 
mic/t5-small_Q_A_TARGET_rot/test_generations_beams0_p0_k0_temp1.0_seed1_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "rouge1": 0.37043166966379115,
3
+ "rouge2": 0.16314706876754545,
4
+ "rougeL": 0.3526899761055531,
5
+ "rougeLsum": 0.3527425259487391,
6
+ "BERTScore_Precision": 0.9086111187934875,
7
+ "BERTScore_Recall": 0.91246497631073,
8
+ "BERTScore": 0.9093813300132751,
9
+ "sacrebleu": 14.267250605439102,
10
+ "bleu": 0,
11
+ "mean_length": 10.465142857142856,
12
+ "decoding": "greedy",
13
+ "train_size": "full",
14
+ "model": "t5",
15
+ "fn": "./output/t5-small_Q_A_TARGET_rot_epochs5_batch16/test_generations_beams0_p0_k0_temp1.0_seed1.csv"
16
+ }
mic/t5-small_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1.csv ADDED
The diff for this file is too large to render. See raw diff
 
mic/t5-small_Q_A_TARGET_rot/test_generations_beams3_p0_k0_temp1.0_seed1_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "rouge1": 0.5343929184448398,
3
+ "rouge2": 0.32966635784127496,
4
+ "rougeL": 0.5217162957331931,
5
+ "rougeLsum": 0.5217162957331931,
6
+ "BERTScore_Precision": 0.938477098941803,
7
+ "BERTScore_Recall": 0.9323572516441345,
8
+ "BERTScore": 0.9344239234924316,
9
+ "sacrebleu": 29.04899883650727,
10
+ "bleu": 0,
11
+ "mean_length": 8.944615384615384,
12
+ "decoding": "beam",
13
+ "train_size": "full",
14
+ "model": "t5",
15
+ "fn": "./output/t5-small_Q_A_TARGET_rot_epochs5_batch16/test_generations_beams3_p0_k0_temp1.0_seed1.csv"
16
+ }
mic/t5-small_Q_A_TARGET_rot/tmp/dev.csv ADDED
File without changes
mic/t5-small_Q_A_TARGET_rot/tmp/test.csv ADDED
File without changes
mic/t5-small_Q_A_TARGET_rot/tmp/test_train.csv ADDED
File without changes
mic/t5-small_Q_A_TARGET_rot/tmp/train.csv ADDED
File without changes
mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_10.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5408e36cc2ef9e26fca6a8d6a5ce75e091bbdd20bef9c2fc19a0a797767b5f97
3
+ size 103183561
mic/t5-small_Q_A_TARGET_rot/tmp/update_dist_16.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34a96fff58c80d33ff530cc4304b4b34c24a4f2b4bf779e54d7e51c799934c42
3
+ size 104441746