natmin322 committed on
Commit
e84f283
·
1 Parent(s): 2b87f4b

feat: add T5_small benchmark scripts for 4 comparison scenarios

Browse files

- Create root_gainlora/T5_small/ with 8 scripts (inflora + gainlora_inflora
for long_order3, long_order4, superni_order1, superni_order2)
- Create improve_gainlora/T5_small/ with 12 scripts (same + specroute)
- Model: google/flan-t5-small (passed as $2 arg, no code changes needed)
- Long sequence: train_bsz=32, ga=1, eval_bsz=256 (from bsz=8-16, ga=2-4)
- SuperNI: train_bsz=16, ga=2, eval_bsz=8
- Specroute T4 1GPU: BSZ=32; GA=1; EVAL_BSZ=256 (long); BSZ=16; GA=2 (superni)
- Removed --gradient_checkpointing (flan-t5-small fits in ~2GB VRAM)
- All output_dir/run_name/checkpoint paths updated with t5_small_ prefix
- Added gen_t5_small_scripts.py for reproducibility

Files changed (21) hide show
  1. gen_t5_small_scripts.py +196 -0
  2. improve_gainlora/T5_small/gen_script_long_order3_t5_small_gainlora_inflora.sh +763 -0
  3. improve_gainlora/T5_small/gen_script_long_order3_t5_small_inflora.sh +744 -0
  4. improve_gainlora/T5_small/gen_script_long_order3_t5_small_specroute.sh +849 -0
  5. improve_gainlora/T5_small/gen_script_long_order4_t5_small_gainlora_inflora.sh +774 -0
  6. improve_gainlora/T5_small/gen_script_long_order4_t5_small_inflora.sh +744 -0
  7. improve_gainlora/T5_small/gen_script_long_order4_t5_small_specroute.sh +849 -0
  8. improve_gainlora/T5_small/gen_script_superni_order1_t5_small_gainlora_inflora.sh +744 -0
  9. improve_gainlora/T5_small/gen_script_superni_order1_t5_small_inflora.sh +713 -0
  10. improve_gainlora/T5_small/gen_script_superni_order1_t5_small_specroute.sh +821 -0
  11. improve_gainlora/T5_small/gen_script_superni_order2_t5_small_gainlora_inflora.sh +743 -0
  12. improve_gainlora/T5_small/gen_script_superni_order2_t5_small_inflora.sh +713 -0
  13. improve_gainlora/T5_small/gen_script_superni_order2_t5_small_specroute.sh +804 -0
  14. root_gainlora/T5_small/gen_script_long_order3_t5_small_gainlora_inflora.sh +763 -0
  15. root_gainlora/T5_small/gen_script_long_order3_t5_small_inflora.sh +744 -0
  16. root_gainlora/T5_small/gen_script_long_order4_t5_small_gainlora_inflora.sh +774 -0
  17. root_gainlora/T5_small/gen_script_long_order4_t5_small_inflora.sh +744 -0
  18. root_gainlora/T5_small/gen_script_superni_order1_t5_small_gainlora_inflora.sh +744 -0
  19. root_gainlora/T5_small/gen_script_superni_order1_t5_small_inflora.sh +713 -0
  20. root_gainlora/T5_small/gen_script_superni_order2_t5_small_gainlora_inflora.sh +743 -0
  21. root_gainlora/T5_small/gen_script_superni_order2_t5_small_inflora.sh +713 -0
gen_t5_small_scripts.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Generate T5_small benchmark scripts from original T5 scripts.
4
+
5
+ For all scripts:
6
+ - Rename experiment: t5_METHOD -> t5_small_METHOD (output_dir, run_name, checkpoint paths)
7
+ - Remove --gradient_checkpointing standalone flag (not needed for small model)
8
+
9
+ Batch sizes (flan-t5-small fits easily on T4 with large batches):
10
+ Long sequence non-specroute : train=32, ga=1, eval=256
11
+ SuperNI non-specroute : train=16, ga=2, eval=8
12
+
13
+ Specroute GPU-mode blocks (long):
14
+ t4_2gpu : BSZ=16; GA=1; EVAL_BSZ=256
15
+ t4_1gpu : BSZ=32; GA=1; EVAL_BSZ=256
16
+ a100 : BSZ=64; GA=1; EVAL_BSZ=512
17
+
18
+ Specroute GPU-mode blocks (superni):
19
+ t4_2gpu : BSZ=8 ; GA=2; EVAL_BSZ=16
20
+ t4_1gpu : BSZ=16; GA=2; EVAL_BSZ=16
21
+ a100 : BSZ=32; GA=1; EVAL_BSZ=32
22
+ """
23
+
24
+ import re
25
+ import os
26
+ from pathlib import Path
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Helpers
31
+ # ---------------------------------------------------------------------------
32
+
33
# Shell assignment line written into each GPU-mode branch of the
# long-sequence specroute scripts, keyed by GPU mode name.
SPECROUTE_LONG_MODES = {
    "t4_2gpu": "BSZ=16; GA=1; EVAL_BSZ=256",
    "t4_1gpu": "BSZ=32; GA=1; EVAL_BSZ=256",
    "a100": "BSZ=64; GA=1; EVAL_BSZ=512",
}

# Same mapping for the SuperNI specroute scripts.
SPECROUTE_SUPERNI_MODES = {
    "t4_2gpu": "BSZ=8; GA=2; EVAL_BSZ=16",
    "t4_1gpu": "BSZ=16; GA=2; EVAL_BSZ=16",
    "a100": "BSZ=32; GA=1; EVAL_BSZ=32",
}
44
+
45
+
46
def replace_experiment_names(content: str) -> str:
    """Rename experiments: gen_script_X_t5_METHOD -> gen_script_X_t5_small_METHOD.

    Applies to every occurrence in ``content`` (output_dir, run_name and
    checkpoint paths). Names that already carry the ``small_`` prefix are
    left alone, so the function is idempotent.

    Task-config directories (``gen_script_X_t5_configs``) are deliberately
    NOT renamed: they are referenced by the original T5 scripts, so they are
    known to exist, whereas a ``..._t5_small_configs`` directory is not
    created by this generator.
    NOTE(review): if dedicated ``t5_small`` config directories are added
    later, drop ``configs`` from the lookahead below.
    """
    return re.sub(
        # The lookahead skips already-renamed names and the shared config dirs.
        r'(gen_script_(?:long_order[34]|superni_order[12])_t5_)(?!small_|configs\b)',
        r'\1small_',
        content,
    )
54
+
55
+
56
def remove_gradient_checkpointing_flag(content: str) -> str:
    """Remove the ``--gradient_checkpointing`` argument line from a script.

    Three layouts are handled, in this order:

    1. The flag is followed by its own line continuation (a space and a
       trailing backslash): the whole line is removed.
    2. The flag is the last argument of a command (no trailing backslash):
       the line is removed together with the continuation backslash that
       ends the previous line. Otherwise that backslash would splice the
       following shell line into the command's argument list.
    3. Any remaining bare flag line is removed.

    Returns the modified script text; text without the flag is returned
    unchanged.
    """
    # 1. Flag in the middle of an argument list.
    content = re.sub(r'[ \t]*--gradient_checkpointing \\\n', '', content)
    # 2. Flag as the final argument: also drop the previous line's backslash.
    content = re.sub(r'[ \t]*\\\n[ \t]*--gradient_checkpointing\n', '\n', content)
    # 3. Flag on a line that is not preceded by a continuation.
    content = re.sub(r'[ \t]*--gradient_checkpointing\n', '', content)
    return content
62
+
63
+
64
def set_non_specroute_batch_sizes(content: str, script_type: str) -> str:
    """Overwrite the hard-coded batch-size and accumulation arguments.

    Every ``--per_device_train_batch_size N``, ``--per_device_eval_batch_size N``
    and ``--gradient_accumulation_steps N`` in ``content`` is rewritten with
    the value chosen for ``script_type``.

    Args:
        content: Shell script text.
        script_type: ``'long'`` selects train=32, eval=256, ga=1; any other
            value is treated as SuperNI (train=16, eval=8, ga=2).

    Returns:
        The script text with the numeric values replaced.
    """
    if script_type == 'long':
        values = {
            'per_device_train_batch_size': 32,
            'per_device_eval_batch_size': 256,
            'gradient_accumulation_steps': 1,
        }
    else:  # superni
        values = {
            'per_device_train_batch_size': 16,
            'per_device_eval_batch_size': 8,
            'gradient_accumulation_steps': 2,
        }

    for flag, value in values.items():
        content = re.sub(rf'--{flag} \d+', f'--{flag} {value}', content)
    return content
78
+
79
+
80
def fix_specroute_gpu_modes(content: str, script_type: str) -> str:
    """Rewrite BSZ/GA/EVAL_BSZ inside the GPU-mode if/elif/else block.

    For each branch of the ``$GPU_MODE`` conditional (``t4_2gpu``,
    ``t4_1gpu`` and the final ``else`` branch, used for a100) the
    ``BSZ=N; GA=N; EVAL_BSZ=N`` line that directly follows the branch header
    is replaced by the matching entry of ``SPECROUTE_LONG_MODES`` (when
    ``script_type == 'long'``) or ``SPECROUTE_SUPERNI_MODES`` (otherwise).

    The indentation in front of the assignment line is captured and reused,
    so the rewritten line keeps the source script's own indentation.

    Finally ``FP16_FLAG="--gradient_checkpointing"`` is blanked to
    ``FP16_FLAG=""`` wherever it appears.
    """
    modes = SPECROUTE_LONG_MODES if script_type == 'long' else SPECROUTE_SUPERNI_MODES

    # (regex for the branch header line, key into ``modes``)
    branch_headers = (
        (r'if \[ "\$GPU_MODE" = "t4_2gpu" \]; then\n', 't4_2gpu'),
        (r'elif \[ "\$GPU_MODE" = "t4_1gpu" \]; then\n', 't4_1gpu'),
        (r'else\n', 'a100'),
    )
    for header, mode in branch_headers:
        content = re.sub(
            # Group 1 = header line plus the existing indentation.
            '(' + header + r'[ \t]*)BSZ=\d+; GA=\d+; EVAL_BSZ=\d+',
            r'\g<1>' + modes[mode],
            content,
        )

    # Remove gradient_checkpointing from FP16_FLAG (set it to empty for all modes)
    content = content.replace('FP16_FLAG="--gradient_checkpointing"', 'FP16_FLAG=""')
    return content
106
+
107
+
108
def transform(content: str, is_specroute: bool, script_type: str) -> str:
    """Apply all T5 -> T5_small rewrites to one script's text.

    Steps, in order: rename the experiment, strip the standalone
    ``--gradient_checkpointing`` argument, then adjust batch sizes. Specroute
    scripts get their GPU-mode block rewritten; all other scripts get their
    hard-coded batch-size arguments rewritten.

    Args:
        content: Original shell script text.
        is_specroute: Whether the script is a specroute variant.
        script_type: ``'long'`` or ``'superni'``; forwarded to the batch-size
            step.

    Returns:
        The transformed script text.
    """
    content = replace_experiment_names(content)
    content = remove_gradient_checkpointing_flag(content)
    if is_specroute:
        return fix_specroute_gpu_modes(content, script_type)
    return set_non_specroute_batch_sizes(content, script_type)
116
+
117
+
118
def _convert_scripts(src_dir: Path, dst_dir: Path, scripts: list, script_type: str) -> None:
    """Transform each named script from ``src_dir`` and write it to ``dst_dir``."""
    for script in scripts:
        src = src_dir / script
        if not src.exists():
            print(f" SKIP (not found): {src}")
            continue
        new_content = transform(src.read_text(), 'specroute' in script, script_type)
        dst = dst_dir / script.replace('_t5_', '_t5_small_')
        dst.write_text(new_content)
        os.chmod(dst, 0o755)  # generated scripts are meant to be executed directly
        try:
            shown = dst.relative_to(src_dir.parent.parent)
        except ValueError:
            # dst_dir is not located under src_dir's grandparent; the file was
            # written successfully, so just report its full path.
            shown = dst
        print(f" Created {shown}")


def process_dir(src_dir: Path, dst_dir: Path, scripts_long: list, scripts_superni: list):
    """Generate the T5_small variants of the listed scripts.

    Args:
        src_dir: Directory holding the original T5 scripts.
        dst_dir: Output directory; created (with missing parents) if needed.
        scripts_long: File names processed with script type ``'long'``.
        scripts_superni: File names processed with script type ``'superni'``.

    A script is treated as a specroute variant when its file name contains
    ``'specroute'``. Missing source files are reported and skipped. Each
    output file is named by replacing ``_t5_`` with ``_t5_small_`` and is
    made executable (mode 0o755).
    """
    dst_dir.mkdir(parents=True, exist_ok=True)
    _convert_scripts(src_dir, dst_dir, scripts_long, 'long')
    _convert_scripts(src_dir, dst_dir, scripts_superni, 'superni')
148
+
149
+
150
+ # ---------------------------------------------------------------------------
151
+ # Root gainlora
152
+ # ---------------------------------------------------------------------------
153
# Both project directories sit next to this script in the repository, so
# resolve them from the script's own location instead of a machine-specific
# absolute path.
_REPO_ROOT = Path(__file__).resolve().parent

ROOT = _REPO_ROOT / 'root_gainlora'

ROOT_LONG = [
    'gen_script_long_order3_t5_inflora.sh',
    'gen_script_long_order3_t5_gainlora_inflora.sh',
    'gen_script_long_order4_t5_inflora.sh',
    'gen_script_long_order4_t5_gainlora_inflora.sh',
]
ROOT_SUPERNI = [
    'gen_script_superni_order1_t5_inflora.sh',
    'gen_script_superni_order1_t5_gainlora_inflora.sh',
    'gen_script_superni_order2_t5_inflora.sh',
    'gen_script_superni_order2_t5_gainlora_inflora.sh',
]

# ---------------------------------------------------------------------------
# Improve gainlora
# ---------------------------------------------------------------------------
IMPROVE = _REPO_ROOT / 'improve_gainlora'

IMPROVE_LONG = [
    'gen_script_long_order3_t5_inflora.sh',
    'gen_script_long_order3_t5_gainlora_inflora.sh',
    'gen_script_long_order3_t5_specroute.sh',
    'gen_script_long_order4_t5_inflora.sh',
    'gen_script_long_order4_t5_gainlora_inflora.sh',
    'gen_script_long_order4_t5_specroute.sh',
]
IMPROVE_SUPERNI = [
    'gen_script_superni_order1_t5_inflora.sh',
    'gen_script_superni_order1_t5_gainlora_inflora.sh',
    'gen_script_superni_order1_t5_specroute.sh',
    'gen_script_superni_order2_t5_inflora.sh',
    'gen_script_superni_order2_t5_gainlora_inflora.sh',
    'gen_script_superni_order2_t5_specroute.sh',
]


# Only write files when executed as a script, so importing this module
# (e.g. to reuse the helpers) has no filesystem side effects.
if __name__ == "__main__":
    print("=== root_gainlora/T5_small/ ===")
    process_dir(ROOT, ROOT / 'T5_small', ROOT_LONG, ROOT_SUPERNI)

    print("\n=== improve_gainlora/T5_small/ ===")
    process_dir(IMPROVE, IMPROVE / 'T5_small', IMPROVE_LONG, IMPROVE_SUPERNI)

    print("\nDone!")
improve_gainlora/T5_small/gen_script_long_order3_t5_small_gainlora_inflora.sh ADDED
@@ -0,0 +1,763 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --predict_with_generate \
17
+ --model_name_or_path $2 \
18
+ --data_dir CL_Benchmark \
19
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
20
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yelp \
21
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp \
22
+ --per_device_train_batch_size 32 \
23
+ --per_device_eval_batch_size 256 \
24
+ --gradient_accumulation_steps 1 \
25
+ --learning_rate 0.0003 \
26
+ --num_train_epochs 10 \
27
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
28
+ --max_source_length 512 \
29
+ --max_target_length 50 \
30
+ --generation_max_length 50 \
31
+ --add_task_name False \
32
+ --add_dataset_name False \
33
+ --overwrite_output_dir \
34
+ --overwrite_cache \
35
+ --lr_scheduler_type constant \
36
+ --warmup_steps 0 \
37
+ --logging_strategy steps \
38
+ --logging_steps 10 \
39
+ --metric_for_best_model eval_exact_match \
40
+ --evaluation_strategy steps \
41
+ --save_strategy steps \
42
+ --save_total_limit 1 \
43
+ --load_best_model_at_end \
44
+ --lora_r 8 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --add_instruction_replay \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --mlp_hidden_dim 100 \
51
+ --model_name gainlora_inflora \
52
+ --threshold 0.995 \
53
+ --transthreshold 0.995
54
+
55
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/checkpoint*
56
+
57
+ sleep 5
58
+
59
+
60
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
61
+ --do_train \
62
+ --predict_with_generate \
63
+ --model_name_or_path $2 \
64
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights/trans_input.pt \
65
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights \
66
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights/prompts_keys_till_now.pt \
67
+ --data_dir CL_Benchmark \
68
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
69
+ --gen_data_dir generated_data/lora_gen_long_t5 \
70
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/amazon \
71
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon \
72
+ --per_device_train_batch_size 32 \
73
+ --per_device_eval_batch_size 256 \
74
+ --gradient_accumulation_steps 1 \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 10 \
77
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_exact_match_for_amazon \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 8 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --data_replay_freq -1 \
98
+ --kl_ratio 0.1 \
99
+ --attn_temperature 1 \
100
+ --mlp_hidden_dim 100 \
101
+ --model_name gainlora_inflora \
102
+ --threshold 0.995 \
103
+ --transthreshold 0.995
104
+
105
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/checkpoint*
106
+
107
+ sleep 5
108
+
109
+
110
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
111
+ --do_train \
112
+ --predict_with_generate \
113
+ --model_name_or_path $2 \
114
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights/trans_input.pt \
115
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights \
116
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights/prompts_keys_till_now.pt \
117
+ --data_dir CL_Benchmark \
118
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
119
+ --gen_data_dir generated_data/lora_gen_long_t5 \
120
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/mnli \
121
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli \
122
+ --per_device_train_batch_size 32 \
123
+ --per_device_eval_batch_size 256 \
124
+ --gradient_accumulation_steps 1 \
125
+ --learning_rate 0.0003 \
126
+ --num_train_epochs 10 \
127
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
128
+ --max_source_length 512 \
129
+ --max_target_length 50 \
130
+ --generation_max_length 50 \
131
+ --add_task_name False \
132
+ --add_dataset_name False \
133
+ --overwrite_output_dir \
134
+ --overwrite_cache \
135
+ --lr_scheduler_type constant \
136
+ --warmup_steps 0 \
137
+ --logging_strategy steps \
138
+ --logging_steps 10 \
139
+ --metric_for_best_model eval_exact_match_for_mnli \
140
+ --evaluation_strategy steps \
141
+ --save_strategy steps \
142
+ --save_total_limit 1 \
143
+ --load_best_model_at_end \
144
+ --lora_r 8 \
145
+ --lora_alpha 32 \
146
+ --lora_dropout 0.0 \
147
+ --data_replay_freq -1 \
148
+ --kl_ratio 0.1 \
149
+ --attn_temperature 1 \
150
+ --mlp_hidden_dim 100 \
151
+ --model_name gainlora_inflora \
152
+ --threshold 0.995 \
153
+ --transthreshold 0.995
154
+
155
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/checkpoint*
156
+
157
+ sleep 5
158
+
159
+
160
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
161
+ --do_train \
162
+ --predict_with_generate \
163
+ --model_name_or_path $2 \
164
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights/trans_input.pt \
165
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights \
166
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights/prompts_keys_till_now.pt \
167
+ --data_dir CL_Benchmark \
168
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
169
+ --gen_data_dir generated_data/lora_gen_long_t5 \
170
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/cb \
171
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb \
172
+ --per_device_train_batch_size 32 \
173
+ --per_device_eval_batch_size 256 \
174
+ --gradient_accumulation_steps 1 \
175
+ --learning_rate 0.0003 \
176
+ --num_train_epochs 10 \
177
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
178
+ --max_source_length 512 \
179
+ --max_target_length 50 \
180
+ --generation_max_length 50 \
181
+ --add_task_name False \
182
+ --add_dataset_name False \
183
+ --overwrite_output_dir \
184
+ --overwrite_cache \
185
+ --lr_scheduler_type constant \
186
+ --warmup_steps 0 \
187
+ --logging_strategy steps \
188
+ --logging_steps 10 \
189
+ --metric_for_best_model eval_exact_match_for_cb \
190
+ --evaluation_strategy steps \
191
+ --save_strategy steps \
192
+ --save_total_limit 1 \
193
+ --load_best_model_at_end \
194
+ --lora_r 8 \
195
+ --lora_alpha 32 \
196
+ --lora_dropout 0.0 \
197
+ --data_replay_freq -1 \
198
+ --kl_ratio 0.1 \
199
+ --attn_temperature 1 \
200
+ --mlp_hidden_dim 100 \
201
+ --model_name gainlora_inflora \
202
+ --threshold 0.995 \
203
+ --transthreshold 0.995
204
+
205
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/checkpoint*
206
+
207
+ sleep 5
208
+
209
+
210
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
211
+ --do_train \
212
+ --predict_with_generate \
213
+ --model_name_or_path $2 \
214
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights/trans_input.pt \
215
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights \
216
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights/prompts_keys_till_now.pt \
217
+ --data_dir CL_Benchmark \
218
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
219
+ --gen_data_dir generated_data/lora_gen_long_t5 \
220
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/copa \
221
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa \
222
+ --per_device_train_batch_size 32 \
223
+ --per_device_eval_batch_size 256 \
224
+ --gradient_accumulation_steps 1 \
225
+ --learning_rate 0.0003 \
226
+ --num_train_epochs 10 \
227
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
228
+ --max_source_length 512 \
229
+ --max_target_length 50 \
230
+ --generation_max_length 50 \
231
+ --add_task_name False \
232
+ --add_dataset_name False \
233
+ --overwrite_output_dir \
234
+ --overwrite_cache \
235
+ --lr_scheduler_type constant \
236
+ --warmup_steps 0 \
237
+ --logging_strategy steps \
238
+ --logging_steps 10 \
239
+ --metric_for_best_model eval_exact_match_for_copa \
240
+ --evaluation_strategy steps \
241
+ --save_strategy steps \
242
+ --save_total_limit 1 \
243
+ --load_best_model_at_end \
244
+ --lora_r 8 \
245
+ --lora_alpha 32 \
246
+ --lora_dropout 0.0 \
247
+ --data_replay_freq -1 \
248
+ --kl_ratio 0.1 \
249
+ --attn_temperature 1 \
250
+ --mlp_hidden_dim 100 \
251
+ --model_name gainlora_inflora \
252
+ --threshold 0.995 \
253
+ --transthreshold 0.995
254
+
255
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/checkpoint*
256
+
257
+ sleep 5
258
+
259
+
260
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
261
+ --do_train \
262
+ --predict_with_generate \
263
+ --model_name_or_path $2 \
264
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights/trans_input.pt \
265
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights \
266
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights/prompts_keys_till_now.pt \
267
+ --data_dir CL_Benchmark \
268
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
269
+ --gen_data_dir generated_data/lora_gen_long_t5 \
270
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/qqp \
271
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp \
272
+ --per_device_train_batch_size 32 \
273
+ --per_device_eval_batch_size 256 \
274
+ --gradient_accumulation_steps 1 \
275
+ --learning_rate 0.0003 \
276
+ --num_train_epochs 10 \
277
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
278
+ --max_source_length 512 \
279
+ --max_target_length 50 \
280
+ --generation_max_length 50 \
281
+ --add_task_name False \
282
+ --add_dataset_name False \
283
+ --overwrite_output_dir \
284
+ --overwrite_cache \
285
+ --lr_scheduler_type constant \
286
+ --warmup_steps 0 \
287
+ --logging_strategy steps \
288
+ --logging_steps 10 \
289
+ --metric_for_best_model eval_exact_match_for_qqp \
290
+ --evaluation_strategy steps \
291
+ --save_strategy steps \
292
+ --save_total_limit 1 \
293
+ --load_best_model_at_end \
294
+ --lora_r 8 \
295
+ --lora_alpha 32 \
296
+ --lora_dropout 0.0 \
297
+ --data_replay_freq -1 \
298
+ --kl_ratio 0.1 \
299
+ --attn_temperature 1 \
300
+ --mlp_hidden_dim 100 \
301
+ --model_name gainlora_inflora \
302
+ --threshold 0.995 \
303
+ --transthreshold 0.995
304
+
305
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/checkpoint*
306
+
307
+ sleep 5
308
+
309
+
310
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
311
+ --do_train \
312
+ --predict_with_generate \
313
+ --model_name_or_path $2 \
314
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights/trans_input.pt \
315
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights \
316
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights/prompts_keys_till_now.pt \
317
+ --data_dir CL_Benchmark \
318
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
319
+ --gen_data_dir generated_data/lora_gen_long_t5 \
320
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/rte \
321
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte \
322
+ --per_device_train_batch_size 32 \
323
+ --per_device_eval_batch_size 256 \
324
+ --gradient_accumulation_steps 1 \
325
+ --learning_rate 0.0003 \
326
+ --num_train_epochs 10 \
327
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
328
+ --max_source_length 512 \
329
+ --max_target_length 50 \
330
+ --generation_max_length 50 \
331
+ --add_task_name False \
332
+ --add_dataset_name False \
333
+ --overwrite_output_dir \
334
+ --overwrite_cache \
335
+ --lr_scheduler_type constant \
336
+ --warmup_steps 0 \
337
+ --logging_strategy steps \
338
+ --logging_steps 10 \
339
+ --metric_for_best_model eval_exact_match_for_rte \
340
+ --evaluation_strategy steps \
341
+ --save_strategy steps \
342
+ --save_total_limit 1 \
343
+ --load_best_model_at_end \
344
+ --lora_r 8 \
345
+ --lora_alpha 32 \
346
+ --lora_dropout 0.0 \
347
+ --data_replay_freq -1 \
348
+ --kl_ratio 0.1 \
349
+ --attn_temperature 1 \
350
+ --mlp_hidden_dim 100 \
351
+ --model_name gainlora_inflora \
352
+ --threshold 0.995 \
353
+ --transthreshold 0.995
354
+
355
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/checkpoint*
356
+
357
+ sleep 5
358
+
359
+
360
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
361
+ --do_train \
362
+ --predict_with_generate \
363
+ --model_name_or_path $2 \
364
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights/trans_input.pt \
365
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights \
366
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights/prompts_keys_till_now.pt \
367
+ --data_dir CL_Benchmark \
368
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
369
+ --gen_data_dir generated_data/lora_gen_long_t5 \
370
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/imdb \
371
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb \
372
+ --per_device_train_batch_size 32 \
373
+ --per_device_eval_batch_size 256 \
374
+ --gradient_accumulation_steps 1 \
375
+ --learning_rate 0.0003 \
376
+ --num_train_epochs 10 \
377
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
378
+ --max_source_length 512 \
379
+ --max_target_length 50 \
380
+ --generation_max_length 50 \
381
+ --add_task_name False \
382
+ --add_dataset_name False \
383
+ --overwrite_output_dir \
384
+ --overwrite_cache \
385
+ --lr_scheduler_type constant \
386
+ --warmup_steps 0 \
387
+ --logging_strategy steps \
388
+ --logging_steps 10 \
389
+ --metric_for_best_model eval_exact_match_for_imdb \
390
+ --evaluation_strategy steps \
391
+ --save_strategy steps \
392
+ --save_total_limit 1 \
393
+ --load_best_model_at_end \
394
+ --lora_r 8 \
395
+ --lora_alpha 32 \
396
+ --lora_dropout 0.0 \
397
+ --data_replay_freq -1 \
398
+ --kl_ratio 0.1 \
399
+ --attn_temperature 1 \
400
+ --mlp_hidden_dim 100 \
401
+ --model_name gainlora_inflora \
402
+ --threshold 0.995 \
403
+ --transthreshold 0.995
404
+
405
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/checkpoint*
406
+
407
+ sleep 5
408
+
409
+ # Stage 9/15 (sst2): --load_checkpoint_from/--previous_prompt_key_path point at 8-imdb, --previous_lora_path lists stages 1-8; "$2" is quoted so a model path with spaces stays one argument.
410
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
411
+ --do_train \
412
+ --predict_with_generate \
413
+ --model_name_or_path "$2" \
414
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights/trans_input.pt \
415
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights \
416
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights/prompts_keys_till_now.pt \
417
+ --data_dir CL_Benchmark \
418
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
419
+ --gen_data_dir generated_data/lora_gen_long_t5 \
420
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/sst2 \
421
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2 \
422
+ --per_device_train_batch_size 32 \
423
+ --per_device_eval_batch_size 256 \
424
+ --gradient_accumulation_steps 1 \
425
+ --learning_rate 0.0003 \
426
+ --num_train_epochs 10 \
427
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
428
+ --max_source_length 512 \
429
+ --max_target_length 50 \
430
+ --generation_max_length 50 \
431
+ --add_task_name False \
432
+ --add_dataset_name False \
433
+ --overwrite_output_dir \
434
+ --overwrite_cache \
435
+ --lr_scheduler_type constant \
436
+ --warmup_steps 0 \
437
+ --logging_strategy steps \
438
+ --logging_steps 10 \
439
+ --metric_for_best_model eval_exact_match_for_sst2 \
440
+ --evaluation_strategy steps \
441
+ --save_strategy steps \
442
+ --save_total_limit 1 \
443
+ --load_best_model_at_end \
444
+ --lora_r 8 \
445
+ --lora_alpha 32 \
446
+ --lora_dropout 0.0 \
447
+ --data_replay_freq -1 \
448
+ --kl_ratio 0.1 \
449
+ --attn_temperature 1 \
450
+ --mlp_hidden_dim 100 \
451
+ --model_name gainlora_inflora \
452
+ --threshold 0.995 \
453
+ --transthreshold 0.995
454
+
455
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/checkpoint*
456
+
457
+ sleep 5
458
+
459
+ # Stage 10/15 (dbpedia): --load_checkpoint_from/--previous_prompt_key_path point at 9-sst2, --previous_lora_path lists stages 1-9; "$2" is quoted so a model path with spaces stays one argument.
460
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
461
+ --do_train \
462
+ --predict_with_generate \
463
+ --model_name_or_path "$2" \
464
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights/trans_input.pt \
465
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights \
466
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights/prompts_keys_till_now.pt \
467
+ --data_dir CL_Benchmark \
468
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
469
+ --gen_data_dir generated_data/lora_gen_long_t5 \
470
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/dbpedia \
471
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia \
472
+ --per_device_train_batch_size 32 \
473
+ --per_device_eval_batch_size 256 \
474
+ --gradient_accumulation_steps 1 \
475
+ --learning_rate 0.0003 \
476
+ --num_train_epochs 10 \
477
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
478
+ --max_source_length 512 \
479
+ --max_target_length 50 \
480
+ --generation_max_length 50 \
481
+ --add_task_name False \
482
+ --add_dataset_name False \
483
+ --overwrite_output_dir \
484
+ --overwrite_cache \
485
+ --lr_scheduler_type constant \
486
+ --warmup_steps 0 \
487
+ --logging_strategy steps \
488
+ --logging_steps 10 \
489
+ --metric_for_best_model eval_exact_match_for_dbpedia \
490
+ --evaluation_strategy steps \
491
+ --save_strategy steps \
492
+ --save_total_limit 1 \
493
+ --load_best_model_at_end \
494
+ --lora_r 8 \
495
+ --lora_alpha 32 \
496
+ --lora_dropout 0.0 \
497
+ --data_replay_freq -1 \
498
+ --kl_ratio 0.1 \
499
+ --attn_temperature 1 \
500
+ --mlp_hidden_dim 100 \
501
+ --model_name gainlora_inflora \
502
+ --threshold 0.995 \
503
+ --transthreshold 0.995
504
+
505
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/checkpoint*
506
+
507
+ sleep 5
508
+
509
+ # Stage 11/15 (agnews): --load_checkpoint_from/--previous_prompt_key_path point at 10-dbpedia, --previous_lora_path lists stages 1-10; "$2" is quoted so a model path with spaces stays one argument.
510
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
511
+ --do_train \
512
+ --predict_with_generate \
513
+ --model_name_or_path "$2" \
514
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights/trans_input.pt \
515
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights \
516
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights/prompts_keys_till_now.pt \
517
+ --data_dir CL_Benchmark \
518
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
519
+ --gen_data_dir generated_data/lora_gen_long_t5 \
520
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/agnews \
521
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews \
522
+ --per_device_train_batch_size 32 \
523
+ --per_device_eval_batch_size 256 \
524
+ --gradient_accumulation_steps 1 \
525
+ --learning_rate 0.0003 \
526
+ --num_train_epochs 10 \
527
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
528
+ --max_source_length 512 \
529
+ --max_target_length 50 \
530
+ --generation_max_length 50 \
531
+ --add_task_name False \
532
+ --add_dataset_name False \
533
+ --overwrite_output_dir \
534
+ --overwrite_cache \
535
+ --lr_scheduler_type constant \
536
+ --warmup_steps 0 \
537
+ --logging_strategy steps \
538
+ --logging_steps 10 \
539
+ --metric_for_best_model eval_exact_match_for_agnews \
540
+ --evaluation_strategy steps \
541
+ --save_strategy steps \
542
+ --save_total_limit 1 \
543
+ --load_best_model_at_end \
544
+ --lora_r 8 \
545
+ --lora_alpha 32 \
546
+ --lora_dropout 0.0 \
547
+ --data_replay_freq -1 \
548
+ --kl_ratio 0.1 \
549
+ --attn_temperature 1 \
550
+ --mlp_hidden_dim 100 \
551
+ --model_name gainlora_inflora \
552
+ --threshold 0.995 \
553
+ --transthreshold 0.995
554
+
555
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/checkpoint*
556
+
557
+ sleep 5
558
+
559
+ # Stage 12/15 (yahoo): --load_checkpoint_from/--previous_prompt_key_path point at 11-agnews, --previous_lora_path lists stages 1-11; "$2" is quoted so a model path with spaces stays one argument.
560
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
561
+ --do_train \
562
+ --predict_with_generate \
563
+ --model_name_or_path "$2" \
564
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights/trans_input.pt \
565
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights \
566
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights/prompts_keys_till_now.pt \
567
+ --data_dir CL_Benchmark \
568
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
569
+ --gen_data_dir generated_data/lora_gen_long_t5 \
570
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yahoo \
571
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo \
572
+ --per_device_train_batch_size 32 \
573
+ --per_device_eval_batch_size 256 \
574
+ --gradient_accumulation_steps 1 \
575
+ --learning_rate 0.0003 \
576
+ --num_train_epochs 10 \
577
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
578
+ --max_source_length 512 \
579
+ --max_target_length 50 \
580
+ --generation_max_length 50 \
581
+ --add_task_name False \
582
+ --add_dataset_name False \
583
+ --overwrite_output_dir \
584
+ --overwrite_cache \
585
+ --lr_scheduler_type constant \
586
+ --warmup_steps 0 \
587
+ --logging_strategy steps \
588
+ --logging_steps 10 \
589
+ --metric_for_best_model eval_exact_match_for_yahoo \
590
+ --evaluation_strategy steps \
591
+ --save_strategy steps \
592
+ --save_total_limit 1 \
593
+ --load_best_model_at_end \
594
+ --lora_r 8 \
595
+ --lora_alpha 32 \
596
+ --lora_dropout 0.0 \
597
+ --data_replay_freq -1 \
598
+ --kl_ratio 0.1 \
599
+ --attn_temperature 1 \
600
+ --mlp_hidden_dim 100 \
601
+ --model_name gainlora_inflora \
602
+ --threshold 0.995 \
603
+ --transthreshold 0.995
604
+
605
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/checkpoint*
606
+
607
+ sleep 5
608
+
609
+ # Stage 13/15 (multirc): --load_checkpoint_from/--previous_prompt_key_path point at 12-yahoo, --previous_lora_path lists stages 1-12; "$2" is quoted so a model path with spaces stays one argument.
610
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
611
+ --do_train \
612
+ --predict_with_generate \
613
+ --model_name_or_path "$2" \
614
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights/trans_input.pt \
615
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights \
616
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights/prompts_keys_till_now.pt \
617
+ --data_dir CL_Benchmark \
618
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
619
+ --gen_data_dir generated_data/lora_gen_long_t5 \
620
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/multirc \
621
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc \
622
+ --per_device_train_batch_size 32 \
623
+ --per_device_eval_batch_size 256 \
624
+ --gradient_accumulation_steps 1 \
625
+ --learning_rate 0.0003 \
626
+ --num_train_epochs 10 \
627
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
628
+ --max_source_length 512 \
629
+ --max_target_length 50 \
630
+ --generation_max_length 50 \
631
+ --add_task_name False \
632
+ --add_dataset_name False \
633
+ --overwrite_output_dir \
634
+ --overwrite_cache \
635
+ --lr_scheduler_type constant \
636
+ --warmup_steps 0 \
637
+ --logging_strategy steps \
638
+ --logging_steps 10 \
639
+ --metric_for_best_model eval_exact_match_for_multirc \
640
+ --evaluation_strategy steps \
641
+ --save_strategy steps \
642
+ --save_total_limit 1 \
643
+ --load_best_model_at_end \
644
+ --lora_r 8 \
645
+ --lora_alpha 32 \
646
+ --lora_dropout 0.0 \
647
+ --data_replay_freq -1 \
648
+ --kl_ratio 0.1 \
649
+ --attn_temperature 1 \
650
+ --mlp_hidden_dim 100 \
651
+ --model_name gainlora_inflora \
652
+ --threshold 0.995 \
653
+ --transthreshold 0.995
654
+
655
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/checkpoint*
656
+
657
+ sleep 5
658
+
659
+ # Stage 14/15 (boolq): --load_checkpoint_from/--previous_prompt_key_path point at 13-multirc, --previous_lora_path lists stages 1-13; "$2" is quoted so a model path with spaces stays one argument.
660
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
661
+ --do_train \
662
+ --predict_with_generate \
663
+ --model_name_or_path "$2" \
664
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/saved_weights/trans_input.pt \
665
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/saved_weights \
666
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/saved_weights/prompts_keys_till_now.pt \
667
+ --data_dir CL_Benchmark \
668
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
669
+ --gen_data_dir generated_data/lora_gen_long_t5 \
670
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/boolq \
671
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq \
672
+ --per_device_train_batch_size 32 \
673
+ --per_device_eval_batch_size 256 \
674
+ --gradient_accumulation_steps 1 \
675
+ --learning_rate 0.0003 \
676
+ --num_train_epochs 10 \
677
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
678
+ --max_source_length 512 \
679
+ --max_target_length 50 \
680
+ --generation_max_length 50 \
681
+ --add_task_name False \
682
+ --add_dataset_name False \
683
+ --overwrite_output_dir \
684
+ --overwrite_cache \
685
+ --lr_scheduler_type constant \
686
+ --warmup_steps 0 \
687
+ --logging_strategy steps \
688
+ --logging_steps 10 \
689
+ --metric_for_best_model eval_exact_match_for_boolq \
690
+ --evaluation_strategy steps \
691
+ --save_strategy steps \
692
+ --save_total_limit 1 \
693
+ --load_best_model_at_end \
694
+ --lora_r 8 \
695
+ --lora_alpha 32 \
696
+ --lora_dropout 0.0 \
697
+ --data_replay_freq -1 \
698
+ --kl_ratio 0.1 \
699
+ --attn_temperature 1 \
700
+ --mlp_hidden_dim 100 \
701
+ --model_name gainlora_inflora \
702
+ --threshold 0.995 \
703
+ --transthreshold 0.995
704
+
705
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq/checkpoint*
706
+
707
+ sleep 5
708
+
709
+ # Stage 15/15 (wic): last task; also passes --do_predict. Checkpoint/prompt-key paths point at 14-boolq, --previous_lora_path lists stages 1-14; "$2" is quoted so a model path with spaces stays one argument.
710
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
711
+ --do_train \
712
+ --do_predict \
713
+ --predict_with_generate \
714
+ --model_name_or_path "$2" \
715
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq/saved_weights/trans_input.pt \
716
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq/saved_weights \
717
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq/saved_weights/prompts_keys_till_now.pt \
718
+ --data_dir CL_Benchmark \
719
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
720
+ --gen_data_dir generated_data/lora_gen_long_t5 \
721
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/wic \
722
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/15-wic \
723
+ --per_device_train_batch_size 32 \
724
+ --per_device_eval_batch_size 256 \
725
+ --gradient_accumulation_steps 1 \
726
+ --learning_rate 0.0003 \
727
+ --num_train_epochs 10 \
728
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
729
+ --max_source_length 512 \
730
+ --max_target_length 50 \
731
+ --generation_max_length 50 \
732
+ --add_task_name False \
733
+ --add_dataset_name False \
734
+ --overwrite_output_dir \
735
+ --overwrite_cache \
736
+ --lr_scheduler_type constant \
737
+ --warmup_steps 0 \
738
+ --logging_strategy steps \
739
+ --logging_steps 10 \
740
+ --metric_for_best_model eval_exact_match_for_wic \
741
+ --evaluation_strategy steps \
742
+ --save_strategy steps \
743
+ --save_total_limit 1 \
744
+ --load_best_model_at_end \
745
+ --lora_r 8 \
746
+ --lora_alpha 32 \
747
+ --lora_dropout 0.0 \
748
+ --data_replay_freq -1 \
749
+ --kl_ratio 0.1 \
750
+ --attn_temperature 1 \
751
+ --mlp_hidden_dim 100 \
752
+ --model_name gainlora_inflora \
753
+ --threshold 0.995 \
754
+ --transthreshold 0.995
755
+
756
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/15-wic/checkpoint*
757
+
758
+ sleep 5
759
+ # Final step: invoke score.py with the run name given twice as its two positional arguments, on the GPU(s) selected by $1.
760
+ CUDA_VISIBLE_DEVICES=$1 python score.py gen_script_long_order3_t5_small_gainlora_inflora gen_script_long_order3_t5_small_gainlora_inflora
761
+
762
+
763
+
improve_gainlora/T5_small/gen_script_long_order3_t5_small_inflora.sh ADDED
@@ -0,0 +1,744 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+ # Positional args used by the stages below: $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path. PCI_BUS_ID makes CUDA number GPUs in PCI bus order.
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+ # Picks one random integer in 25000-30000. NOTE(review): $port is not referenced by any stage visible in this part of the script - confirm it is still needed.
12
+ port=$(shuf -i25000-30000 -n1)
13
+ # Stage 1/15 (yelp): first task, no previous-stage paths are passed; "$2" is quoted so a model path with spaces stays one argument. NOTE(review): metric is eval_exact_match here but eval_exact_match_for_<task> in later stages - confirm intended.
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path "$2" \
19
+ --data_dir CL_Benchmark \
20
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
21
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yelp \
22
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp \
23
+ --per_device_train_batch_size 32 \
24
+ --per_device_eval_batch_size 256 \
25
+ --gradient_accumulation_steps 1 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 10 \
28
+ --run_name gen_script_long_order3_t5_small_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_exact_match \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --load_best_model_at_end \
45
+ --lora_r 8 \
46
+ --lora_alpha 32 \
47
+ --lora_dropout 0.0 \
48
+ --add_instruction_replay \
49
+ --data_replay_freq -1 \
50
+ --replay_after_n_epoch 0 \
51
+ --model_name inflora \
52
+ --threshold 0.995
53
+
54
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/checkpoint*
55
+
56
+ sleep 5
57
+
58
+ # Stage 2/15 (amazon): --load_checkpoint_from/--previous_prompt_key_path point at 1-yelp, --previous_lora_path lists stage 1; "$2" is quoted so a model path with spaces stays one argument.
59
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
60
+ --do_train \
61
+ --do_predict \
62
+ --predict_with_generate \
63
+ --model_name_or_path "$2" \
64
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights/trans_input.pt \
65
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights \
66
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights/prompts_keys_till_now.pt \
67
+ --data_dir CL_Benchmark \
68
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
69
+ --gen_data_dir generated_data/lora_gen_long_t5 \
70
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/amazon \
71
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon \
72
+ --per_device_train_batch_size 32 \
73
+ --per_device_eval_batch_size 256 \
74
+ --gradient_accumulation_steps 1 \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 10 \
77
+ --run_name gen_script_long_order3_t5_small_inflora \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_exact_match_for_amazon \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 8 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --data_replay_freq -1 \
98
+ --kl_ratio 0.1 \
99
+ --attn_temperature 1 \
100
+ --model_name inflora \
101
+ --threshold 0.995
102
+
103
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/checkpoint*
104
+
105
+ sleep 5
106
+
107
+ # Stage 3/15 (mnli): --load_checkpoint_from/--previous_prompt_key_path point at 2-amazon, --previous_lora_path lists stages 1-2; "$2" is quoted so a model path with spaces stays one argument.
108
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
109
+ --do_train \
110
+ --do_predict \
111
+ --predict_with_generate \
112
+ --model_name_or_path "$2" \
113
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights/trans_input.pt \
114
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights \
115
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights/prompts_keys_till_now.pt \
116
+ --data_dir CL_Benchmark \
117
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
118
+ --gen_data_dir generated_data/lora_gen_long_t5 \
119
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/mnli \
120
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli \
121
+ --per_device_train_batch_size 32 \
122
+ --per_device_eval_batch_size 256 \
123
+ --gradient_accumulation_steps 1 \
124
+ --learning_rate 0.0003 \
125
+ --num_train_epochs 10 \
126
+ --run_name gen_script_long_order3_t5_small_inflora \
127
+ --max_source_length 512 \
128
+ --max_target_length 50 \
129
+ --generation_max_length 50 \
130
+ --add_task_name False \
131
+ --add_dataset_name False \
132
+ --overwrite_output_dir \
133
+ --overwrite_cache \
134
+ --lr_scheduler_type constant \
135
+ --warmup_steps 0 \
136
+ --logging_strategy steps \
137
+ --logging_steps 10 \
138
+ --metric_for_best_model eval_exact_match_for_mnli \
139
+ --evaluation_strategy steps \
140
+ --save_strategy steps \
141
+ --save_total_limit 1 \
142
+ --load_best_model_at_end \
143
+ --lora_r 8 \
144
+ --lora_alpha 32 \
145
+ --lora_dropout 0.0 \
146
+ --data_replay_freq -1 \
147
+ --kl_ratio 0.1 \
148
+ --attn_temperature 1 \
149
+ --model_name inflora \
150
+ --threshold 0.995
151
+
152
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/checkpoint*
153
+
154
+ sleep 5
155
+
156
+ # Stage 4/15 (cb): --load_checkpoint_from/--previous_prompt_key_path point at 3-mnli, --previous_lora_path lists stages 1-3; "$2" is quoted so a model path with spaces stays one argument.
157
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
158
+ --do_train \
159
+ --do_predict \
160
+ --predict_with_generate \
161
+ --model_name_or_path "$2" \
162
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights/trans_input.pt \
163
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights \
164
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights/prompts_keys_till_now.pt \
165
+ --data_dir CL_Benchmark \
166
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
167
+ --gen_data_dir generated_data/lora_gen_long_t5 \
168
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/cb \
169
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb \
170
+ --per_device_train_batch_size 32 \
171
+ --per_device_eval_batch_size 256 \
172
+ --gradient_accumulation_steps 1 \
173
+ --learning_rate 0.0003 \
174
+ --num_train_epochs 10 \
175
+ --run_name gen_script_long_order3_t5_small_inflora \
176
+ --max_source_length 512 \
177
+ --max_target_length 50 \
178
+ --generation_max_length 50 \
179
+ --add_task_name False \
180
+ --add_dataset_name False \
181
+ --overwrite_output_dir \
182
+ --overwrite_cache \
183
+ --lr_scheduler_type constant \
184
+ --warmup_steps 0 \
185
+ --logging_strategy steps \
186
+ --logging_steps 10 \
187
+ --metric_for_best_model eval_exact_match_for_cb \
188
+ --evaluation_strategy steps \
189
+ --save_strategy steps \
190
+ --save_total_limit 1 \
191
+ --load_best_model_at_end \
192
+ --lora_r 8 \
193
+ --lora_alpha 32 \
194
+ --lora_dropout 0.0 \
195
+ --data_replay_freq -1 \
196
+ --kl_ratio 0.1 \
197
+ --attn_temperature 1 \
198
+ --model_name inflora \
199
+ --threshold 0.995
200
+
201
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/checkpoint*
202
+
203
+ sleep 5
204
+
205
+ # Stage 5/15 (copa): --load_checkpoint_from/--previous_prompt_key_path point at 4-cb, --previous_lora_path lists stages 1-4; "$2" is quoted so a model path with spaces stays one argument.
206
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
207
+ --do_train \
208
+ --do_predict \
209
+ --predict_with_generate \
210
+ --model_name_or_path "$2" \
211
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights/trans_input.pt \
212
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights \
213
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights/prompts_keys_till_now.pt \
214
+ --data_dir CL_Benchmark \
215
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
216
+ --gen_data_dir generated_data/lora_gen_long_t5 \
217
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/copa \
218
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa \
219
+ --per_device_train_batch_size 32 \
220
+ --per_device_eval_batch_size 256 \
221
+ --gradient_accumulation_steps 1 \
222
+ --learning_rate 0.0003 \
223
+ --num_train_epochs 10 \
224
+ --run_name gen_script_long_order3_t5_small_inflora \
225
+ --max_source_length 512 \
226
+ --max_target_length 50 \
227
+ --generation_max_length 50 \
228
+ --add_task_name False \
229
+ --add_dataset_name False \
230
+ --overwrite_output_dir \
231
+ --overwrite_cache \
232
+ --lr_scheduler_type constant \
233
+ --warmup_steps 0 \
234
+ --logging_strategy steps \
235
+ --logging_steps 10 \
236
+ --metric_for_best_model eval_exact_match_for_copa \
237
+ --evaluation_strategy steps \
238
+ --save_strategy steps \
239
+ --save_total_limit 1 \
240
+ --load_best_model_at_end \
241
+ --lora_r 8 \
242
+ --lora_alpha 32 \
243
+ --lora_dropout 0.0 \
244
+ --data_replay_freq -1 \
245
+ --kl_ratio 0.1 \
246
+ --attn_temperature 1 \
247
+ --model_name inflora \
248
+ --threshold 0.995
249
+
250
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/checkpoint*
251
+
252
+ sleep 5
253
+
254
+
255
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
256
+ --do_train \
257
+ --do_predict \
258
+ --predict_with_generate \
259
+ --model_name_or_path $2 \
260
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights/trans_input.pt \
261
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights \
262
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights/prompts_keys_till_now.pt \
263
+ --data_dir CL_Benchmark \
264
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
265
+ --gen_data_dir generated_data/lora_gen_long_t5 \
266
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/qqp \
267
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp \
268
+ --per_device_train_batch_size 32 \
269
+ --per_device_eval_batch_size 256 \
270
+ --gradient_accumulation_steps 1 \
271
+ --learning_rate 0.0003 \
272
+ --num_train_epochs 10 \
273
+ --run_name gen_script_long_order3_t5_small_inflora \
274
+ --max_source_length 512 \
275
+ --max_target_length 50 \
276
+ --generation_max_length 50 \
277
+ --add_task_name False \
278
+ --add_dataset_name False \
279
+ --overwrite_output_dir \
280
+ --overwrite_cache \
281
+ --lr_scheduler_type constant \
282
+ --warmup_steps 0 \
283
+ --logging_strategy steps \
284
+ --logging_steps 10 \
285
+ --metric_for_best_model eval_exact_match_for_qqp \
286
+ --evaluation_strategy steps \
287
+ --save_strategy steps \
288
+ --save_total_limit 1 \
289
+ --load_best_model_at_end \
290
+ --lora_r 8 \
291
+ --lora_alpha 32 \
292
+ --lora_dropout 0.0 \
293
+ --data_replay_freq -1 \
294
+ --kl_ratio 0.1 \
295
+ --attn_temperature 1 \
296
+ --model_name inflora \
297
+ --threshold 0.995
298
+
299
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/checkpoint*
300
+
301
+ sleep 5
302
+
303
+
304
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
305
+ --do_train \
306
+ --do_predict \
307
+ --predict_with_generate \
308
+ --model_name_or_path $2 \
309
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights/trans_input.pt \
310
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights \
311
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights/prompts_keys_till_now.pt \
312
+ --data_dir CL_Benchmark \
313
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
314
+ --gen_data_dir generated_data/lora_gen_long_t5 \
315
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/rte \
316
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte \
317
+ --per_device_train_batch_size 32 \
318
+ --per_device_eval_batch_size 256 \
319
+ --gradient_accumulation_steps 1 \
320
+ --learning_rate 0.0003 \
321
+ --num_train_epochs 10 \
322
+ --run_name gen_script_long_order3_t5_small_inflora \
323
+ --max_source_length 512 \
324
+ --max_target_length 50 \
325
+ --generation_max_length 50 \
326
+ --add_task_name False \
327
+ --add_dataset_name False \
328
+ --overwrite_output_dir \
329
+ --overwrite_cache \
330
+ --lr_scheduler_type constant \
331
+ --warmup_steps 0 \
332
+ --logging_strategy steps \
333
+ --logging_steps 10 \
334
+ --metric_for_best_model eval_exact_match_for_rte \
335
+ --evaluation_strategy steps \
336
+ --save_strategy steps \
337
+ --save_total_limit 1 \
338
+ --load_best_model_at_end \
339
+ --lora_r 8 \
340
+ --lora_alpha 32 \
341
+ --lora_dropout 0.0 \
342
+ --data_replay_freq -1 \
343
+ --kl_ratio 0.1 \
344
+ --attn_temperature 1 \
345
+ --model_name inflora \
346
+ --threshold 0.995
347
+
348
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/checkpoint*
349
+
350
+ sleep 5
351
+
352
+
353
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
354
+ --do_train \
355
+ --do_predict \
356
+ --predict_with_generate \
357
+ --model_name_or_path $2 \
358
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights/trans_input.pt \
359
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights \
360
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights/prompts_keys_till_now.pt \
361
+ --data_dir CL_Benchmark \
362
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
363
+ --gen_data_dir generated_data/lora_gen_long_t5 \
364
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/imdb \
365
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb \
366
+ --per_device_train_batch_size 32 \
367
+ --per_device_eval_batch_size 256 \
368
+ --gradient_accumulation_steps 1 \
369
+ --learning_rate 0.0003 \
370
+ --num_train_epochs 10 \
371
+ --run_name gen_script_long_order3_t5_small_inflora \
372
+ --max_source_length 512 \
373
+ --max_target_length 50 \
374
+ --generation_max_length 50 \
375
+ --add_task_name False \
376
+ --add_dataset_name False \
377
+ --overwrite_output_dir \
378
+ --overwrite_cache \
379
+ --lr_scheduler_type constant \
380
+ --warmup_steps 0 \
381
+ --logging_strategy steps \
382
+ --logging_steps 10 \
383
+ --metric_for_best_model eval_exact_match_for_imdb \
384
+ --evaluation_strategy steps \
385
+ --save_strategy steps \
386
+ --save_total_limit 1 \
387
+ --load_best_model_at_end \
388
+ --lora_r 8 \
389
+ --lora_alpha 32 \
390
+ --lora_dropout 0.0 \
391
+ --data_replay_freq -1 \
392
+ --kl_ratio 0.1 \
393
+ --attn_temperature 1 \
394
+ --model_name inflora \
395
+ --threshold 0.995
396
+
397
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/checkpoint*
398
+
399
+ sleep 5
400
+
401
+
402
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
403
+ --do_train \
404
+ --do_predict \
405
+ --predict_with_generate \
406
+ --model_name_or_path $2 \
407
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights/trans_input.pt \
408
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights \
409
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights/prompts_keys_till_now.pt \
410
+ --data_dir CL_Benchmark \
411
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
412
+ --gen_data_dir generated_data/lora_gen_long_t5 \
413
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/sst2 \
414
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2 \
415
+ --per_device_train_batch_size 32 \
416
+ --per_device_eval_batch_size 256 \
417
+ --gradient_accumulation_steps 1 \
418
+ --learning_rate 0.0003 \
419
+ --num_train_epochs 10 \
420
+ --run_name gen_script_long_order3_t5_small_inflora \
421
+ --max_source_length 512 \
422
+ --max_target_length 50 \
423
+ --generation_max_length 50 \
424
+ --add_task_name False \
425
+ --add_dataset_name False \
426
+ --overwrite_output_dir \
427
+ --overwrite_cache \
428
+ --lr_scheduler_type constant \
429
+ --warmup_steps 0 \
430
+ --logging_strategy steps \
431
+ --logging_steps 10 \
432
+ --metric_for_best_model eval_exact_match_for_sst2 \
433
+ --evaluation_strategy steps \
434
+ --save_strategy steps \
435
+ --save_total_limit 1 \
436
+ --load_best_model_at_end \
437
+ --lora_r 8 \
438
+ --lora_alpha 32 \
439
+ --lora_dropout 0.0 \
440
+ --data_replay_freq -1 \
441
+ --kl_ratio 0.1 \
442
+ --attn_temperature 1 \
443
+ --model_name inflora \
444
+ --threshold 0.995
445
+
446
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/checkpoint*
447
+
448
+ sleep 5
449
+
450
+
451
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
452
+ --do_train \
453
+ --do_predict \
454
+ --predict_with_generate \
455
+ --model_name_or_path $2 \
456
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights/trans_input.pt \
457
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights \
458
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights/prompts_keys_till_now.pt \
459
+ --data_dir CL_Benchmark \
460
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
461
+ --gen_data_dir generated_data/lora_gen_long_t5 \
462
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/dbpedia \
463
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia \
464
+ --per_device_train_batch_size 32 \
465
+ --per_device_eval_batch_size 256 \
466
+ --gradient_accumulation_steps 1 \
467
+ --learning_rate 0.0003 \
468
+ --num_train_epochs 10 \
469
+ --run_name gen_script_long_order3_t5_small_inflora \
470
+ --max_source_length 512 \
471
+ --max_target_length 50 \
472
+ --generation_max_length 50 \
473
+ --add_task_name False \
474
+ --add_dataset_name False \
475
+ --overwrite_output_dir \
476
+ --overwrite_cache \
477
+ --lr_scheduler_type constant \
478
+ --warmup_steps 0 \
479
+ --logging_strategy steps \
480
+ --logging_steps 10 \
481
+ --metric_for_best_model eval_exact_match_for_dbpedia \
482
+ --evaluation_strategy steps \
483
+ --save_strategy steps \
484
+ --save_total_limit 1 \
485
+ --load_best_model_at_end \
486
+ --lora_r 8 \
487
+ --lora_alpha 32 \
488
+ --lora_dropout 0.0 \
489
+ --data_replay_freq -1 \
490
+ --kl_ratio 0.1 \
491
+ --attn_temperature 1 \
492
+ --model_name inflora \
493
+ --threshold 0.995
494
+
495
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/checkpoint*
496
+
497
+ sleep 5
498
+
499
+
500
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
501
+ --do_train \
502
+ --do_predict \
503
+ --predict_with_generate \
504
+ --model_name_or_path $2 \
505
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights/trans_input.pt \
506
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights \
507
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights/prompts_keys_till_now.pt \
508
+ --data_dir CL_Benchmark \
509
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
510
+ --gen_data_dir generated_data/lora_gen_long_t5 \
511
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/agnews \
512
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews \
513
+ --per_device_train_batch_size 32 \
514
+ --per_device_eval_batch_size 256 \
515
+ --gradient_accumulation_steps 1 \
516
+ --learning_rate 0.0003 \
517
+ --num_train_epochs 10 \
518
+ --run_name gen_script_long_order3_t5_small_inflora \
519
+ --max_source_length 512 \
520
+ --max_target_length 50 \
521
+ --generation_max_length 50 \
522
+ --add_task_name False \
523
+ --add_dataset_name False \
524
+ --overwrite_output_dir \
525
+ --overwrite_cache \
526
+ --lr_scheduler_type constant \
527
+ --warmup_steps 0 \
528
+ --logging_strategy steps \
529
+ --logging_steps 10 \
530
+ --metric_for_best_model eval_exact_match_for_agnews \
531
+ --evaluation_strategy steps \
532
+ --save_strategy steps \
533
+ --save_total_limit 1 \
534
+ --load_best_model_at_end \
535
+ --lora_r 8 \
536
+ --lora_alpha 32 \
537
+ --lora_dropout 0.0 \
538
+ --data_replay_freq -1 \
539
+ --kl_ratio 0.1 \
540
+ --attn_temperature 1 \
541
+ --model_name inflora \
542
+ --threshold 0.995
543
+
544
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/checkpoint*
545
+
546
+ sleep 5
547
+
548
+
549
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
550
+ --do_train \
551
+ --do_predict \
552
+ --predict_with_generate \
553
+ --model_name_or_path $2 \
554
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights/trans_input.pt \
555
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights \
556
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights/prompts_keys_till_now.pt \
557
+ --data_dir CL_Benchmark \
558
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
559
+ --gen_data_dir generated_data/lora_gen_long_t5 \
560
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yahoo \
561
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo \
562
+ --per_device_train_batch_size 32 \
563
+ --per_device_eval_batch_size 256 \
564
+ --gradient_accumulation_steps 1 \
565
+ --learning_rate 0.0003 \
566
+ --num_train_epochs 10 \
567
+ --run_name gen_script_long_order3_t5_small_inflora \
568
+ --max_source_length 512 \
569
+ --max_target_length 50 \
570
+ --generation_max_length 50 \
571
+ --add_task_name False \
572
+ --add_dataset_name False \
573
+ --overwrite_output_dir \
574
+ --overwrite_cache \
575
+ --lr_scheduler_type constant \
576
+ --warmup_steps 0 \
577
+ --logging_strategy steps \
578
+ --logging_steps 10 \
579
+ --metric_for_best_model eval_exact_match_for_yahoo \
580
+ --evaluation_strategy steps \
581
+ --save_strategy steps \
582
+ --save_total_limit 1 \
583
+ --load_best_model_at_end \
584
+ --lora_r 8 \
585
+ --lora_alpha 32 \
586
+ --lora_dropout 0.0 \
587
+ --data_replay_freq -1 \
588
+ --kl_ratio 0.1 \
589
+ --attn_temperature 1 \
590
+ --model_name inflora \
591
+ --threshold 0.995
592
+
593
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/checkpoint*
594
+
595
+ sleep 5
596
+
597
+
598
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
599
+ --do_train \
600
+ --do_predict \
601
+ --predict_with_generate \
602
+ --model_name_or_path $2 \
603
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights/trans_input.pt \
604
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights \
605
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights/prompts_keys_till_now.pt \
606
+ --data_dir CL_Benchmark \
607
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
608
+ --gen_data_dir generated_data/lora_gen_long_t5 \
609
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/multirc \
610
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc \
611
+ --per_device_train_batch_size 32 \
612
+ --per_device_eval_batch_size 256 \
613
+ --gradient_accumulation_steps 1 \
614
+ --learning_rate 0.0003 \
615
+ --num_train_epochs 10 \
616
+ --run_name gen_script_long_order3_t5_small_inflora \
617
+ --max_source_length 512 \
618
+ --max_target_length 50 \
619
+ --generation_max_length 50 \
620
+ --add_task_name False \
621
+ --add_dataset_name False \
622
+ --overwrite_output_dir \
623
+ --overwrite_cache \
624
+ --lr_scheduler_type constant \
625
+ --warmup_steps 0 \
626
+ --logging_strategy steps \
627
+ --logging_steps 10 \
628
+ --metric_for_best_model eval_exact_match_for_multirc \
629
+ --evaluation_strategy steps \
630
+ --save_strategy steps \
631
+ --save_total_limit 1 \
632
+ --load_best_model_at_end \
633
+ --lora_r 8 \
634
+ --lora_alpha 32 \
635
+ --lora_dropout 0.0 \
636
+ --data_replay_freq -1 \
637
+ --kl_ratio 0.1 \
638
+ --attn_temperature 1 \
639
+ --model_name inflora \
640
+ --threshold 0.995
641
+
642
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/checkpoint*
643
+
644
+ sleep 5
645
+
646
+
647
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
648
+ --do_train \
649
+ --do_predict \
650
+ --predict_with_generate \
651
+ --model_name_or_path $2 \
652
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/saved_weights/trans_input.pt \
653
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/saved_weights \
654
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/saved_weights/prompts_keys_till_now.pt \
655
+ --data_dir CL_Benchmark \
656
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
657
+ --gen_data_dir generated_data/lora_gen_long_t5 \
658
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/boolq \
659
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq \
660
+ --per_device_train_batch_size 32 \
661
+ --per_device_eval_batch_size 256 \
662
+ --gradient_accumulation_steps 1 \
663
+ --learning_rate 0.0003 \
664
+ --num_train_epochs 10 \
665
+ --run_name gen_script_long_order3_t5_small_inflora \
666
+ --max_source_length 512 \
667
+ --max_target_length 50 \
668
+ --generation_max_length 50 \
669
+ --add_task_name False \
670
+ --add_dataset_name False \
671
+ --overwrite_output_dir \
672
+ --overwrite_cache \
673
+ --lr_scheduler_type constant \
674
+ --warmup_steps 0 \
675
+ --logging_strategy steps \
676
+ --logging_steps 10 \
677
+ --metric_for_best_model eval_exact_match_for_boolq \
678
+ --evaluation_strategy steps \
679
+ --save_strategy steps \
680
+ --save_total_limit 1 \
681
+ --load_best_model_at_end \
682
+ --lora_r 8 \
683
+ --lora_alpha 32 \
684
+ --lora_dropout 0.0 \
685
+ --data_replay_freq -1 \
686
+ --kl_ratio 0.1 \
687
+ --attn_temperature 1 \
688
+ --model_name inflora \
689
+ --threshold 0.995
690
+
691
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq/checkpoint*
692
+
693
+ sleep 5
694
+
695
+
696
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
697
+ --do_train \
698
+ --do_predict \
699
+ --predict_with_generate \
700
+ --model_name_or_path $2 \
701
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq/saved_weights/trans_input.pt \
702
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq/saved_weights \
703
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq/saved_weights/prompts_keys_till_now.pt \
704
+ --data_dir CL_Benchmark \
705
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
706
+ --gen_data_dir generated_data/lora_gen_long_t5 \
707
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/wic \
708
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/15-wic \
709
+ --per_device_train_batch_size 32 \
710
+ --per_device_eval_batch_size 256 \
711
+ --gradient_accumulation_steps 1 \
712
+ --learning_rate 0.0003 \
713
+ --num_train_epochs 10 \
714
+ --run_name gen_script_long_order3_t5_small_inflora \
715
+ --max_source_length 512 \
716
+ --max_target_length 50 \
717
+ --generation_max_length 50 \
718
+ --add_task_name False \
719
+ --add_dataset_name False \
720
+ --overwrite_output_dir \
721
+ --overwrite_cache \
722
+ --lr_scheduler_type constant \
723
+ --warmup_steps 0 \
724
+ --logging_strategy steps \
725
+ --logging_steps 10 \
726
+ --metric_for_best_model eval_exact_match_for_wic \
727
+ --evaluation_strategy steps \
728
+ --save_strategy steps \
729
+ --save_total_limit 1 \
730
+ --load_best_model_at_end \
731
+ --lora_r 8 \
732
+ --lora_alpha 32 \
733
+ --lora_dropout 0.0 \
734
+ --data_replay_freq -1 \
735
+ --kl_ratio 0.1 \
736
+ --attn_temperature 1 \
737
+ --model_name inflora \
738
+ --threshold 0.995
739
+
740
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/15-wic/checkpoint*
741
+
742
+ sleep 5
743
+
744
+ CUDA_VISIBLE_DEVICES=$1 python score.py gen_script_long_order3_t5_small_inflora gen_script_long_order3_t5_small_inflora
improve_gainlora/T5_small/gen_script_long_order3_t5_small_specroute.sh ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:2
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ # ============================================================
15
+ # Auto-detect GPU count and type for optimal parallelism (sets NUM_GPUS, GPU_MEM, IS_T4).
16
+ # ============================================================
17
+ NUM_GPUS=$(nvidia-smi -L 2>/dev/null | wc -l)  # line count of `nvidia-smi -L` output; 0 when it prints nothing
18
+ GPU_MEM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)  # memory.total of the first reported GPU only, without unit suffix
19
+
20
+ if [ -z "$GPU_MEM" ]; then  # empty query result: nvidia-smi failed or reported no GPU, so abort
21
+ echo "ERROR: No GPU detected!"
22
+ exit 1
23
+ fi
24
+
25
+ # Determine GPU type: memory.total below 20000 is labelled T4, anything else high-memory. NOTE(review): only the first GPU is inspected - assumes all GPUs match; confirm.
26
+ if [ "$GPU_MEM" -lt 20000 ]; then
27
+ IS_T4=1
28
+ echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
29
+ else
30
+ IS_T4=0
31
+ echo "[GPU] Detected high-memory GPUs (${GPU_MEM}MB VRAM each)"
32
+ fi
33
+
34
+ # Determine parallelism strategy: sets GPU_MODE (selects batch sizes later), GPU_IDS (exported as CUDA_VISIBLE_DEVICES) and FP16_FLAG (empty in every mode, so runs stay fp32).
35
+ if [ "$IS_T4" -eq 1 ] && [ "$NUM_GPUS" -ge 2 ]; then
36
+ GPU_MODE="t4_2gpu"
37
+ GPU_IDS="0,1"  # fixed to the first two GPUs; the $1 argument is ignored in this mode
38
+ FP16_FLAG=""
39
+ echo "[GPU] Strategy: 2x T4 DataParallel + fp32"
40
+ elif [ "$IS_T4" -eq 1 ]; then
41
+ GPU_MODE="t4_1gpu"
42
+ GPU_IDS="${1:-0}"  # GPU id from $1, defaulting to 0
43
+ FP16_FLAG=""
44
+ echo "[GPU] Strategy: 1x T4 + fp32"
45
+ else
46
+ GPU_MODE="a100"  # used for every non-T4 GPU, not only A100s
47
+ GPU_IDS="${1:-0}"
48
+ FP16_FLAG=""
49
+ echo "[GPU] Strategy: A100 (single GPU, fp32)"
50
+ fi
51
+
52
+ echo "[GPU] Using CUDA_VISIBLE_DEVICES=$GPU_IDS"
53
+ echo "============================================================"
54
+ echo ""
55
+
56
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
57
+ BSZ=16; GA=1; EVAL_BSZ=256
58
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
59
+ BSZ=32; GA=1; EVAL_BSZ=256
60
+ else
61
+ BSZ=64; GA=1; EVAL_BSZ=512
62
+ fi
63
+ # Task 1 (yelp, first entry of --task_order): no --previous_lora_path is passed. $FP16_FLAG is empty and expands to nothing. NOTE(review): unlike the inflora runs, --do_predict is not passed here - confirm this is intended.
64
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
65
+ --do_train \
66
+ --predict_with_generate \
67
+ --model_name_or_path $2 \
68
+ --data_dir CL_Benchmark \
69
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
70
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yelp \
71
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp \
72
+ --per_device_train_batch_size $BSZ \
73
+ --per_device_eval_batch_size $EVAL_BSZ \
74
+ --gradient_accumulation_steps $GA \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 10 \
77
+ --run_name gen_script_long_order3_t5_small_specroute \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_exact_match \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 8 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --run_single True \
98
+ --data_replay_freq -1 \
99
+ --mlp_hidden_dim 100 \
100
+ --model_name specroute \
101
+ --threshold 0.995 \
102
+ --transthreshold 0.995 \
103
+ $FP16_FLAG
104
+ # Delete everything matching checkpoint* in this task's output dir; the next run reads only its saved_weights/ directory.
105
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/checkpoint*
106
+
107
+ sleep 5
108
+
109
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
110
+ BSZ=16; GA=1; EVAL_BSZ=256
111
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
112
+ BSZ=32; GA=1; EVAL_BSZ=256
113
+ else
114
+ BSZ=64; GA=1; EVAL_BSZ=512
115
+ fi
116
+ # Task 2/15 (amazon): receives the saved_weights of task 1 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
117
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
118
+ --do_train \
119
+ --predict_with_generate \
120
+ --model_name_or_path "$2" \
121
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights \
122
+ --data_dir CL_Benchmark \
123
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
124
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/amazon \
125
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon \
126
+ --per_device_train_batch_size $BSZ \
127
+ --per_device_eval_batch_size $EVAL_BSZ \
128
+ --gradient_accumulation_steps $GA \
129
+ --learning_rate 0.0003 \
130
+ --num_train_epochs 10 \
131
+ --run_name gen_script_long_order3_t5_small_specroute \
132
+ --max_source_length 512 \
133
+ --max_target_length 50 \
134
+ --generation_max_length 50 \
135
+ --add_task_name False \
136
+ --add_dataset_name False \
137
+ --overwrite_output_dir \
138
+ --overwrite_cache \
139
+ --lr_scheduler_type constant \
140
+ --warmup_steps 0 \
141
+ --logging_strategy steps \
142
+ --logging_steps 10 \
143
+ --metric_for_best_model eval_exact_match_for_amazon \
144
+ --evaluation_strategy steps \
145
+ --save_strategy steps \
146
+ --save_total_limit 1 \
147
+ --load_best_model_at_end \
148
+ --lora_r 8 \
149
+ --lora_alpha 32 \
150
+ --lora_dropout 0.0 \
151
+ --data_replay_freq -1 \
152
+ --mlp_hidden_dim 100 \
153
+ --model_name specroute \
154
+ --threshold 0.995 \
155
+ --transthreshold 0.995 \
156
+ $FP16_FLAG || { echo "ERROR: task 2 (amazon) failed; aborting before dependent tasks run" >&2; exit 1; }
157
+
158
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
159
+
160
+ sleep 5
161
+
162
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
163
+ BSZ=16; GA=1; EVAL_BSZ=256
164
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
165
+ BSZ=32; GA=1; EVAL_BSZ=256
166
+ else
167
+ BSZ=64; GA=1; EVAL_BSZ=512
168
+ fi
169
+ # Task 3/15 (mnli): receives the saved_weights of tasks 1-2 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
170
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
171
+ --do_train \
172
+ --predict_with_generate \
173
+ --model_name_or_path "$2" \
174
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights \
175
+ --data_dir CL_Benchmark \
176
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
177
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/mnli \
178
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli \
179
+ --per_device_train_batch_size $BSZ \
180
+ --per_device_eval_batch_size $EVAL_BSZ \
181
+ --gradient_accumulation_steps $GA \
182
+ --learning_rate 0.0003 \
183
+ --num_train_epochs 10 \
184
+ --run_name gen_script_long_order3_t5_small_specroute \
185
+ --max_source_length 512 \
186
+ --max_target_length 50 \
187
+ --generation_max_length 50 \
188
+ --add_task_name False \
189
+ --add_dataset_name False \
190
+ --overwrite_output_dir \
191
+ --overwrite_cache \
192
+ --lr_scheduler_type constant \
193
+ --warmup_steps 0 \
194
+ --logging_strategy steps \
195
+ --logging_steps 10 \
196
+ --metric_for_best_model eval_exact_match_for_mnli \
197
+ --evaluation_strategy steps \
198
+ --save_strategy steps \
199
+ --save_total_limit 1 \
200
+ --load_best_model_at_end \
201
+ --lora_r 8 \
202
+ --lora_alpha 32 \
203
+ --lora_dropout 0.0 \
204
+ --data_replay_freq -1 \
205
+ --mlp_hidden_dim 100 \
206
+ --model_name specroute \
207
+ --threshold 0.995 \
208
+ --transthreshold 0.995 \
209
+ $FP16_FLAG || { echo "ERROR: task 3 (mnli) failed; aborting before dependent tasks run" >&2; exit 1; }
210
+
211
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
212
+
213
+ sleep 5
214
+
215
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
216
+ BSZ=16; GA=1; EVAL_BSZ=256
217
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
218
+ BSZ=32; GA=1; EVAL_BSZ=256
219
+ else
220
+ BSZ=64; GA=1; EVAL_BSZ=512
221
+ fi
222
+ # Task 4/15 (cb): receives the saved_weights of tasks 1-3 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
223
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
224
+ --do_train \
225
+ --predict_with_generate \
226
+ --model_name_or_path "$2" \
227
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights \
228
+ --data_dir CL_Benchmark \
229
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
230
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/cb \
231
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb \
232
+ --per_device_train_batch_size $BSZ \
233
+ --per_device_eval_batch_size $EVAL_BSZ \
234
+ --gradient_accumulation_steps $GA \
235
+ --learning_rate 0.0003 \
236
+ --num_train_epochs 10 \
237
+ --run_name gen_script_long_order3_t5_small_specroute \
238
+ --max_source_length 512 \
239
+ --max_target_length 50 \
240
+ --generation_max_length 50 \
241
+ --add_task_name False \
242
+ --add_dataset_name False \
243
+ --overwrite_output_dir \
244
+ --overwrite_cache \
245
+ --lr_scheduler_type constant \
246
+ --warmup_steps 0 \
247
+ --logging_strategy steps \
248
+ --logging_steps 10 \
249
+ --metric_for_best_model eval_exact_match_for_cb \
250
+ --evaluation_strategy steps \
251
+ --save_strategy steps \
252
+ --save_total_limit 1 \
253
+ --load_best_model_at_end \
254
+ --lora_r 8 \
255
+ --lora_alpha 32 \
256
+ --lora_dropout 0.0 \
257
+ --data_replay_freq -1 \
258
+ --mlp_hidden_dim 100 \
259
+ --model_name specroute \
260
+ --threshold 0.995 \
261
+ --transthreshold 0.995 \
262
+ $FP16_FLAG || { echo "ERROR: task 4 (cb) failed; aborting before dependent tasks run" >&2; exit 1; }
263
+
264
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
265
+
266
+ sleep 5
267
+
268
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
269
+ BSZ=16; GA=1; EVAL_BSZ=256
270
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
271
+ BSZ=32; GA=1; EVAL_BSZ=256
272
+ else
273
+ BSZ=64; GA=1; EVAL_BSZ=512
274
+ fi
275
+ # Task 5/15 (copa): receives the saved_weights of tasks 1-4 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
276
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
277
+ --do_train \
278
+ --predict_with_generate \
279
+ --model_name_or_path "$2" \
280
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights \
281
+ --data_dir CL_Benchmark \
282
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
283
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/copa \
284
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa \
285
+ --per_device_train_batch_size $BSZ \
286
+ --per_device_eval_batch_size $EVAL_BSZ \
287
+ --gradient_accumulation_steps $GA \
288
+ --learning_rate 0.0003 \
289
+ --num_train_epochs 10 \
290
+ --run_name gen_script_long_order3_t5_small_specroute \
291
+ --max_source_length 512 \
292
+ --max_target_length 50 \
293
+ --generation_max_length 50 \
294
+ --add_task_name False \
295
+ --add_dataset_name False \
296
+ --overwrite_output_dir \
297
+ --overwrite_cache \
298
+ --lr_scheduler_type constant \
299
+ --warmup_steps 0 \
300
+ --logging_strategy steps \
301
+ --logging_steps 10 \
302
+ --metric_for_best_model eval_exact_match_for_copa \
303
+ --evaluation_strategy steps \
304
+ --save_strategy steps \
305
+ --save_total_limit 1 \
306
+ --load_best_model_at_end \
307
+ --lora_r 8 \
308
+ --lora_alpha 32 \
309
+ --lora_dropout 0.0 \
310
+ --data_replay_freq -1 \
311
+ --mlp_hidden_dim 100 \
312
+ --model_name specroute \
313
+ --threshold 0.995 \
314
+ --transthreshold 0.995 \
315
+ $FP16_FLAG || { echo "ERROR: task 5 (copa) failed; aborting before dependent tasks run" >&2; exit 1; }
316
+
317
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
318
+
319
+ sleep 5
320
+
321
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
322
+ BSZ=16; GA=1; EVAL_BSZ=256
323
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
324
+ BSZ=32; GA=1; EVAL_BSZ=256
325
+ else
326
+ BSZ=64; GA=1; EVAL_BSZ=512
327
+ fi
328
+ # Task 6/15 (qqp): receives the saved_weights of tasks 1-5 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
329
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
330
+ --do_train \
331
+ --predict_with_generate \
332
+ --model_name_or_path "$2" \
333
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights \
334
+ --data_dir CL_Benchmark \
335
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
336
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/qqp \
337
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp \
338
+ --per_device_train_batch_size $BSZ \
339
+ --per_device_eval_batch_size $EVAL_BSZ \
340
+ --gradient_accumulation_steps $GA \
341
+ --learning_rate 0.0003 \
342
+ --num_train_epochs 10 \
343
+ --run_name gen_script_long_order3_t5_small_specroute \
344
+ --max_source_length 512 \
345
+ --max_target_length 50 \
346
+ --generation_max_length 50 \
347
+ --add_task_name False \
348
+ --add_dataset_name False \
349
+ --overwrite_output_dir \
350
+ --overwrite_cache \
351
+ --lr_scheduler_type constant \
352
+ --warmup_steps 0 \
353
+ --logging_strategy steps \
354
+ --logging_steps 10 \
355
+ --metric_for_best_model eval_exact_match_for_qqp \
356
+ --evaluation_strategy steps \
357
+ --save_strategy steps \
358
+ --save_total_limit 1 \
359
+ --load_best_model_at_end \
360
+ --lora_r 8 \
361
+ --lora_alpha 32 \
362
+ --lora_dropout 0.0 \
363
+ --data_replay_freq -1 \
364
+ --mlp_hidden_dim 100 \
365
+ --model_name specroute \
366
+ --threshold 0.995 \
367
+ --transthreshold 0.995 \
368
+ $FP16_FLAG || { echo "ERROR: task 6 (qqp) failed; aborting before dependent tasks run" >&2; exit 1; }
369
+
370
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
371
+
372
+ sleep 5
373
+
374
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
375
+ BSZ=16; GA=1; EVAL_BSZ=256
376
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
377
+ BSZ=32; GA=1; EVAL_BSZ=256
378
+ else
379
+ BSZ=64; GA=1; EVAL_BSZ=512
380
+ fi
381
+ # Task 7/15 (rte): receives the saved_weights of tasks 1-6 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
382
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
383
+ --do_train \
384
+ --predict_with_generate \
385
+ --model_name_or_path "$2" \
386
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights \
387
+ --data_dir CL_Benchmark \
388
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
389
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/rte \
390
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte \
391
+ --per_device_train_batch_size $BSZ \
392
+ --per_device_eval_batch_size $EVAL_BSZ \
393
+ --gradient_accumulation_steps $GA \
394
+ --learning_rate 0.0003 \
395
+ --num_train_epochs 10 \
396
+ --run_name gen_script_long_order3_t5_small_specroute \
397
+ --max_source_length 512 \
398
+ --max_target_length 50 \
399
+ --generation_max_length 50 \
400
+ --add_task_name False \
401
+ --add_dataset_name False \
402
+ --overwrite_output_dir \
403
+ --overwrite_cache \
404
+ --lr_scheduler_type constant \
405
+ --warmup_steps 0 \
406
+ --logging_strategy steps \
407
+ --logging_steps 10 \
408
+ --metric_for_best_model eval_exact_match_for_rte \
409
+ --evaluation_strategy steps \
410
+ --save_strategy steps \
411
+ --save_total_limit 1 \
412
+ --load_best_model_at_end \
413
+ --lora_r 8 \
414
+ --lora_alpha 32 \
415
+ --lora_dropout 0.0 \
416
+ --data_replay_freq -1 \
417
+ --mlp_hidden_dim 100 \
418
+ --model_name specroute \
419
+ --threshold 0.995 \
420
+ --transthreshold 0.995 \
421
+ $FP16_FLAG || { echo "ERROR: task 7 (rte) failed; aborting before dependent tasks run" >&2; exit 1; }
422
+
423
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
424
+
425
+ sleep 5
426
+
427
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
428
+ BSZ=16; GA=1; EVAL_BSZ=256
429
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
430
+ BSZ=32; GA=1; EVAL_BSZ=256
431
+ else
432
+ BSZ=64; GA=1; EVAL_BSZ=512
433
+ fi
434
+ # Task 8/15 (imdb): receives the saved_weights of tasks 1-7 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
435
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
436
+ --do_train \
437
+ --predict_with_generate \
438
+ --model_name_or_path "$2" \
439
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/saved_weights \
440
+ --data_dir CL_Benchmark \
441
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
442
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/imdb \
443
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb \
444
+ --per_device_train_batch_size $BSZ \
445
+ --per_device_eval_batch_size $EVAL_BSZ \
446
+ --gradient_accumulation_steps $GA \
447
+ --learning_rate 0.0003 \
448
+ --num_train_epochs 10 \
449
+ --run_name gen_script_long_order3_t5_small_specroute \
450
+ --max_source_length 512 \
451
+ --max_target_length 50 \
452
+ --generation_max_length 50 \
453
+ --add_task_name False \
454
+ --add_dataset_name False \
455
+ --overwrite_output_dir \
456
+ --overwrite_cache \
457
+ --lr_scheduler_type constant \
458
+ --warmup_steps 0 \
459
+ --logging_strategy steps \
460
+ --logging_steps 10 \
461
+ --metric_for_best_model eval_exact_match_for_imdb \
462
+ --evaluation_strategy steps \
463
+ --save_strategy steps \
464
+ --save_total_limit 1 \
465
+ --load_best_model_at_end \
466
+ --lora_r 8 \
467
+ --lora_alpha 32 \
468
+ --lora_dropout 0.0 \
469
+ --data_replay_freq -1 \
470
+ --mlp_hidden_dim 100 \
471
+ --model_name specroute \
472
+ --threshold 0.995 \
473
+ --transthreshold 0.995 \
474
+ $FP16_FLAG || { echo "ERROR: task 8 (imdb) failed; aborting before dependent tasks run" >&2; exit 1; }
475
+
476
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
477
+
478
+ sleep 5
479
+
480
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
481
+ BSZ=16; GA=1; EVAL_BSZ=256
482
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
483
+ BSZ=32; GA=1; EVAL_BSZ=256
484
+ else
485
+ BSZ=64; GA=1; EVAL_BSZ=512
486
+ fi
487
+ # Task 9/15 (sst2): receives the saved_weights of tasks 1-8 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
488
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
489
+ --do_train \
490
+ --predict_with_generate \
491
+ --model_name_or_path "$2" \
492
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb/saved_weights \
493
+ --data_dir CL_Benchmark \
494
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
495
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/sst2 \
496
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/9-sst2 \
497
+ --per_device_train_batch_size $BSZ \
498
+ --per_device_eval_batch_size $EVAL_BSZ \
499
+ --gradient_accumulation_steps $GA \
500
+ --learning_rate 0.0003 \
501
+ --num_train_epochs 10 \
502
+ --run_name gen_script_long_order3_t5_small_specroute \
503
+ --max_source_length 512 \
504
+ --max_target_length 50 \
505
+ --generation_max_length 50 \
506
+ --add_task_name False \
507
+ --add_dataset_name False \
508
+ --overwrite_output_dir \
509
+ --overwrite_cache \
510
+ --lr_scheduler_type constant \
511
+ --warmup_steps 0 \
512
+ --logging_strategy steps \
513
+ --logging_steps 10 \
514
+ --metric_for_best_model eval_exact_match_for_sst2 \
515
+ --evaluation_strategy steps \
516
+ --save_strategy steps \
517
+ --save_total_limit 1 \
518
+ --load_best_model_at_end \
519
+ --lora_r 8 \
520
+ --lora_alpha 32 \
521
+ --lora_dropout 0.0 \
522
+ --data_replay_freq -1 \
523
+ --mlp_hidden_dim 100 \
524
+ --model_name specroute \
525
+ --threshold 0.995 \
526
+ --transthreshold 0.995 \
527
+ $FP16_FLAG || { echo "ERROR: task 9 (sst2) failed; aborting before dependent tasks run" >&2; exit 1; }
528
+
529
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/9-sst2/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
530
+
531
+ sleep 5
532
+
533
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
534
+ BSZ=16; GA=1; EVAL_BSZ=256
535
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
536
+ BSZ=32; GA=1; EVAL_BSZ=256
537
+ else
538
+ BSZ=64; GA=1; EVAL_BSZ=512
539
+ fi
540
+ # Task 10/15 (dbpedia): receives the saved_weights of tasks 1-9 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
541
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
542
+ --do_train \
543
+ --predict_with_generate \
544
+ --model_name_or_path "$2" \
545
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/9-sst2/saved_weights \
546
+ --data_dir CL_Benchmark \
547
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
548
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/dbpedia \
549
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/10-dbpedia \
550
+ --per_device_train_batch_size $BSZ \
551
+ --per_device_eval_batch_size $EVAL_BSZ \
552
+ --gradient_accumulation_steps $GA \
553
+ --learning_rate 0.0003 \
554
+ --num_train_epochs 10 \
555
+ --run_name gen_script_long_order3_t5_small_specroute \
556
+ --max_source_length 512 \
557
+ --max_target_length 50 \
558
+ --generation_max_length 50 \
559
+ --add_task_name False \
560
+ --add_dataset_name False \
561
+ --overwrite_output_dir \
562
+ --overwrite_cache \
563
+ --lr_scheduler_type constant \
564
+ --warmup_steps 0 \
565
+ --logging_strategy steps \
566
+ --logging_steps 10 \
567
+ --metric_for_best_model eval_exact_match_for_dbpedia \
568
+ --evaluation_strategy steps \
569
+ --save_strategy steps \
570
+ --save_total_limit 1 \
571
+ --load_best_model_at_end \
572
+ --lora_r 8 \
573
+ --lora_alpha 32 \
574
+ --lora_dropout 0.0 \
575
+ --data_replay_freq -1 \
576
+ --mlp_hidden_dim 100 \
577
+ --model_name specroute \
578
+ --threshold 0.995 \
579
+ --transthreshold 0.995 \
580
+ $FP16_FLAG || { echo "ERROR: task 10 (dbpedia) failed; aborting before dependent tasks run" >&2; exit 1; }
581
+
582
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/10-dbpedia/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
583
+
584
+ sleep 5
585
+
586
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
587
+ BSZ=16; GA=1; EVAL_BSZ=256
588
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
589
+ BSZ=32; GA=1; EVAL_BSZ=256
590
+ else
591
+ BSZ=64; GA=1; EVAL_BSZ=512
592
+ fi
593
+ # Task 11/15 (agnews): receives the saved_weights of tasks 1-10 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
594
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
595
+ --do_train \
596
+ --predict_with_generate \
597
+ --model_name_or_path "$2" \
598
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/10-dbpedia/saved_weights \
599
+ --data_dir CL_Benchmark \
600
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
601
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/agnews \
602
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/11-agnews \
603
+ --per_device_train_batch_size $BSZ \
604
+ --per_device_eval_batch_size $EVAL_BSZ \
605
+ --gradient_accumulation_steps $GA \
606
+ --learning_rate 0.0003 \
607
+ --num_train_epochs 10 \
608
+ --run_name gen_script_long_order3_t5_small_specroute \
609
+ --max_source_length 512 \
610
+ --max_target_length 50 \
611
+ --generation_max_length 50 \
612
+ --add_task_name False \
613
+ --add_dataset_name False \
614
+ --overwrite_output_dir \
615
+ --overwrite_cache \
616
+ --lr_scheduler_type constant \
617
+ --warmup_steps 0 \
618
+ --logging_strategy steps \
619
+ --logging_steps 10 \
620
+ --metric_for_best_model eval_exact_match_for_agnews \
621
+ --evaluation_strategy steps \
622
+ --save_strategy steps \
623
+ --save_total_limit 1 \
624
+ --load_best_model_at_end \
625
+ --lora_r 8 \
626
+ --lora_alpha 32 \
627
+ --lora_dropout 0.0 \
628
+ --data_replay_freq -1 \
629
+ --mlp_hidden_dim 100 \
630
+ --model_name specroute \
631
+ --threshold 0.995 \
632
+ --transthreshold 0.995 \
633
+ $FP16_FLAG || { echo "ERROR: task 11 (agnews) failed; aborting before dependent tasks run" >&2; exit 1; }
634
+
635
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/11-agnews/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
636
+
637
+ sleep 5
638
+
639
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
640
+ BSZ=16; GA=1; EVAL_BSZ=256
641
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
642
+ BSZ=32; GA=1; EVAL_BSZ=256
643
+ else
644
+ BSZ=64; GA=1; EVAL_BSZ=512
645
+ fi
646
+ # Task 12/15 (yahoo): receives the saved_weights of tasks 1-11 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
647
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
648
+ --do_train \
649
+ --predict_with_generate \
650
+ --model_name_or_path "$2" \
651
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/11-agnews/saved_weights \
652
+ --data_dir CL_Benchmark \
653
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
654
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yahoo \
655
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/12-yahoo \
656
+ --per_device_train_batch_size $BSZ \
657
+ --per_device_eval_batch_size $EVAL_BSZ \
658
+ --gradient_accumulation_steps $GA \
659
+ --learning_rate 0.0003 \
660
+ --num_train_epochs 10 \
661
+ --run_name gen_script_long_order3_t5_small_specroute \
662
+ --max_source_length 512 \
663
+ --max_target_length 50 \
664
+ --generation_max_length 50 \
665
+ --add_task_name False \
666
+ --add_dataset_name False \
667
+ --overwrite_output_dir \
668
+ --overwrite_cache \
669
+ --lr_scheduler_type constant \
670
+ --warmup_steps 0 \
671
+ --logging_strategy steps \
672
+ --logging_steps 10 \
673
+ --metric_for_best_model eval_exact_match_for_yahoo \
674
+ --evaluation_strategy steps \
675
+ --save_strategy steps \
676
+ --save_total_limit 1 \
677
+ --load_best_model_at_end \
678
+ --lora_r 8 \
679
+ --lora_alpha 32 \
680
+ --lora_dropout 0.0 \
681
+ --data_replay_freq -1 \
682
+ --mlp_hidden_dim 100 \
683
+ --model_name specroute \
684
+ --threshold 0.995 \
685
+ --transthreshold 0.995 \
686
+ $FP16_FLAG || { echo "ERROR: task 12 (yahoo) failed; aborting before dependent tasks run" >&2; exit 1; }
687
+
688
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/12-yahoo/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
689
+
690
+ sleep 5
691
+
692
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
693
+ BSZ=16; GA=1; EVAL_BSZ=256
694
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
695
+ BSZ=32; GA=1; EVAL_BSZ=256
696
+ else
697
+ BSZ=64; GA=1; EVAL_BSZ=512
698
+ fi
699
+ # Task 13/15 (multirc): receives the saved_weights of tasks 1-12 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because later tasks load this task's saved_weights.
700
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
701
+ --do_train \
702
+ --predict_with_generate \
703
+ --model_name_or_path "$2" \
704
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/12-yahoo/saved_weights \
705
+ --data_dir CL_Benchmark \
706
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
707
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/multirc \
708
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/13-multirc \
709
+ --per_device_train_batch_size $BSZ \
710
+ --per_device_eval_batch_size $EVAL_BSZ \
711
+ --gradient_accumulation_steps $GA \
712
+ --learning_rate 0.0003 \
713
+ --num_train_epochs 10 \
714
+ --run_name gen_script_long_order3_t5_small_specroute \
715
+ --max_source_length 512 \
716
+ --max_target_length 50 \
717
+ --generation_max_length 50 \
718
+ --add_task_name False \
719
+ --add_dataset_name False \
720
+ --overwrite_output_dir \
721
+ --overwrite_cache \
722
+ --lr_scheduler_type constant \
723
+ --warmup_steps 0 \
724
+ --logging_strategy steps \
725
+ --logging_steps 10 \
726
+ --metric_for_best_model eval_exact_match_for_multirc \
727
+ --evaluation_strategy steps \
728
+ --save_strategy steps \
729
+ --save_total_limit 1 \
730
+ --load_best_model_at_end \
731
+ --lora_r 8 \
732
+ --lora_alpha 32 \
733
+ --lora_dropout 0.0 \
734
+ --data_replay_freq -1 \
735
+ --mlp_hidden_dim 100 \
736
+ --model_name specroute \
737
+ --threshold 0.995 \
738
+ --transthreshold 0.995 \
739
+ $FP16_FLAG || { echo "ERROR: task 13 (multirc) failed; aborting before dependent tasks run" >&2; exit 1; }
740
+
741
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/13-multirc/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
742
+
743
+ sleep 5
744
+
745
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then # batch sizes per GPU mode: BSZ = per-device train, GA = gradient accumulation steps, EVAL_BSZ = per-device eval
746
+ BSZ=16; GA=1; EVAL_BSZ=256
747
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
748
+ BSZ=32; GA=1; EVAL_BSZ=256
749
+ else
750
+ BSZ=64; GA=1; EVAL_BSZ=512
751
+ fi
752
+ # Task 14/15 (boolq): receives the saved_weights of tasks 1-13 via --previous_lora_path. "$2" is quoted so it stays one argument; a failed run aborts the chain because the next task loads this task's saved_weights.
753
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
754
+ --do_train \
755
+ --predict_with_generate \
756
+ --model_name_or_path "$2" \
757
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/13-multirc/saved_weights \
758
+ --data_dir CL_Benchmark \
759
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
760
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/boolq \
761
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/14-boolq \
762
+ --per_device_train_batch_size $BSZ \
763
+ --per_device_eval_batch_size $EVAL_BSZ \
764
+ --gradient_accumulation_steps $GA \
765
+ --learning_rate 0.0003 \
766
+ --num_train_epochs 10 \
767
+ --run_name gen_script_long_order3_t5_small_specroute \
768
+ --max_source_length 512 \
769
+ --max_target_length 50 \
770
+ --generation_max_length 50 \
771
+ --add_task_name False \
772
+ --add_dataset_name False \
773
+ --overwrite_output_dir \
774
+ --overwrite_cache \
775
+ --lr_scheduler_type constant \
776
+ --warmup_steps 0 \
777
+ --logging_strategy steps \
778
+ --logging_steps 10 \
779
+ --metric_for_best_model eval_exact_match_for_boolq \
780
+ --evaluation_strategy steps \
781
+ --save_strategy steps \
782
+ --save_total_limit 1 \
783
+ --load_best_model_at_end \
784
+ --lora_r 8 \
785
+ --lora_alpha 32 \
786
+ --lora_dropout 0.0 \
787
+ --data_replay_freq -1 \
788
+ --mlp_hidden_dim 100 \
789
+ --model_name specroute \
790
+ --threshold 0.995 \
791
+ --transthreshold 0.995 \
792
+ $FP16_FLAG || { echo "ERROR: task 14 (boolq) failed; aborting before dependent tasks run" >&2; exit 1; }
793
+
794
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/14-boolq/checkpoint* # drop this task's checkpoint directories; saved_weights is left in place
795
+
796
+ sleep 5
797
+
798
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
799
+ BSZ=16; GA=1; EVAL_BSZ=256
800
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
801
+ BSZ=32; GA=1; EVAL_BSZ=256
802
+ else
803
+ BSZ=64; GA=1; EVAL_BSZ=512
804
+ fi
805
+
806
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
807
+ --do_train \
808
+ --predict_with_generate \
809
+ --model_name_or_path $2 \
810
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/13-multirc/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/14-boolq/saved_weights \
811
+ --data_dir CL_Benchmark \
812
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
813
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/wic \
814
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/15-wic \
815
+ --per_device_train_batch_size $BSZ \
816
+ --per_device_eval_batch_size $EVAL_BSZ \
817
+ --gradient_accumulation_steps $GA \
818
+ --learning_rate 0.0003 \
819
+ --num_train_epochs 10 \
820
+ --run_name gen_script_long_order3_t5_small_specroute \
821
+ --max_source_length 512 \
822
+ --max_target_length 50 \
823
+ --generation_max_length 50 \
824
+ --add_task_name False \
825
+ --add_dataset_name False \
826
+ --overwrite_output_dir \
827
+ --overwrite_cache \
828
+ --lr_scheduler_type constant \
829
+ --warmup_steps 0 \
830
+ --logging_strategy steps \
831
+ --logging_steps 10 \
832
+ --metric_for_best_model eval_exact_match_for_wic \
833
+ --evaluation_strategy steps \
834
+ --save_strategy steps \
835
+ --save_total_limit 1 \
836
+ --load_best_model_at_end \
837
+ --lora_r 8 \
838
+ --lora_alpha 32 \
839
+ --lora_dropout 0.0 \
840
+ --data_replay_freq -1 \
841
+ --mlp_hidden_dim 100 \
842
+ --model_name specroute \
843
+ --threshold 0.995 \
844
+ --transthreshold 0.995 \
845
+ $FP16_FLAG
846
+
847
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_specroute/outputs/15-wic/checkpoint*
848
+
849
+ sleep 5
improve_gainlora/T5_small/gen_script_long_order4_t5_small_gainlora_inflora.sh ADDED
@@ -0,0 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
21
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/mnli \
22
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli \
23
+ --per_device_train_batch_size 32 \
24
+ --per_device_eval_batch_size 256 \
25
+ --gradient_accumulation_steps 1 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 10 \
28
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_exact_match \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --load_best_model_at_end \
45
+ --lora_r 8 \
46
+ --lora_alpha 32 \
47
+ --lora_dropout 0.0 \
48
+ --add_instruction_replay \
49
+ --data_replay_freq -1 \
50
+ --replay_after_n_epoch 0 \
51
+ --mlp_hidden_dim 100 \
52
+ --model_name gainlora_inflora \
53
+ --threshold 0.995 \
54
+ --transthreshold 0.995
55
+
56
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/checkpoint*
57
+
58
+ sleep 5
59
+
60
+
61
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
62
+ --do_train \
63
+ --do_predict \
64
+ --predict_with_generate \
65
+ --model_name_or_path $2 \
66
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights/trans_input.pt \
67
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights \
68
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights/prompts_keys_till_now.pt \
69
+ --data_dir CL_Benchmark \
70
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
71
+ --gen_data_dir generated_data/lora_gen_long_t5 \
72
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/cb \
73
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb \
74
+ --per_device_train_batch_size 32 \
75
+ --per_device_eval_batch_size 256 \
76
+ --gradient_accumulation_steps 1 \
77
+ --learning_rate 0.0003 \
78
+ --num_train_epochs 10\
79
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
80
+ --max_source_length 512 \
81
+ --max_target_length 50 \
82
+ --generation_max_length 50 \
83
+ --add_task_name False \
84
+ --add_dataset_name False \
85
+ --overwrite_output_dir \
86
+ --overwrite_cache \
87
+ --lr_scheduler_type constant \
88
+ --warmup_steps 0 \
89
+ --logging_strategy steps \
90
+ --logging_steps 10 \
91
+ --metric_for_best_model eval_exact_match_for_cb \
92
+ --evaluation_strategy steps \
93
+ --save_strategy steps \
94
+ --save_total_limit 1 \
95
+ --load_best_model_at_end \
96
+ --lora_r 8 \
97
+ --lora_alpha 32 \
98
+ --lora_dropout 0.0 \
99
+ --data_replay_freq -1 \
100
+ --kl_ratio 0.1 \
101
+ --attn_temperature 1 \
102
+ --mlp_hidden_dim 100 \
103
+ --model_name gainlora_inflora \
104
+ --threshold 0.995 \
105
+ --transthreshold 0.995
106
+
107
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/checkpoint*
108
+
109
+ sleep 5
110
+
111
+
112
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
113
+ --do_train \
114
+ --do_predict \
115
+ --predict_with_generate \
116
+ --model_name_or_path $2 \
117
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights/trans_input.pt \
118
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights \
119
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights/prompts_keys_till_now.pt \
120
+ --data_dir CL_Benchmark \
121
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
122
+ --gen_data_dir generated_data/lora_gen_long_t5 \
123
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/wic \
124
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic \
125
+ --per_device_train_batch_size 32 \
126
+ --per_device_eval_batch_size 256 \
127
+ --gradient_accumulation_steps 1 \
128
+ --learning_rate 0.0003 \
129
+ --num_train_epochs 10\
130
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
131
+ --max_source_length 512 \
132
+ --max_target_length 50 \
133
+ --generation_max_length 50 \
134
+ --add_task_name False \
135
+ --add_dataset_name False \
136
+ --overwrite_output_dir \
137
+ --overwrite_cache \
138
+ --lr_scheduler_type constant \
139
+ --warmup_steps 0 \
140
+ --logging_strategy steps \
141
+ --logging_steps 10 \
142
+ --metric_for_best_model eval_exact_match_for_wic \
143
+ --evaluation_strategy steps \
144
+ --save_strategy steps \
145
+ --save_total_limit 1 \
146
+ --load_best_model_at_end \
147
+ --lora_r 8 \
148
+ --lora_alpha 32 \
149
+ --lora_dropout 0.0 \
150
+ --data_replay_freq -1 \
151
+ --kl_ratio 0.1 \
152
+ --attn_temperature 1 \
153
+ --mlp_hidden_dim 100 \
154
+ --model_name gainlora_inflora \
155
+ --threshold 0.995 \
156
+ --transthreshold 0.995
157
+
158
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/checkpoint*
159
+
160
+ sleep 5
161
+
162
+
163
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
164
+ --do_train \
165
+ --do_predict \
166
+ --predict_with_generate \
167
+ --model_name_or_path $2 \
168
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights/trans_input.pt \
169
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights \
170
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights/prompts_keys_till_now.pt \
171
+ --data_dir CL_Benchmark \
172
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
173
+ --gen_data_dir generated_data/lora_gen_long_t5 \
174
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/copa \
175
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa \
176
+ --per_device_train_batch_size 32 \
177
+ --per_device_eval_batch_size 256 \
178
+ --gradient_accumulation_steps 1 \
179
+ --learning_rate 0.0003 \
180
+ --num_train_epochs 10\
181
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
182
+ --max_source_length 512 \
183
+ --max_target_length 50 \
184
+ --generation_max_length 50 \
185
+ --add_task_name False \
186
+ --add_dataset_name False \
187
+ --overwrite_output_dir \
188
+ --overwrite_cache \
189
+ --lr_scheduler_type constant \
190
+ --warmup_steps 0 \
191
+ --logging_strategy steps \
192
+ --logging_steps 10 \
193
+ --metric_for_best_model eval_exact_match_for_copa \
194
+ --evaluation_strategy steps \
195
+ --save_strategy steps \
196
+ --save_total_limit 1 \
197
+ --load_best_model_at_end \
198
+ --lora_r 8 \
199
+ --lora_alpha 32 \
200
+ --lora_dropout 0.0 \
201
+ --data_replay_freq -1 \
202
+ --kl_ratio 0.1 \
203
+ --attn_temperature 1 \
204
+ --mlp_hidden_dim 100 \
205
+ --model_name gainlora_inflora \
206
+ --threshold 0.995 \
207
+ --transthreshold 0.995
208
+
209
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/checkpoint*
210
+
211
+ sleep 5
212
+
213
+
214
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
215
+ --do_train \
216
+ --do_predict \
217
+ --predict_with_generate \
218
+ --model_name_or_path $2 \
219
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights/trans_input.pt \
220
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights \
221
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights/prompts_keys_till_now.pt \
222
+ --data_dir CL_Benchmark \
223
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
224
+ --gen_data_dir generated_data/lora_gen_long_t5 \
225
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/qqp \
226
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp \
227
+ --per_device_train_batch_size 32 \
228
+ --per_device_eval_batch_size 256 \
229
+ --gradient_accumulation_steps 1 \
230
+ --learning_rate 0.0003 \
231
+ --num_train_epochs 10\
232
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
233
+ --max_source_length 512 \
234
+ --max_target_length 50 \
235
+ --generation_max_length 50 \
236
+ --add_task_name False \
237
+ --add_dataset_name False \
238
+ --overwrite_output_dir \
239
+ --overwrite_cache \
240
+ --lr_scheduler_type constant \
241
+ --warmup_steps 0 \
242
+ --logging_strategy steps \
243
+ --logging_steps 10 \
244
+ --metric_for_best_model eval_exact_match_for_qqp \
245
+ --evaluation_strategy steps \
246
+ --save_strategy steps \
247
+ --save_total_limit 1 \
248
+ --load_best_model_at_end \
249
+ --lora_r 8 \
250
+ --lora_alpha 32 \
251
+ --lora_dropout 0.0 \
252
+ --data_replay_freq -1 \
253
+ --kl_ratio 0.1 \
254
+ --attn_temperature 1 \
255
+ --mlp_hidden_dim 100 \
256
+ --model_name gainlora_inflora \
257
+ --threshold 0.995 \
258
+ --transthreshold 0.995
259
+
260
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/checkpoint*
261
+
262
+ sleep 5
263
+
264
+
265
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
266
+ --do_train \
267
+ --do_predict \
268
+ --predict_with_generate \
269
+ --model_name_or_path $2 \
270
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights/trans_input.pt \
271
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights \
272
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights/prompts_keys_till_now.pt \
273
+ --data_dir CL_Benchmark \
274
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
275
+ --gen_data_dir generated_data/lora_gen_long_t5 \
276
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/boolq \
277
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq \
278
+ --per_device_train_batch_size 32 \
279
+ --per_device_eval_batch_size 256 \
280
+ --gradient_accumulation_steps 1 \
281
+ --learning_rate 0.0003 \
282
+ --num_train_epochs 10\
283
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
284
+ --max_source_length 512 \
285
+ --max_target_length 50 \
286
+ --generation_max_length 50 \
287
+ --add_task_name False \
288
+ --add_dataset_name False \
289
+ --overwrite_output_dir \
290
+ --overwrite_cache \
291
+ --lr_scheduler_type constant \
292
+ --warmup_steps 0 \
293
+ --logging_strategy steps \
294
+ --logging_steps 10 \
295
+ --metric_for_best_model eval_exact_match_for_boolq \
296
+ --evaluation_strategy steps \
297
+ --save_strategy steps \
298
+ --save_total_limit 1 \
299
+ --load_best_model_at_end \
300
+ --lora_r 8 \
301
+ --lora_alpha 32 \
302
+ --lora_dropout 0.0 \
303
+ --data_replay_freq -1 \
304
+ --kl_ratio 0.1 \
305
+ --attn_temperature 1 \
306
+ --mlp_hidden_dim 100 \
307
+ --model_name gainlora_inflora \
308
+ --threshold 0.995 \
309
+ --transthreshold 0.995
310
+
311
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/checkpoint*
312
+
313
+ sleep 5
314
+
315
+
316
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
317
+ --do_train \
318
+ --do_predict \
319
+ --predict_with_generate \
320
+ --model_name_or_path $2 \
321
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights/trans_input.pt \
322
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights \
323
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights/prompts_keys_till_now.pt \
324
+ --data_dir CL_Benchmark \
325
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
326
+ --gen_data_dir generated_data/lora_gen_long_t5 \
327
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/rte \
328
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte \
329
+ --per_device_train_batch_size 32 \
330
+ --per_device_eval_batch_size 256 \
331
+ --gradient_accumulation_steps 1 \
332
+ --learning_rate 0.0003 \
333
+ --num_train_epochs 10\
334
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
335
+ --max_source_length 512 \
336
+ --max_target_length 50 \
337
+ --generation_max_length 50 \
338
+ --add_task_name False \
339
+ --add_dataset_name False \
340
+ --overwrite_output_dir \
341
+ --overwrite_cache \
342
+ --lr_scheduler_type constant \
343
+ --warmup_steps 0 \
344
+ --logging_strategy steps \
345
+ --logging_steps 10 \
346
+ --metric_for_best_model eval_exact_match_for_rte \
347
+ --evaluation_strategy steps \
348
+ --save_strategy steps \
349
+ --save_total_limit 1 \
350
+ --load_best_model_at_end \
351
+ --lora_r 8 \
352
+ --lora_alpha 32 \
353
+ --lora_dropout 0.0 \
354
+ --data_replay_freq -1 \
355
+ --kl_ratio 0.1 \
356
+ --attn_temperature 1 \
357
+ --mlp_hidden_dim 100 \
358
+ --model_name gainlora_inflora \
359
+ --threshold 0.995 \
360
+ --transthreshold 0.995
361
+
362
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/checkpoint*
363
+
364
+ sleep 5
365
+
366
+
367
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
368
+ --do_train \
369
+ --do_predict \
370
+ --predict_with_generate \
371
+ --model_name_or_path $2 \
372
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights/trans_input.pt \
373
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights \
374
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights/prompts_keys_till_now.pt \
375
+ --data_dir CL_Benchmark \
376
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
377
+ --gen_data_dir generated_data/lora_gen_long_t5 \
378
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/imdb \
379
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb \
380
+ --per_device_train_batch_size 32 \
381
+ --per_device_eval_batch_size 256 \
382
+ --gradient_accumulation_steps 1 \
383
+ --learning_rate 0.0003 \
384
+ --num_train_epochs 10\
385
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
386
+ --max_source_length 512 \
387
+ --max_target_length 50 \
388
+ --generation_max_length 50 \
389
+ --add_task_name False \
390
+ --add_dataset_name False \
391
+ --overwrite_output_dir \
392
+ --overwrite_cache \
393
+ --lr_scheduler_type constant \
394
+ --warmup_steps 0 \
395
+ --logging_strategy steps \
396
+ --logging_steps 10 \
397
+ --metric_for_best_model eval_exact_match_for_imdb \
398
+ --evaluation_strategy steps \
399
+ --save_strategy steps \
400
+ --save_total_limit 1 \
401
+ --load_best_model_at_end \
402
+ --lora_r 8 \
403
+ --lora_alpha 32 \
404
+ --lora_dropout 0.0 \
405
+ --data_replay_freq -1 \
406
+ --kl_ratio 0.1 \
407
+ --attn_temperature 1 \
408
+ --mlp_hidden_dim 100 \
409
+ --model_name gainlora_inflora \
410
+ --threshold 0.995 \
411
+ --transthreshold 0.995
412
+
413
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/checkpoint*
414
+
415
+ sleep 5
416
+
417
+
418
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
419
+ --do_train \
420
+ --do_predict \
421
+ --predict_with_generate \
422
+ --model_name_or_path $2 \
423
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights/trans_input.pt \
424
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights \
425
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights/prompts_keys_till_now.pt \
426
+ --data_dir CL_Benchmark \
427
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
428
+ --gen_data_dir generated_data/lora_gen_long_t5 \
429
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/yelp \
430
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp \
431
+ --per_device_train_batch_size 32 \
432
+ --per_device_eval_batch_size 256 \
433
+ --gradient_accumulation_steps 1 \
434
+ --learning_rate 0.0003 \
435
+ --num_train_epochs 10\
436
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
437
+ --max_source_length 512 \
438
+ --max_target_length 50 \
439
+ --generation_max_length 50 \
440
+ --add_task_name False \
441
+ --add_dataset_name False \
442
+ --overwrite_output_dir \
443
+ --overwrite_cache \
444
+ --lr_scheduler_type constant \
445
+ --warmup_steps 0 \
446
+ --logging_strategy steps \
447
+ --logging_steps 10 \
448
+ --metric_for_best_model eval_exact_match_for_yelp \
449
+ --evaluation_strategy steps \
450
+ --save_strategy steps \
451
+ --save_total_limit 1 \
452
+ --load_best_model_at_end \
453
+ --lora_r 8 \
454
+ --lora_alpha 32 \
455
+ --lora_dropout 0.0 \
456
+ --data_replay_freq -1 \
457
+ --kl_ratio 0.1 \
458
+ --attn_temperature 1 \
459
+ --mlp_hidden_dim 100 \
460
+ --model_name gainlora_inflora \
461
+ --threshold 0.995 \
462
+ --transthreshold 0.995
463
+
464
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/checkpoint*
465
+
466
+ sleep 5
467
+
468
+
469
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
470
+ --do_train \
471
+ --do_predict \
472
+ --predict_with_generate \
473
+ --model_name_or_path $2 \
474
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights/trans_input.pt \
475
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights \
476
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights/prompts_keys_till_now.pt \
477
+ --data_dir CL_Benchmark \
478
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
479
+ --gen_data_dir generated_data/lora_gen_long_t5 \
480
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/amazon \
481
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon \
482
+ --per_device_train_batch_size 32 \
483
+ --per_device_eval_batch_size 256 \
484
+ --gradient_accumulation_steps 1 \
485
+ --learning_rate 0.0003 \
486
+ --num_train_epochs 10\
487
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
488
+ --max_source_length 512 \
489
+ --max_target_length 50 \
490
+ --generation_max_length 50 \
491
+ --add_task_name False \
492
+ --add_dataset_name False \
493
+ --overwrite_output_dir \
494
+ --overwrite_cache \
495
+ --lr_scheduler_type constant \
496
+ --warmup_steps 0 \
497
+ --logging_strategy steps \
498
+ --logging_steps 10 \
499
+ --metric_for_best_model eval_exact_match_for_amazon \
500
+ --evaluation_strategy steps \
501
+ --save_strategy steps \
502
+ --save_total_limit 1 \
503
+ --load_best_model_at_end \
504
+ --lora_r 8 \
505
+ --lora_alpha 32 \
506
+ --lora_dropout 0.0 \
507
+ --data_replay_freq -1 \
508
+ --kl_ratio 0.1 \
509
+ --attn_temperature 1 \
510
+ --mlp_hidden_dim 100 \
511
+ --model_name gainlora_inflora \
512
+ --threshold 0.995 \
513
+ --transthreshold 0.995
514
+
515
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/checkpoint*
516
+
517
+ sleep 5
518
+
519
+
520
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
521
+ --do_train \
522
+ --do_predict \
523
+ --predict_with_generate \
524
+ --model_name_or_path $2 \
525
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights/trans_input.pt \
526
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights \
527
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights/prompts_keys_till_now.pt \
528
+ --data_dir CL_Benchmark \
529
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
530
+ --gen_data_dir generated_data/lora_gen_long_t5 \
531
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/sst2 \
532
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2 \
533
+ --per_device_train_batch_size 32 \
534
+ --per_device_eval_batch_size 256 \
535
+ --gradient_accumulation_steps 1 \
536
+ --learning_rate 0.0003 \
537
+ --num_train_epochs 10\
538
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
539
+ --max_source_length 512 \
540
+ --max_target_length 50 \
541
+ --generation_max_length 50 \
542
+ --add_task_name False \
543
+ --add_dataset_name False \
544
+ --overwrite_output_dir \
545
+ --overwrite_cache \
546
+ --lr_scheduler_type constant \
547
+ --warmup_steps 0 \
548
+ --logging_strategy steps \
549
+ --logging_steps 10 \
550
+ --metric_for_best_model eval_exact_match_for_sst2 \
551
+ --evaluation_strategy steps \
552
+ --save_strategy steps \
553
+ --save_total_limit 1 \
554
+ --load_best_model_at_end \
555
+ --lora_r 8 \
556
+ --lora_alpha 32 \
557
+ --lora_dropout 0.0 \
558
+ --data_replay_freq -1 \
559
+ --kl_ratio 0.1 \
560
+ --attn_temperature 1 \
561
+ --mlp_hidden_dim 100 \
562
+ --model_name gainlora_inflora \
563
+ --threshold 0.995 \
564
+ --transthreshold 0.995
565
+
566
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/checkpoint*
567
+
568
+ sleep 5
569
+
570
+
571
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
572
+ --do_train \
573
+ --do_predict \
574
+ --predict_with_generate \
575
+ --model_name_or_path $2 \
576
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights/trans_input.pt \
577
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights \
578
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights/prompts_keys_till_now.pt \
579
+ --data_dir CL_Benchmark \
580
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
581
+ --gen_data_dir generated_data/lora_gen_long_t5 \
582
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/dbpedia \
583
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia \
584
+ --per_device_train_batch_size 32 \
585
+ --per_device_eval_batch_size 256 \
586
+ --gradient_accumulation_steps 1 \
587
+ --learning_rate 0.0003 \
588
+ --num_train_epochs 10\
589
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
590
+ --max_source_length 512 \
591
+ --max_target_length 50 \
592
+ --generation_max_length 50 \
593
+ --add_task_name False \
594
+ --add_dataset_name False \
595
+ --overwrite_output_dir \
596
+ --overwrite_cache \
597
+ --lr_scheduler_type constant \
598
+ --warmup_steps 0 \
599
+ --logging_strategy steps \
600
+ --logging_steps 10 \
601
+ --metric_for_best_model eval_exact_match_for_dbpedia \
602
+ --evaluation_strategy steps \
603
+ --save_strategy steps \
604
+ --save_total_limit 1 \
605
+ --load_best_model_at_end \
606
+ --lora_r 8 \
607
+ --lora_alpha 32 \
608
+ --lora_dropout 0.0 \
609
+ --data_replay_freq -1 \
610
+ --kl_ratio 0.1 \
611
+ --attn_temperature 1 \
612
+ --mlp_hidden_dim 100 \
613
+ --model_name gainlora_inflora \
614
+ --threshold 0.995 \
615
+ --transthreshold 0.995
616
+
617
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia/checkpoint*
618
+
619
+ sleep 5
620
+
621
+
622
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
623
+ --do_train \
624
+ --do_predict \
625
+ --predict_with_generate \
626
+ --model_name_or_path $2 \
627
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia/saved_weights/trans_input.pt \
628
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia/saved_weights \
629
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia/saved_weights/prompts_keys_till_now.pt \
630
+ --data_dir CL_Benchmark \
631
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
632
+ --gen_data_dir generated_data/lora_gen_long_t5 \
633
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/agnews \
634
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/13-agnews \
635
+ --per_device_train_batch_size 32 \
636
+ --per_device_eval_batch_size 256 \
637
+ --gradient_accumulation_steps 1 \
638
+ --learning_rate 0.0003 \
639
+ --num_train_epochs 10\
640
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
641
+ --max_source_length 512 \
642
+ --max_target_length 50 \
643
+ --generation_max_length 50 \
644
+ --add_task_name False \
645
+ --add_dataset_name False \
646
+ --overwrite_output_dir \
647
+ --overwrite_cache \
648
+ --lr_scheduler_type constant \
649
+ --warmup_steps 0 \
650
+ --logging_strategy steps \
651
+ --logging_steps 10 \
652
+ --metric_for_best_model eval_exact_match_for_agnews \
653
+ --evaluation_strategy steps \
654
+ --save_strategy steps \
655
+ --save_total_limit 1 \
656
+ --load_best_model_at_end \
657
+ --lora_r 8 \
658
+ --lora_alpha 32 \
659
+ --lora_dropout 0.0 \
660
+ --data_replay_freq -1 \
661
+ --kl_ratio 0.1 \
662
+ --attn_temperature 1 \
663
+ --mlp_hidden_dim 100 \
664
+ --model_name gainlora_inflora \
665
+ --threshold 0.995 \
666
+ --transthreshold 0.995
667
+
668
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/13-agnews/checkpoint*
669
+
670
+ sleep 5
671
+
672
+
673
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
674
+ --do_train \
675
+ --do_predict \
676
+ --predict_with_generate \
677
+ --model_name_or_path $2 \
678
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/13-agnews/saved_weights/trans_input.pt \
679
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/13-agnews/saved_weights \
680
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/13-agnews/saved_weights/prompts_keys_till_now.pt \
681
+ --data_dir CL_Benchmark \
682
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
683
+ --gen_data_dir generated_data/lora_gen_long_t5 \
684
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/multirc \
685
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/14-multirc \
686
+ --per_device_train_batch_size 32 \
687
+ --per_device_eval_batch_size 256 \
688
+ --gradient_accumulation_steps 1 \
689
+ --learning_rate 0.0003 \
690
+ --num_train_epochs 10\
691
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
692
+ --max_source_length 512 \
693
+ --max_target_length 50 \
694
+ --generation_max_length 50 \
695
+ --add_task_name False \
696
+ --add_dataset_name False \
697
+ --overwrite_output_dir \
698
+ --overwrite_cache \
699
+ --lr_scheduler_type constant \
700
+ --warmup_steps 0 \
701
+ --logging_strategy steps \
702
+ --logging_steps 10 \
703
+ --metric_for_best_model eval_exact_match_for_multirc \
704
+ --evaluation_strategy steps \
705
+ --save_strategy steps \
706
+ --save_total_limit 1 \
707
+ --load_best_model_at_end \
708
+ --lora_r 8 \
709
+ --lora_alpha 32 \
710
+ --lora_dropout 0.0 \
711
+ --data_replay_freq -1 \
712
+ --kl_ratio 0.1 \
713
+ --attn_temperature 1 \
714
+ --mlp_hidden_dim 100 \
715
+ --model_name gainlora_inflora \
716
+ --threshold 0.995 \
717
+ --transthreshold 0.995
718
+
719
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/14-multirc/checkpoint*
720
+
721
+ sleep 5
722
+
723
+
724
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
725
+ --do_train \
726
+ --do_predict \
727
+ --predict_with_generate \
728
+ --model_name_or_path $2 \
729
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/14-multirc/saved_weights/trans_input.pt \
730
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/13-agnews/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/14-multirc/saved_weights \
731
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/14-multirc/saved_weights/prompts_keys_till_now.pt \
732
+ --data_dir CL_Benchmark \
733
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
734
+ --gen_data_dir generated_data/lora_gen_long_t5 \
735
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/yahoo \
736
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/15-yahoo \
737
+ --per_device_train_batch_size 32 \
738
+ --per_device_eval_batch_size 256 \
739
+ --gradient_accumulation_steps 1 \
740
+ --learning_rate 0.0003 \
741
+ --num_train_epochs 10\
742
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
743
+ --max_source_length 512 \
744
+ --max_target_length 50 \
745
+ --generation_max_length 50 \
746
+ --add_task_name False \
747
+ --add_dataset_name False \
748
+ --overwrite_output_dir \
749
+ --overwrite_cache \
750
+ --lr_scheduler_type constant \
751
+ --warmup_steps 0 \
752
+ --logging_strategy steps \
753
+ --logging_steps 10 \
754
+ --metric_for_best_model eval_exact_match_for_yahoo \
755
+ --evaluation_strategy steps \
756
+ --save_strategy steps \
757
+ --save_total_limit 1 \
758
+ --load_best_model_at_end \
759
+ --lora_r 8 \
760
+ --lora_alpha 32 \
761
+ --lora_dropout 0.0 \
762
+ --data_replay_freq -1 \
763
+ --kl_ratio 0.1 \
764
+ --attn_temperature 1 \
765
+ --mlp_hidden_dim 100 \
766
+ --model_name gainlora_inflora \
767
+ --threshold 0.995 \
768
+ --transthreshold 0.995
769
+
770
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/15-yahoo/checkpoint*
771
+
772
+ sleep 5
773
+
774
+ CUDA_VISIBLE_DEVICES=$1 python score.py gen_script_long_order4_t5_small_gainlora_inflora gen_script_long_order4_t5_small_gainlora_inflora
improve_gainlora/T5_small/gen_script_long_order4_t5_small_inflora.sh ADDED
@@ -0,0 +1,744 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ # Task 1/15 (mnli): train + predict on the GPU given as $1 with the base model given as $2, then drop intermediate checkpoints.
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
21
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/mnli \
22
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli \
23
+ --per_device_train_batch_size 32 \
24
+ --per_device_eval_batch_size 256 \
25
+ --gradient_accumulation_steps 1 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 10 \
28
+ --run_name gen_script_long_order4_t5_small_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_exact_match \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --load_best_model_at_end \
45
+ --lora_r 8 \
46
+ --lora_alpha 32 \
47
+ --lora_dropout 0.0 \
48
+ --add_instruction_replay \
49
+ --data_replay_freq -1 \
50
+ --replay_after_n_epoch 0 \
51
+ --model_name inflora \
52
+ --threshold 0.995
53
+
54
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/checkpoint*
55
+
56
+ sleep 5
57
+
58
+
59
+ # Task 2/15 (cb): train + predict on the GPU given as $1 with the base model given as $2, reusing the saved weights of task 1.
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
60
+ --do_train \
61
+ --do_predict \
62
+ --predict_with_generate \
63
+ --model_name_or_path $2 \
64
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights/trans_input.pt \
65
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights \
66
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights/prompts_keys_till_now.pt \
67
+ --data_dir CL_Benchmark \
68
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
69
+ --gen_data_dir generated_data/lora_gen_long_t5 \
70
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/cb \
71
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb \
72
+ --per_device_train_batch_size 32 \
73
+ --per_device_eval_batch_size 256 \
74
+ --gradient_accumulation_steps 1 \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 10 \
77
+ --run_name gen_script_long_order4_t5_small_inflora \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_exact_match_for_cb \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 8 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --data_replay_freq -1 \
98
+ --kl_ratio 0.1 \
99
+ --attn_temperature 1 \
100
+ --model_name inflora \
101
+ --threshold 0.995
102
+
103
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/checkpoint*
104
+
105
+ sleep 5
106
+
107
+
108
+ # Task 3/15 (wic): train + predict on the GPU given as $1 with the base model given as $2, reusing the saved weights of tasks 1-2.
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
109
+ --do_train \
110
+ --do_predict \
111
+ --predict_with_generate \
112
+ --model_name_or_path $2 \
113
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights/trans_input.pt \
114
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights \
115
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights/prompts_keys_till_now.pt \
116
+ --data_dir CL_Benchmark \
117
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
118
+ --gen_data_dir generated_data/lora_gen_long_t5 \
119
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/wic \
120
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic \
121
+ --per_device_train_batch_size 32 \
122
+ --per_device_eval_batch_size 256 \
123
+ --gradient_accumulation_steps 1 \
124
+ --learning_rate 0.0003 \
125
+ --num_train_epochs 10 \
126
+ --run_name gen_script_long_order4_t5_small_inflora \
127
+ --max_source_length 512 \
128
+ --max_target_length 50 \
129
+ --generation_max_length 50 \
130
+ --add_task_name False \
131
+ --add_dataset_name False \
132
+ --overwrite_output_dir \
133
+ --overwrite_cache \
134
+ --lr_scheduler_type constant \
135
+ --warmup_steps 0 \
136
+ --logging_strategy steps \
137
+ --logging_steps 10 \
138
+ --metric_for_best_model eval_exact_match_for_wic \
139
+ --evaluation_strategy steps \
140
+ --save_strategy steps \
141
+ --save_total_limit 1 \
142
+ --load_best_model_at_end \
143
+ --lora_r 8 \
144
+ --lora_alpha 32 \
145
+ --lora_dropout 0.0 \
146
+ --data_replay_freq -1 \
147
+ --kl_ratio 0.1 \
148
+ --attn_temperature 1 \
149
+ --model_name inflora \
150
+ --threshold 0.995
151
+
152
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/checkpoint*
153
+
154
+ sleep 5
155
+
156
+
157
+ # Task 4/15 (copa): train + predict on the GPU given as $1 with the base model given as $2, reusing the saved weights of tasks 1-3.
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
158
+ --do_train \
159
+ --do_predict \
160
+ --predict_with_generate \
161
+ --model_name_or_path $2 \
162
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights/trans_input.pt \
163
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights \
164
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights/prompts_keys_till_now.pt \
165
+ --data_dir CL_Benchmark \
166
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
167
+ --gen_data_dir generated_data/lora_gen_long_t5 \
168
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/copa \
169
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa \
170
+ --per_device_train_batch_size 32 \
171
+ --per_device_eval_batch_size 256 \
172
+ --gradient_accumulation_steps 1 \
173
+ --learning_rate 0.0003 \
174
+ --num_train_epochs 10 \
175
+ --run_name gen_script_long_order4_t5_small_inflora \
176
+ --max_source_length 512 \
177
+ --max_target_length 50 \
178
+ --generation_max_length 50 \
179
+ --add_task_name False \
180
+ --add_dataset_name False \
181
+ --overwrite_output_dir \
182
+ --overwrite_cache \
183
+ --lr_scheduler_type constant \
184
+ --warmup_steps 0 \
185
+ --logging_strategy steps \
186
+ --logging_steps 10 \
187
+ --metric_for_best_model eval_exact_match_for_copa \
188
+ --evaluation_strategy steps \
189
+ --save_strategy steps \
190
+ --save_total_limit 1 \
191
+ --load_best_model_at_end \
192
+ --lora_r 8 \
193
+ --lora_alpha 32 \
194
+ --lora_dropout 0.0 \
195
+ --data_replay_freq -1 \
196
+ --kl_ratio 0.1 \
197
+ --attn_temperature 1 \
198
+ --model_name inflora \
199
+ --threshold 0.995
200
+
201
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/checkpoint*
202
+
203
+ sleep 5
204
+
205
+
206
+ # Task 5/15 (qqp): train + predict on the GPU given as $1 with the base model given as $2, reusing the saved weights of tasks 1-4.
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
207
+ --do_train \
208
+ --do_predict \
209
+ --predict_with_generate \
210
+ --model_name_or_path $2 \
211
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights/trans_input.pt \
212
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights \
213
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights/prompts_keys_till_now.pt \
214
+ --data_dir CL_Benchmark \
215
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
216
+ --gen_data_dir generated_data/lora_gen_long_t5 \
217
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/qqp \
218
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp \
219
+ --per_device_train_batch_size 32 \
220
+ --per_device_eval_batch_size 256 \
221
+ --gradient_accumulation_steps 1 \
222
+ --learning_rate 0.0003 \
223
+ --num_train_epochs 10 \
224
+ --run_name gen_script_long_order4_t5_small_inflora \
225
+ --max_source_length 512 \
226
+ --max_target_length 50 \
227
+ --generation_max_length 50 \
228
+ --add_task_name False \
229
+ --add_dataset_name False \
230
+ --overwrite_output_dir \
231
+ --overwrite_cache \
232
+ --lr_scheduler_type constant \
233
+ --warmup_steps 0 \
234
+ --logging_strategy steps \
235
+ --logging_steps 10 \
236
+ --metric_for_best_model eval_exact_match_for_qqp \
237
+ --evaluation_strategy steps \
238
+ --save_strategy steps \
239
+ --save_total_limit 1 \
240
+ --load_best_model_at_end \
241
+ --lora_r 8 \
242
+ --lora_alpha 32 \
243
+ --lora_dropout 0.0 \
244
+ --data_replay_freq -1 \
245
+ --kl_ratio 0.1 \
246
+ --attn_temperature 1 \
247
+ --model_name inflora \
248
+ --threshold 0.995
249
+
250
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/checkpoint*
251
+
252
+ sleep 5
253
+
254
+
255
+ # Task 6/15 (boolq): train + predict on the GPU given as $1 with the base model given as $2, reusing the saved weights of tasks 1-5.
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
256
+ --do_train \
257
+ --do_predict \
258
+ --predict_with_generate \
259
+ --model_name_or_path $2 \
260
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights/trans_input.pt \
261
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights \
262
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights/prompts_keys_till_now.pt \
263
+ --data_dir CL_Benchmark \
264
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
265
+ --gen_data_dir generated_data/lora_gen_long_t5 \
266
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/boolq \
267
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq \
268
+ --per_device_train_batch_size 32 \
269
+ --per_device_eval_batch_size 256 \
270
+ --gradient_accumulation_steps 1 \
271
+ --learning_rate 0.0003 \
272
+ --num_train_epochs 10 \
273
+ --run_name gen_script_long_order4_t5_small_inflora \
274
+ --max_source_length 512 \
275
+ --max_target_length 50 \
276
+ --generation_max_length 50 \
277
+ --add_task_name False \
278
+ --add_dataset_name False \
279
+ --overwrite_output_dir \
280
+ --overwrite_cache \
281
+ --lr_scheduler_type constant \
282
+ --warmup_steps 0 \
283
+ --logging_strategy steps \
284
+ --logging_steps 10 \
285
+ --metric_for_best_model eval_exact_match_for_boolq \
286
+ --evaluation_strategy steps \
287
+ --save_strategy steps \
288
+ --save_total_limit 1 \
289
+ --load_best_model_at_end \
290
+ --lora_r 8 \
291
+ --lora_alpha 32 \
292
+ --lora_dropout 0.0 \
293
+ --data_replay_freq -1 \
294
+ --kl_ratio 0.1 \
295
+ --attn_temperature 1 \
296
+ --model_name inflora \
297
+ --threshold 0.995
298
+
299
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/checkpoint*
300
+
301
+ sleep 5
302
+
303
+
304
+ # Task 7/15 (rte): train + predict on the GPU given as $1 with the base model given as $2, reusing the saved weights of tasks 1-6.
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
305
+ --do_train \
306
+ --do_predict \
307
+ --predict_with_generate \
308
+ --model_name_or_path $2 \
309
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights/trans_input.pt \
310
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights \
311
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights/prompts_keys_till_now.pt \
312
+ --data_dir CL_Benchmark \
313
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
314
+ --gen_data_dir generated_data/lora_gen_long_t5 \
315
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/rte \
316
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte \
317
+ --per_device_train_batch_size 32 \
318
+ --per_device_eval_batch_size 256 \
319
+ --gradient_accumulation_steps 1 \
320
+ --learning_rate 0.0003 \
321
+ --num_train_epochs 10 \
322
+ --run_name gen_script_long_order4_t5_small_inflora \
323
+ --max_source_length 512 \
324
+ --max_target_length 50 \
325
+ --generation_max_length 50 \
326
+ --add_task_name False \
327
+ --add_dataset_name False \
328
+ --overwrite_output_dir \
329
+ --overwrite_cache \
330
+ --lr_scheduler_type constant \
331
+ --warmup_steps 0 \
332
+ --logging_strategy steps \
333
+ --logging_steps 10 \
334
+ --metric_for_best_model eval_exact_match_for_rte \
335
+ --evaluation_strategy steps \
336
+ --save_strategy steps \
337
+ --save_total_limit 1 \
338
+ --load_best_model_at_end \
339
+ --lora_r 8 \
340
+ --lora_alpha 32 \
341
+ --lora_dropout 0.0 \
342
+ --data_replay_freq -1 \
343
+ --kl_ratio 0.1 \
344
+ --attn_temperature 1 \
345
+ --model_name inflora \
346
+ --threshold 0.995
347
+
348
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/checkpoint*
349
+
350
+ sleep 5
351
+
352
+
353
+ # Task 8/15 (imdb): train + predict on the GPU given as $1 with the base model given as $2, reusing the saved weights of tasks 1-7.
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
354
+ --do_train \
355
+ --do_predict \
356
+ --predict_with_generate \
357
+ --model_name_or_path $2 \
358
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights/trans_input.pt \
359
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights \
360
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights/prompts_keys_till_now.pt \
361
+ --data_dir CL_Benchmark \
362
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
363
+ --gen_data_dir generated_data/lora_gen_long_t5 \
364
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/imdb \
365
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb \
366
+ --per_device_train_batch_size 32 \
367
+ --per_device_eval_batch_size 256 \
368
+ --gradient_accumulation_steps 1 \
369
+ --learning_rate 0.0003 \
370
+ --num_train_epochs 10 \
371
+ --run_name gen_script_long_order4_t5_small_inflora \
372
+ --max_source_length 512 \
373
+ --max_target_length 50 \
374
+ --generation_max_length 50 \
375
+ --add_task_name False \
376
+ --add_dataset_name False \
377
+ --overwrite_output_dir \
378
+ --overwrite_cache \
379
+ --lr_scheduler_type constant \
380
+ --warmup_steps 0 \
381
+ --logging_strategy steps \
382
+ --logging_steps 10 \
383
+ --metric_for_best_model eval_exact_match_for_imdb \
384
+ --evaluation_strategy steps \
385
+ --save_strategy steps \
386
+ --save_total_limit 1 \
387
+ --load_best_model_at_end \
388
+ --lora_r 8 \
389
+ --lora_alpha 32 \
390
+ --lora_dropout 0.0 \
391
+ --data_replay_freq -1 \
392
+ --kl_ratio 0.1 \
393
+ --attn_temperature 1 \
394
+ --model_name inflora \
395
+ --threshold 0.995
396
+
397
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/checkpoint*
398
+
399
+ sleep 5
400
+
401
+
402
+ # Stage 9/15 (yelp) of the long_order4 InfLoRA run: src/run_t5.py is started with
+ # the LoRA weights saved by stages 1-8 plus trans_input.pt / prompts_keys_till_now.pt
+ # from 8-imdb, and writes to outputs/9-yelp. The checkpoint* entries of that output
+ # directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # NOTE(review): the GPU index is hard-coded to 1 here, whereas the specroute
+ # script of this commit takes it from $1 -- confirm this is intentional.
+ CUDA_VISIBLE_DEVICES=1 python src/run_t5.py \
403
+ --do_train \
404
+ --do_predict \
405
+ --predict_with_generate \
406
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
407
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights/trans_input.pt \
408
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights \
409
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights/prompts_keys_till_now.pt \
410
+ --data_dir CL_Benchmark \
411
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
412
+ --gen_data_dir generated_data/lora_gen_long_t5 \
413
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/yelp \
414
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp \
415
+ --per_device_train_batch_size 32 \
416
+ --per_device_eval_batch_size 256 \
417
+ --gradient_accumulation_steps 1 \
418
+ --learning_rate 0.0003 \
419
+ --num_train_epochs 10 \
420
+ --run_name gen_script_long_order4_t5_small_inflora \
421
+ --max_source_length 512 \
422
+ --max_target_length 50 \
423
+ --generation_max_length 50 \
424
+ --add_task_name False \
425
+ --add_dataset_name False \
426
+ --overwrite_output_dir \
427
+ --overwrite_cache \
428
+ --lr_scheduler_type constant \
429
+ --warmup_steps 0 \
430
+ --logging_strategy steps \
431
+ --logging_steps 10 \
432
+ --metric_for_best_model eval_exact_match_for_yelp \
433
+ --evaluation_strategy steps \
434
+ --save_strategy steps \
435
+ --save_total_limit 1 \
436
+ --load_best_model_at_end \
437
+ --lora_r 8 \
438
+ --lora_alpha 32 \
439
+ --lora_dropout 0.0 \
440
+ --data_replay_freq -1 \
441
+ --kl_ratio 0.1 \
442
+ --attn_temperature 1 \
443
+ --model_name inflora \
444
+ --threshold 0.995
445
+
446
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/checkpoint*
447
+
448
+ sleep 5
449
+
450
+
451
+ # Stage 10/15 (amazon) of the long_order4 InfLoRA run: src/run_t5.py is started with
+ # the LoRA weights saved by stages 1-9 plus trans_input.pt / prompts_keys_till_now.pt
+ # from 9-yelp, and writes to outputs/10-amazon. The checkpoint* entries of that
+ # output directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # NOTE(review): the GPU index is hard-coded to 1 here, whereas the specroute
+ # script of this commit takes it from $1 -- confirm this is intentional.
+ CUDA_VISIBLE_DEVICES=1 python src/run_t5.py \
452
+ --do_train \
453
+ --do_predict \
454
+ --predict_with_generate \
455
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
456
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights/trans_input.pt \
457
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights \
458
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights/prompts_keys_till_now.pt \
459
+ --data_dir CL_Benchmark \
460
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
461
+ --gen_data_dir generated_data/lora_gen_long_t5 \
462
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/amazon \
463
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon \
464
+ --per_device_train_batch_size 32 \
465
+ --per_device_eval_batch_size 256 \
466
+ --gradient_accumulation_steps 1 \
467
+ --learning_rate 0.0003 \
468
+ --num_train_epochs 10 \
469
+ --run_name gen_script_long_order4_t5_small_inflora \
470
+ --max_source_length 512 \
471
+ --max_target_length 50 \
472
+ --generation_max_length 50 \
473
+ --add_task_name False \
474
+ --add_dataset_name False \
475
+ --overwrite_output_dir \
476
+ --overwrite_cache \
477
+ --lr_scheduler_type constant \
478
+ --warmup_steps 0 \
479
+ --logging_strategy steps \
480
+ --logging_steps 10 \
481
+ --metric_for_best_model eval_exact_match_for_amazon \
482
+ --evaluation_strategy steps \
483
+ --save_strategy steps \
484
+ --save_total_limit 1 \
485
+ --load_best_model_at_end \
486
+ --lora_r 8 \
487
+ --lora_alpha 32 \
488
+ --lora_dropout 0.0 \
489
+ --data_replay_freq -1 \
490
+ --kl_ratio 0.1 \
491
+ --attn_temperature 1 \
492
+ --model_name inflora \
493
+ --threshold 0.995
494
+
495
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/checkpoint*
496
+
497
+ sleep 5
498
+
499
+
500
+ # Stage 11/15 (sst2) of the long_order4 InfLoRA run: src/run_t5.py is started with
+ # the LoRA weights saved by stages 1-10 plus trans_input.pt / prompts_keys_till_now.pt
+ # from 10-amazon, and writes to outputs/11-sst2. The checkpoint* entries of that
+ # output directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # NOTE(review): the GPU index is hard-coded to 1 here, whereas the specroute
+ # script of this commit takes it from $1 -- confirm this is intentional.
+ CUDA_VISIBLE_DEVICES=1 python src/run_t5.py \
501
+ --do_train \
502
+ --do_predict \
503
+ --predict_with_generate \
504
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
505
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights/trans_input.pt \
506
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights \
507
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights/prompts_keys_till_now.pt \
508
+ --data_dir CL_Benchmark \
509
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
510
+ --gen_data_dir generated_data/lora_gen_long_t5 \
511
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/sst2 \
512
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2 \
513
+ --per_device_train_batch_size 32 \
514
+ --per_device_eval_batch_size 256 \
515
+ --gradient_accumulation_steps 1 \
516
+ --learning_rate 0.0003 \
517
+ --num_train_epochs 10 \
518
+ --run_name gen_script_long_order4_t5_small_inflora \
519
+ --max_source_length 512 \
520
+ --max_target_length 50 \
521
+ --generation_max_length 50 \
522
+ --add_task_name False \
523
+ --add_dataset_name False \
524
+ --overwrite_output_dir \
525
+ --overwrite_cache \
526
+ --lr_scheduler_type constant \
527
+ --warmup_steps 0 \
528
+ --logging_strategy steps \
529
+ --logging_steps 10 \
530
+ --metric_for_best_model eval_exact_match_for_sst2 \
531
+ --evaluation_strategy steps \
532
+ --save_strategy steps \
533
+ --save_total_limit 1 \
534
+ --load_best_model_at_end \
535
+ --lora_r 8 \
536
+ --lora_alpha 32 \
537
+ --lora_dropout 0.0 \
538
+ --data_replay_freq -1 \
539
+ --kl_ratio 0.1 \
540
+ --attn_temperature 1 \
541
+ --model_name inflora \
542
+ --threshold 0.995
543
+
544
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/checkpoint*
545
+
546
+ sleep 5
547
+
548
+
549
+ # Stage 12/15 (dbpedia) of the long_order4 InfLoRA run: src/run_t5.py is started with
+ # the LoRA weights saved by stages 1-11 plus trans_input.pt / prompts_keys_till_now.pt
+ # from 11-sst2, and writes to outputs/12-dbpedia. The checkpoint* entries of that
+ # output directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # NOTE(review): the GPU index is hard-coded to 1 here, whereas the specroute
+ # script of this commit takes it from $1 -- confirm this is intentional.
+ CUDA_VISIBLE_DEVICES=1 python src/run_t5.py \
550
+ --do_train \
551
+ --do_predict \
552
+ --predict_with_generate \
553
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
554
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights/trans_input.pt \
555
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights \
556
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights/prompts_keys_till_now.pt \
557
+ --data_dir CL_Benchmark \
558
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
559
+ --gen_data_dir generated_data/lora_gen_long_t5 \
560
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/dbpedia \
561
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia \
562
+ --per_device_train_batch_size 32 \
563
+ --per_device_eval_batch_size 256 \
564
+ --gradient_accumulation_steps 1 \
565
+ --learning_rate 0.0003 \
566
+ --num_train_epochs 10 \
567
+ --run_name gen_script_long_order4_t5_small_inflora \
568
+ --max_source_length 512 \
569
+ --max_target_length 50 \
570
+ --generation_max_length 50 \
571
+ --add_task_name False \
572
+ --add_dataset_name False \
573
+ --overwrite_output_dir \
574
+ --overwrite_cache \
575
+ --lr_scheduler_type constant \
576
+ --warmup_steps 0 \
577
+ --logging_strategy steps \
578
+ --logging_steps 10 \
579
+ --metric_for_best_model eval_exact_match_for_dbpedia \
580
+ --evaluation_strategy steps \
581
+ --save_strategy steps \
582
+ --save_total_limit 1 \
583
+ --load_best_model_at_end \
584
+ --lora_r 8 \
585
+ --lora_alpha 32 \
586
+ --lora_dropout 0.0 \
587
+ --data_replay_freq -1 \
588
+ --kl_ratio 0.1 \
589
+ --attn_temperature 1 \
590
+ --model_name inflora \
591
+ --threshold 0.995
592
+
593
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/checkpoint*
594
+
595
+ sleep 5
596
+
597
+
598
+ # Stage 13/15 (agnews) of the long_order4 InfLoRA run: src/run_t5.py is started with
+ # the LoRA weights saved by stages 1-12 plus trans_input.pt / prompts_keys_till_now.pt
+ # from 12-dbpedia, and writes to outputs/13-agnews. The checkpoint* entries of that
+ # output directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # NOTE(review): the GPU index is hard-coded to 1 here, whereas the specroute
+ # script of this commit takes it from $1 -- confirm this is intentional.
+ CUDA_VISIBLE_DEVICES=1 python src/run_t5.py \
599
+ --do_train \
600
+ --do_predict \
601
+ --predict_with_generate \
602
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
603
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights/trans_input.pt \
604
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights \
605
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights/prompts_keys_till_now.pt \
606
+ --data_dir CL_Benchmark \
607
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
608
+ --gen_data_dir generated_data/lora_gen_long_t5 \
609
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/agnews \
610
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews \
611
+ --per_device_train_batch_size 32 \
612
+ --per_device_eval_batch_size 256 \
613
+ --gradient_accumulation_steps 1 \
614
+ --learning_rate 0.0003 \
615
+ --num_train_epochs 10 \
616
+ --run_name gen_script_long_order4_t5_small_inflora \
617
+ --max_source_length 512 \
618
+ --max_target_length 50 \
619
+ --generation_max_length 50 \
620
+ --add_task_name False \
621
+ --add_dataset_name False \
622
+ --overwrite_output_dir \
623
+ --overwrite_cache \
624
+ --lr_scheduler_type constant \
625
+ --warmup_steps 0 \
626
+ --logging_strategy steps \
627
+ --logging_steps 10 \
628
+ --metric_for_best_model eval_exact_match_for_agnews \
629
+ --evaluation_strategy steps \
630
+ --save_strategy steps \
631
+ --save_total_limit 1 \
632
+ --load_best_model_at_end \
633
+ --lora_r 8 \
634
+ --lora_alpha 32 \
635
+ --lora_dropout 0.0 \
636
+ --data_replay_freq -1 \
637
+ --kl_ratio 0.1 \
638
+ --attn_temperature 1 \
639
+ --model_name inflora \
640
+ --threshold 0.995
641
+
642
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/checkpoint*
643
+
644
+ sleep 5
645
+
646
+
647
+ # Stage 14/15 (multirc) of the long_order4 InfLoRA run: src/run_t5.py is started with
+ # the LoRA weights saved by stages 1-13 plus trans_input.pt / prompts_keys_till_now.pt
+ # from 13-agnews, and writes to outputs/14-multirc. The checkpoint* entries of that
+ # output directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # NOTE(review): the GPU index is hard-coded to 1 here, whereas the specroute
+ # script of this commit takes it from $1 -- confirm this is intentional.
+ CUDA_VISIBLE_DEVICES=1 python src/run_t5.py \
648
+ --do_train \
649
+ --do_predict \
650
+ --predict_with_generate \
651
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
652
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/saved_weights/trans_input.pt \
653
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/saved_weights \
654
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/saved_weights/prompts_keys_till_now.pt \
655
+ --data_dir CL_Benchmark \
656
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
657
+ --gen_data_dir generated_data/lora_gen_long_t5 \
658
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/multirc \
659
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc \
660
+ --per_device_train_batch_size 32 \
661
+ --per_device_eval_batch_size 256 \
662
+ --gradient_accumulation_steps 1 \
663
+ --learning_rate 0.0003 \
664
+ --num_train_epochs 10 \
665
+ --run_name gen_script_long_order4_t5_small_inflora \
666
+ --max_source_length 512 \
667
+ --max_target_length 50 \
668
+ --generation_max_length 50 \
669
+ --add_task_name False \
670
+ --add_dataset_name False \
671
+ --overwrite_output_dir \
672
+ --overwrite_cache \
673
+ --lr_scheduler_type constant \
674
+ --warmup_steps 0 \
675
+ --logging_strategy steps \
676
+ --logging_steps 10 \
677
+ --metric_for_best_model eval_exact_match_for_multirc \
678
+ --evaluation_strategy steps \
679
+ --save_strategy steps \
680
+ --save_total_limit 1 \
681
+ --load_best_model_at_end \
682
+ --lora_r 8 \
683
+ --lora_alpha 32 \
684
+ --lora_dropout 0.0 \
685
+ --data_replay_freq -1 \
686
+ --kl_ratio 0.1 \
687
+ --attn_temperature 1 \
688
+ --model_name inflora \
689
+ --threshold 0.995
690
+
691
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc/checkpoint*
692
+
693
+ sleep 5
694
+
695
+
696
+ # Stage 15/15 (yahoo) of the long_order4 InfLoRA run: src/run_t5.py is started with
+ # the LoRA weights saved by stages 1-14 plus trans_input.pt / prompts_keys_till_now.pt
+ # from 14-multirc, and writes to outputs/15-yahoo. The checkpoint* entries of that
+ # output directory are removed afterwards; after a 5 second pause score.py is
+ # invoked with the run name passed as both of its arguments.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # NOTE(review): the GPU index is hard-coded to 1 here, whereas the specroute
+ # script of this commit takes it from $1 -- confirm this is intentional.
+ CUDA_VISIBLE_DEVICES=1 python src/run_t5.py \
697
+ --do_train \
698
+ --do_predict \
699
+ --predict_with_generate \
700
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
701
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc/saved_weights/trans_input.pt \
702
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc/saved_weights \
703
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc/saved_weights/prompts_keys_till_now.pt \
704
+ --data_dir CL_Benchmark \
705
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
706
+ --gen_data_dir generated_data/lora_gen_long_t5 \
707
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/yahoo \
708
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/15-yahoo \
709
+ --per_device_train_batch_size 32 \
710
+ --per_device_eval_batch_size 256 \
711
+ --gradient_accumulation_steps 1 \
712
+ --learning_rate 0.0003 \
713
+ --num_train_epochs 10 \
714
+ --run_name gen_script_long_order4_t5_small_inflora \
715
+ --max_source_length 512 \
716
+ --max_target_length 50 \
717
+ --generation_max_length 50 \
718
+ --add_task_name False \
719
+ --add_dataset_name False \
720
+ --overwrite_output_dir \
721
+ --overwrite_cache \
722
+ --lr_scheduler_type constant \
723
+ --warmup_steps 0 \
724
+ --logging_strategy steps \
725
+ --logging_steps 10 \
726
+ --metric_for_best_model eval_exact_match_for_yahoo \
727
+ --evaluation_strategy steps \
728
+ --save_strategy steps \
729
+ --save_total_limit 1 \
730
+ --load_best_model_at_end \
731
+ --lora_r 8 \
732
+ --lora_alpha 32 \
733
+ --lora_dropout 0.0 \
734
+ --data_replay_freq -1 \
735
+ --kl_ratio 0.1 \
736
+ --attn_temperature 1 \
737
+ --model_name inflora \
738
+ --threshold 0.995
739
+
740
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/15-yahoo/checkpoint*
741
+
742
+ sleep 5
743
+
744
+ CUDA_VISIBLE_DEVICES=1 python score.py gen_script_long_order4_t5_small_inflora gen_script_long_order4_t5_small_inflora
improve_gainlora/T5_small/gen_script_long_order4_t5_small_specroute.sh ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:2
9
+
10
+ # Positional arguments: $1 = GPU id used in the single-GPU modes (defaults to 0),
+ # $2 = model name or path handed to src/run_t5.py. Fail fast when $2 is missing
+ # instead of launching every training stage with a broken command line.
+ : "${2:?usage: $0 [gpu_id] <model_name_or_path>}"
+
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ # ============================================================
15
+ # Auto-detect GPU count and type for optimal parallelism
16
+ # ============================================================
17
+ NUM_GPUS=$(nvidia-smi -L 2>/dev/null | wc -l)
18
+ GPU_MEM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
19
+
20
+ if [ -z "$GPU_MEM" ]; then
21
+ echo "ERROR: No GPU detected!" >&2
22
+ exit 1
23
+ fi
24
+
25
+ # Determine GPU type: anything reporting less than 20000 MB is handled as a T4
26
+ if [ "$GPU_MEM" -lt 20000 ]; then
27
+ IS_T4=1
28
+ echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
29
+ else
30
+ IS_T4=0
31
+ echo "[GPU] Detected high-memory GPUs (${GPU_MEM}MB VRAM each)"
32
+ fi
33
+
34
+ # Determine parallelism strategy (FP16_FLAG is empty in every mode, i.e. fp32)
35
+ if [ "$IS_T4" -eq 1 ] && [ "$NUM_GPUS" -ge 2 ]; then
36
+ GPU_MODE="t4_2gpu"
37
+ GPU_IDS="0,1"
38
+ FP16_FLAG=""
39
+ echo "[GPU] Strategy: 2x T4 DataParallel + fp32"
40
+ elif [ "$IS_T4" -eq 1 ]; then
41
+ GPU_MODE="t4_1gpu"
42
+ GPU_IDS="${1:-0}"
43
+ FP16_FLAG=""
44
+ echo "[GPU] Strategy: 1x T4 + fp32"
45
+ else
46
+ GPU_MODE="a100"
47
+ GPU_IDS="${1:-0}"
48
+ FP16_FLAG=""
49
+ echo "[GPU] Strategy: A100 (single GPU, fp32)"
50
+ fi
51
+
52
+ echo "[GPU] Using CUDA_VISIBLE_DEVICES=$GPU_IDS"
53
+ echo "============================================================"
54
+ echo ""
55
+
56
+ # Stage 1/15 (mnli) of the long_order4 SpecRoute run. Batch sizes are picked from
+ # GPU_MODE, then src/run_t5.py is started with --model_name specroute and
+ # --run_single True, writing to outputs/1-mnli; the checkpoint* entries of that
+ # directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # $FP16_FLAG stays unquoted so that an empty value contributes no argument.
+ # NOTE(review): this stage selects on eval_exact_match while the later stages use
+ # eval_exact_match_for_<task> -- confirm the difference is intended.
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
57
+ BSZ=16; GA=1; EVAL_BSZ=256
58
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
59
+ BSZ=32; GA=1; EVAL_BSZ=256
60
+ else
61
+ BSZ=64; GA=1; EVAL_BSZ=512
62
+ fi
63
+
64
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
65
+ --do_train \
66
+ --predict_with_generate \
67
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
68
+ --data_dir CL_Benchmark \
69
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
70
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/mnli \
71
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli \
72
+ --per_device_train_batch_size $BSZ \
73
+ --per_device_eval_batch_size $EVAL_BSZ \
74
+ --gradient_accumulation_steps $GA \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 10 \
77
+ --run_name gen_script_long_order4_t5_small_specroute \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_exact_match \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 8 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --run_single True \
98
+ --data_replay_freq -1 \
99
+ --mlp_hidden_dim 100 \
100
+ --model_name specroute \
101
+ --threshold 0.995 \
102
+ --transthreshold 0.995 \
103
+ $FP16_FLAG
104
+
105
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/checkpoint*
106
+
107
+ sleep 5
108
+
109
+ # Stage 2/15 (cb) of the long_order4 SpecRoute run. Batch sizes are picked from
+ # GPU_MODE, then src/run_t5.py is started with --model_name specroute and the LoRA
+ # weights saved by stage 1, writing to outputs/2-cb; the checkpoint* entries of
+ # that directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # $FP16_FLAG stays unquoted so that an empty value contributes no argument.
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
110
+ BSZ=16; GA=1; EVAL_BSZ=256
111
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
112
+ BSZ=32; GA=1; EVAL_BSZ=256
113
+ else
114
+ BSZ=64; GA=1; EVAL_BSZ=512
115
+ fi
116
+
117
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
118
+ --do_train \
119
+ --predict_with_generate \
120
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
121
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights \
122
+ --data_dir CL_Benchmark \
123
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
124
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/cb \
125
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb \
126
+ --per_device_train_batch_size $BSZ \
127
+ --per_device_eval_batch_size $EVAL_BSZ \
128
+ --gradient_accumulation_steps $GA \
129
+ --learning_rate 0.0003 \
130
+ --num_train_epochs 10 \
131
+ --run_name gen_script_long_order4_t5_small_specroute \
132
+ --max_source_length 512 \
133
+ --max_target_length 50 \
134
+ --generation_max_length 50 \
135
+ --add_task_name False \
136
+ --add_dataset_name False \
137
+ --overwrite_output_dir \
138
+ --overwrite_cache \
139
+ --lr_scheduler_type constant \
140
+ --warmup_steps 0 \
141
+ --logging_strategy steps \
142
+ --logging_steps 10 \
143
+ --metric_for_best_model eval_exact_match_for_cb \
144
+ --evaluation_strategy steps \
145
+ --save_strategy steps \
146
+ --save_total_limit 1 \
147
+ --load_best_model_at_end \
148
+ --lora_r 8 \
149
+ --lora_alpha 32 \
150
+ --lora_dropout 0.0 \
151
+ --data_replay_freq -1 \
152
+ --mlp_hidden_dim 100 \
153
+ --model_name specroute \
154
+ --threshold 0.995 \
155
+ --transthreshold 0.995 \
156
+ $FP16_FLAG
157
+
158
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/checkpoint*
159
+
160
+ sleep 5
161
+
162
+ # Stage 3/15 (wic) of the long_order4 SpecRoute run. Batch sizes are picked from
+ # GPU_MODE, then src/run_t5.py is started with --model_name specroute and the LoRA
+ # weights saved by stages 1-2, writing to outputs/3-wic; the checkpoint* entries of
+ # that directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # $FP16_FLAG stays unquoted so that an empty value contributes no argument.
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
163
+ BSZ=16; GA=1; EVAL_BSZ=256
164
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
165
+ BSZ=32; GA=1; EVAL_BSZ=256
166
+ else
167
+ BSZ=64; GA=1; EVAL_BSZ=512
168
+ fi
169
+
170
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
171
+ --do_train \
172
+ --predict_with_generate \
173
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
174
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights \
175
+ --data_dir CL_Benchmark \
176
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
177
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/wic \
178
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic \
179
+ --per_device_train_batch_size $BSZ \
180
+ --per_device_eval_batch_size $EVAL_BSZ \
181
+ --gradient_accumulation_steps $GA \
182
+ --learning_rate 0.0003 \
183
+ --num_train_epochs 10 \
184
+ --run_name gen_script_long_order4_t5_small_specroute \
185
+ --max_source_length 512 \
186
+ --max_target_length 50 \
187
+ --generation_max_length 50 \
188
+ --add_task_name False \
189
+ --add_dataset_name False \
190
+ --overwrite_output_dir \
191
+ --overwrite_cache \
192
+ --lr_scheduler_type constant \
193
+ --warmup_steps 0 \
194
+ --logging_strategy steps \
195
+ --logging_steps 10 \
196
+ --metric_for_best_model eval_exact_match_for_wic \
197
+ --evaluation_strategy steps \
198
+ --save_strategy steps \
199
+ --save_total_limit 1 \
200
+ --load_best_model_at_end \
201
+ --lora_r 8 \
202
+ --lora_alpha 32 \
203
+ --lora_dropout 0.0 \
204
+ --data_replay_freq -1 \
205
+ --mlp_hidden_dim 100 \
206
+ --model_name specroute \
207
+ --threshold 0.995 \
208
+ --transthreshold 0.995 \
209
+ $FP16_FLAG
210
+
211
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/checkpoint*
212
+
213
+ sleep 5
214
+
215
+ # Stage 4/15 (copa) of the long_order4 SpecRoute run. Batch sizes are picked from
+ # GPU_MODE, then src/run_t5.py is started with --model_name specroute and the LoRA
+ # weights saved by stages 1-3, writing to outputs/4-copa; the checkpoint* entries
+ # of that directory are removed afterwards, followed by a 5 second pause.
+ # $2 is the model name/path; the script aborts with a message when it is missing.
+ # $FP16_FLAG stays unquoted so that an empty value contributes no argument.
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
216
+ BSZ=16; GA=1; EVAL_BSZ=256
217
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
218
+ BSZ=32; GA=1; EVAL_BSZ=256
219
+ else
220
+ BSZ=64; GA=1; EVAL_BSZ=512
221
+ fi
222
+
223
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
224
+ --do_train \
225
+ --predict_with_generate \
226
+ --model_name_or_path "${2:?model name or path required as 2nd argument}" \
227
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights \
228
+ --data_dir CL_Benchmark \
229
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
230
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/copa \
231
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa \
232
+ --per_device_train_batch_size $BSZ \
233
+ --per_device_eval_batch_size $EVAL_BSZ \
234
+ --gradient_accumulation_steps $GA \
235
+ --learning_rate 0.0003 \
236
+ --num_train_epochs 10 \
237
+ --run_name gen_script_long_order4_t5_small_specroute \
238
+ --max_source_length 512 \
239
+ --max_target_length 50 \
240
+ --generation_max_length 50 \
241
+ --add_task_name False \
242
+ --add_dataset_name False \
243
+ --overwrite_output_dir \
244
+ --overwrite_cache \
245
+ --lr_scheduler_type constant \
246
+ --warmup_steps 0 \
247
+ --logging_strategy steps \
248
+ --logging_steps 10 \
249
+ --metric_for_best_model eval_exact_match_for_copa \
250
+ --evaluation_strategy steps \
251
+ --save_strategy steps \
252
+ --save_total_limit 1 \
253
+ --load_best_model_at_end \
254
+ --lora_r 8 \
255
+ --lora_alpha 32 \
256
+ --lora_dropout 0.0 \
257
+ --data_replay_freq -1 \
258
+ --mlp_hidden_dim 100 \
259
+ --model_name specroute \
260
+ --threshold 0.995 \
261
+ --transthreshold 0.995 \
262
+ $FP16_FLAG
263
+
264
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/checkpoint*
265
+
266
+ sleep 5
267
+
268
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
269
+ BSZ=16; GA=1; EVAL_BSZ=256
270
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
271
+ BSZ=32; GA=1; EVAL_BSZ=256
272
+ else
273
+ BSZ=64; GA=1; EVAL_BSZ=512
274
+ fi
275
+
276
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
277
+ --do_train \
278
+ --predict_with_generate \
279
+ --model_name_or_path $2 \
280
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights \
281
+ --data_dir CL_Benchmark \
282
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
283
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/qqp \
284
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp \
285
+ --per_device_train_batch_size $BSZ \
286
+ --per_device_eval_batch_size $EVAL_BSZ \
287
+ --gradient_accumulation_steps $GA \
288
+ --learning_rate 0.0003 \
289
+ --num_train_epochs 10 \
290
+ --run_name gen_script_long_order4_t5_small_specroute \
291
+ --max_source_length 512 \
292
+ --max_target_length 50 \
293
+ --generation_max_length 50 \
294
+ --add_task_name False \
295
+ --add_dataset_name False \
296
+ --overwrite_output_dir \
297
+ --overwrite_cache \
298
+ --lr_scheduler_type constant \
299
+ --warmup_steps 0 \
300
+ --logging_strategy steps \
301
+ --logging_steps 10 \
302
+ --metric_for_best_model eval_exact_match_for_qqp \
303
+ --evaluation_strategy steps \
304
+ --save_strategy steps \
305
+ --save_total_limit 1 \
306
+ --load_best_model_at_end \
307
+ --lora_r 8 \
308
+ --lora_alpha 32 \
309
+ --lora_dropout 0.0 \
310
+ --data_replay_freq -1 \
311
+ --mlp_hidden_dim 100 \
312
+ --model_name specroute \
313
+ --threshold 0.995 \
314
+ --transthreshold 0.995 \
315
+ $FP16_FLAG
316
+
317
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/checkpoint*
318
+
319
+ sleep 5
320
+
321
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
322
+ BSZ=16; GA=1; EVAL_BSZ=256
323
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
324
+ BSZ=32; GA=1; EVAL_BSZ=256
325
+ else
326
+ BSZ=64; GA=1; EVAL_BSZ=512
327
+ fi
328
+
329
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
330
+ --do_train \
331
+ --predict_with_generate \
332
+ --model_name_or_path $2 \
333
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights \
334
+ --data_dir CL_Benchmark \
335
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
336
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/boolq \
337
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq \
338
+ --per_device_train_batch_size $BSZ \
339
+ --per_device_eval_batch_size $EVAL_BSZ \
340
+ --gradient_accumulation_steps $GA \
341
+ --learning_rate 0.0003 \
342
+ --num_train_epochs 10 \
343
+ --run_name gen_script_long_order4_t5_small_specroute \
344
+ --max_source_length 512 \
345
+ --max_target_length 50 \
346
+ --generation_max_length 50 \
347
+ --add_task_name False \
348
+ --add_dataset_name False \
349
+ --overwrite_output_dir \
350
+ --overwrite_cache \
351
+ --lr_scheduler_type constant \
352
+ --warmup_steps 0 \
353
+ --logging_strategy steps \
354
+ --logging_steps 10 \
355
+ --metric_for_best_model eval_exact_match_for_boolq \
356
+ --evaluation_strategy steps \
357
+ --save_strategy steps \
358
+ --save_total_limit 1 \
359
+ --load_best_model_at_end \
360
+ --lora_r 8 \
361
+ --lora_alpha 32 \
362
+ --lora_dropout 0.0 \
363
+ --data_replay_freq -1 \
364
+ --mlp_hidden_dim 100 \
365
+ --model_name specroute \
366
+ --threshold 0.995 \
367
+ --transthreshold 0.995 \
368
+ $FP16_FLAG
369
+
370
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/checkpoint*
371
+
372
+ sleep 5
373
+
374
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
375
+ BSZ=16; GA=1; EVAL_BSZ=256
376
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
377
+ BSZ=32; GA=1; EVAL_BSZ=256
378
+ else
379
+ BSZ=64; GA=1; EVAL_BSZ=512
380
+ fi
381
+
382
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
383
+ --do_train \
384
+ --predict_with_generate \
385
+ --model_name_or_path $2 \
386
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights \
387
+ --data_dir CL_Benchmark \
388
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
389
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/rte \
390
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte \
391
+ --per_device_train_batch_size $BSZ \
392
+ --per_device_eval_batch_size $EVAL_BSZ \
393
+ --gradient_accumulation_steps $GA \
394
+ --learning_rate 0.0003 \
395
+ --num_train_epochs 10 \
396
+ --run_name gen_script_long_order4_t5_small_specroute \
397
+ --max_source_length 512 \
398
+ --max_target_length 50 \
399
+ --generation_max_length 50 \
400
+ --add_task_name False \
401
+ --add_dataset_name False \
402
+ --overwrite_output_dir \
403
+ --overwrite_cache \
404
+ --lr_scheduler_type constant \
405
+ --warmup_steps 0 \
406
+ --logging_strategy steps \
407
+ --logging_steps 10 \
408
+ --metric_for_best_model eval_exact_match_for_rte \
409
+ --evaluation_strategy steps \
410
+ --save_strategy steps \
411
+ --save_total_limit 1 \
412
+ --load_best_model_at_end \
413
+ --lora_r 8 \
414
+ --lora_alpha 32 \
415
+ --lora_dropout 0.0 \
416
+ --data_replay_freq -1 \
417
+ --mlp_hidden_dim 100 \
418
+ --model_name specroute \
419
+ --threshold 0.995 \
420
+ --transthreshold 0.995 \
421
+ $FP16_FLAG
422
+
423
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/checkpoint*
424
+
425
+ sleep 5
426
+
427
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
428
+ BSZ=16; GA=1; EVAL_BSZ=256
429
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
430
+ BSZ=32; GA=1; EVAL_BSZ=256
431
+ else
432
+ BSZ=64; GA=1; EVAL_BSZ=512
433
+ fi
434
+
435
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
436
+ --do_train \
437
+ --predict_with_generate \
438
+ --model_name_or_path $2 \
439
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/saved_weights \
440
+ --data_dir CL_Benchmark \
441
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
442
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/imdb \
443
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb \
444
+ --per_device_train_batch_size $BSZ \
445
+ --per_device_eval_batch_size $EVAL_BSZ \
446
+ --gradient_accumulation_steps $GA \
447
+ --learning_rate 0.0003 \
448
+ --num_train_epochs 10 \
449
+ --run_name gen_script_long_order4_t5_small_specroute \
450
+ --max_source_length 512 \
451
+ --max_target_length 50 \
452
+ --generation_max_length 50 \
453
+ --add_task_name False \
454
+ --add_dataset_name False \
455
+ --overwrite_output_dir \
456
+ --overwrite_cache \
457
+ --lr_scheduler_type constant \
458
+ --warmup_steps 0 \
459
+ --logging_strategy steps \
460
+ --logging_steps 10 \
461
+ --metric_for_best_model eval_exact_match_for_imdb \
462
+ --evaluation_strategy steps \
463
+ --save_strategy steps \
464
+ --save_total_limit 1 \
465
+ --load_best_model_at_end \
466
+ --lora_r 8 \
467
+ --lora_alpha 32 \
468
+ --lora_dropout 0.0 \
469
+ --data_replay_freq -1 \
470
+ --mlp_hidden_dim 100 \
471
+ --model_name specroute \
472
+ --threshold 0.995 \
473
+ --transthreshold 0.995 \
474
+ $FP16_FLAG
475
+
476
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb/checkpoint*
477
+
478
+ sleep 5
479
+
480
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
481
+ BSZ=16; GA=1; EVAL_BSZ=256
482
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
483
+ BSZ=32; GA=1; EVAL_BSZ=256
484
+ else
485
+ BSZ=64; GA=1; EVAL_BSZ=512
486
+ fi
487
+
488
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
489
+ --do_train \
490
+ --predict_with_generate \
491
+ --model_name_or_path $2 \
492
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb/saved_weights \
493
+ --data_dir CL_Benchmark \
494
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
495
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/yelp \
496
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/9-yelp \
497
+ --per_device_train_batch_size $BSZ \
498
+ --per_device_eval_batch_size $EVAL_BSZ \
499
+ --gradient_accumulation_steps $GA \
500
+ --learning_rate 0.0003 \
501
+ --num_train_epochs 10 \
502
+ --run_name gen_script_long_order4_t5_small_specroute \
503
+ --max_source_length 512 \
504
+ --max_target_length 50 \
505
+ --generation_max_length 50 \
506
+ --add_task_name False \
507
+ --add_dataset_name False \
508
+ --overwrite_output_dir \
509
+ --overwrite_cache \
510
+ --lr_scheduler_type constant \
511
+ --warmup_steps 0 \
512
+ --logging_strategy steps \
513
+ --logging_steps 10 \
514
+ --metric_for_best_model eval_exact_match_for_yelp \
515
+ --evaluation_strategy steps \
516
+ --save_strategy steps \
517
+ --save_total_limit 1 \
518
+ --load_best_model_at_end \
519
+ --lora_r 8 \
520
+ --lora_alpha 32 \
521
+ --lora_dropout 0.0 \
522
+ --data_replay_freq -1 \
523
+ --mlp_hidden_dim 100 \
524
+ --model_name specroute \
525
+ --threshold 0.995 \
526
+ --transthreshold 0.995 \
527
+ $FP16_FLAG
528
+
529
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/9-yelp/checkpoint*
530
+
531
+ sleep 5
532
+
533
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
534
+ BSZ=16; GA=1; EVAL_BSZ=256
535
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
536
+ BSZ=32; GA=1; EVAL_BSZ=256
537
+ else
538
+ BSZ=64; GA=1; EVAL_BSZ=512
539
+ fi
540
+
541
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
542
+ --do_train \
543
+ --predict_with_generate \
544
+ --model_name_or_path $2 \
545
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/9-yelp/saved_weights \
546
+ --data_dir CL_Benchmark \
547
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
548
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/amazon \
549
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/10-amazon \
550
+ --per_device_train_batch_size $BSZ \
551
+ --per_device_eval_batch_size $EVAL_BSZ \
552
+ --gradient_accumulation_steps $GA \
553
+ --learning_rate 0.0003 \
554
+ --num_train_epochs 10 \
555
+ --run_name gen_script_long_order4_t5_small_specroute \
556
+ --max_source_length 512 \
557
+ --max_target_length 50 \
558
+ --generation_max_length 50 \
559
+ --add_task_name False \
560
+ --add_dataset_name False \
561
+ --overwrite_output_dir \
562
+ --overwrite_cache \
563
+ --lr_scheduler_type constant \
564
+ --warmup_steps 0 \
565
+ --logging_strategy steps \
566
+ --logging_steps 10 \
567
+ --metric_for_best_model eval_exact_match_for_amazon \
568
+ --evaluation_strategy steps \
569
+ --save_strategy steps \
570
+ --save_total_limit 1 \
571
+ --load_best_model_at_end \
572
+ --lora_r 8 \
573
+ --lora_alpha 32 \
574
+ --lora_dropout 0.0 \
575
+ --data_replay_freq -1 \
576
+ --mlp_hidden_dim 100 \
577
+ --model_name specroute \
578
+ --threshold 0.995 \
579
+ --transthreshold 0.995 \
580
+ $FP16_FLAG
581
+
582
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/10-amazon/checkpoint*
583
+
584
+ sleep 5
585
+
586
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
587
+ BSZ=16; GA=1; EVAL_BSZ=256
588
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
589
+ BSZ=32; GA=1; EVAL_BSZ=256
590
+ else
591
+ BSZ=64; GA=1; EVAL_BSZ=512
592
+ fi
593
+
594
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
595
+ --do_train \
596
+ --predict_with_generate \
597
+ --model_name_or_path $2 \
598
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/10-amazon/saved_weights \
599
+ --data_dir CL_Benchmark \
600
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
601
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/sst2 \
602
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/11-sst2 \
603
+ --per_device_train_batch_size $BSZ \
604
+ --per_device_eval_batch_size $EVAL_BSZ \
605
+ --gradient_accumulation_steps $GA \
606
+ --learning_rate 0.0003 \
607
+ --num_train_epochs 10 \
608
+ --run_name gen_script_long_order4_t5_small_specroute \
609
+ --max_source_length 512 \
610
+ --max_target_length 50 \
611
+ --generation_max_length 50 \
612
+ --add_task_name False \
613
+ --add_dataset_name False \
614
+ --overwrite_output_dir \
615
+ --overwrite_cache \
616
+ --lr_scheduler_type constant \
617
+ --warmup_steps 0 \
618
+ --logging_strategy steps \
619
+ --logging_steps 10 \
620
+ --metric_for_best_model eval_exact_match_for_sst2 \
621
+ --evaluation_strategy steps \
622
+ --save_strategy steps \
623
+ --save_total_limit 1 \
624
+ --load_best_model_at_end \
625
+ --lora_r 8 \
626
+ --lora_alpha 32 \
627
+ --lora_dropout 0.0 \
628
+ --data_replay_freq -1 \
629
+ --mlp_hidden_dim 100 \
630
+ --model_name specroute \
631
+ --threshold 0.995 \
632
+ --transthreshold 0.995 \
633
+ $FP16_FLAG
634
+
635
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/11-sst2/checkpoint*
636
+
637
+ sleep 5
638
+
639
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
640
+ BSZ=16; GA=1; EVAL_BSZ=256
641
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
642
+ BSZ=32; GA=1; EVAL_BSZ=256
643
+ else
644
+ BSZ=64; GA=1; EVAL_BSZ=512
645
+ fi
646
+
647
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
648
+ --do_train \
649
+ --predict_with_generate \
650
+ --model_name_or_path $2 \
651
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/11-sst2/saved_weights \
652
+ --data_dir CL_Benchmark \
653
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
654
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/dbpedia \
655
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/12-dbpedia \
656
+ --per_device_train_batch_size $BSZ \
657
+ --per_device_eval_batch_size $EVAL_BSZ \
658
+ --gradient_accumulation_steps $GA \
659
+ --learning_rate 0.0003 \
660
+ --num_train_epochs 10 \
661
+ --run_name gen_script_long_order4_t5_small_specroute \
662
+ --max_source_length 512 \
663
+ --max_target_length 50 \
664
+ --generation_max_length 50 \
665
+ --add_task_name False \
666
+ --add_dataset_name False \
667
+ --overwrite_output_dir \
668
+ --overwrite_cache \
669
+ --lr_scheduler_type constant \
670
+ --warmup_steps 0 \
671
+ --logging_strategy steps \
672
+ --logging_steps 10 \
673
+ --metric_for_best_model eval_exact_match_for_dbpedia \
674
+ --evaluation_strategy steps \
675
+ --save_strategy steps \
676
+ --save_total_limit 1 \
677
+ --load_best_model_at_end \
678
+ --lora_r 8 \
679
+ --lora_alpha 32 \
680
+ --lora_dropout 0.0 \
681
+ --data_replay_freq -1 \
682
+ --mlp_hidden_dim 100 \
683
+ --model_name specroute \
684
+ --threshold 0.995 \
685
+ --transthreshold 0.995 \
686
+ $FP16_FLAG
687
+
688
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/12-dbpedia/checkpoint*
689
+
690
+ sleep 5
691
+
692
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
693
+ BSZ=16; GA=1; EVAL_BSZ=256
694
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
695
+ BSZ=32; GA=1; EVAL_BSZ=256
696
+ else
697
+ BSZ=64; GA=1; EVAL_BSZ=512
698
+ fi
699
+
700
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
701
+ --do_train \
702
+ --predict_with_generate \
703
+ --model_name_or_path $2 \
704
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/12-dbpedia/saved_weights \
705
+ --data_dir CL_Benchmark \
706
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
707
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/agnews \
708
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/13-agnews \
709
+ --per_device_train_batch_size $BSZ \
710
+ --per_device_eval_batch_size $EVAL_BSZ \
711
+ --gradient_accumulation_steps $GA \
712
+ --learning_rate 0.0003 \
713
+ --num_train_epochs 10 \
714
+ --run_name gen_script_long_order4_t5_small_specroute \
715
+ --max_source_length 512 \
716
+ --max_target_length 50 \
717
+ --generation_max_length 50 \
718
+ --add_task_name False \
719
+ --add_dataset_name False \
720
+ --overwrite_output_dir \
721
+ --overwrite_cache \
722
+ --lr_scheduler_type constant \
723
+ --warmup_steps 0 \
724
+ --logging_strategy steps \
725
+ --logging_steps 10 \
726
+ --metric_for_best_model eval_exact_match_for_agnews \
727
+ --evaluation_strategy steps \
728
+ --save_strategy steps \
729
+ --save_total_limit 1 \
730
+ --load_best_model_at_end \
731
+ --lora_r 8 \
732
+ --lora_alpha 32 \
733
+ --lora_dropout 0.0 \
734
+ --data_replay_freq -1 \
735
+ --mlp_hidden_dim 100 \
736
+ --model_name specroute \
737
+ --threshold 0.995 \
738
+ --transthreshold 0.995 \
739
+ $FP16_FLAG
740
+
741
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/13-agnews/checkpoint*
742
+
743
+ sleep 5
744
+
745
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
746
+ BSZ=16; GA=1; EVAL_BSZ=256
747
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
748
+ BSZ=32; GA=1; EVAL_BSZ=256
749
+ else
750
+ BSZ=64; GA=1; EVAL_BSZ=512
751
+ fi
752
+
753
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
754
+ --do_train \
755
+ --predict_with_generate \
756
+ --model_name_or_path $2 \
757
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/12-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/13-agnews/saved_weights \
758
+ --data_dir CL_Benchmark \
759
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
760
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/multirc \
761
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/14-multirc \
762
+ --per_device_train_batch_size $BSZ \
763
+ --per_device_eval_batch_size $EVAL_BSZ \
764
+ --gradient_accumulation_steps $GA \
765
+ --learning_rate 0.0003 \
766
+ --num_train_epochs 10 \
767
+ --run_name gen_script_long_order4_t5_small_specroute \
768
+ --max_source_length 512 \
769
+ --max_target_length 50 \
770
+ --generation_max_length 50 \
771
+ --add_task_name False \
772
+ --add_dataset_name False \
773
+ --overwrite_output_dir \
774
+ --overwrite_cache \
775
+ --lr_scheduler_type constant \
776
+ --warmup_steps 0 \
777
+ --logging_strategy steps \
778
+ --logging_steps 10 \
779
+ --metric_for_best_model eval_exact_match_for_multirc \
780
+ --evaluation_strategy steps \
781
+ --save_strategy steps \
782
+ --save_total_limit 1 \
783
+ --load_best_model_at_end \
784
+ --lora_r 8 \
785
+ --lora_alpha 32 \
786
+ --lora_dropout 0.0 \
787
+ --data_replay_freq -1 \
788
+ --mlp_hidden_dim 100 \
789
+ --model_name specroute \
790
+ --threshold 0.995 \
791
+ --transthreshold 0.995 \
792
+ $FP16_FLAG
793
+
794
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/14-multirc/checkpoint*
795
+
796
+ sleep 5
797
+
798
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
799
+ BSZ=16; GA=1; EVAL_BSZ=256
800
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
801
+ BSZ=32; GA=1; EVAL_BSZ=256
802
+ else
803
+ BSZ=64; GA=1; EVAL_BSZ=512
804
+ fi
805
+
806
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
807
+ --do_train \
808
+ --predict_with_generate \
809
+ --model_name_or_path $2 \
810
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/12-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/13-agnews/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/14-multirc/saved_weights \
811
+ --data_dir CL_Benchmark \
812
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
813
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/yahoo \
814
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/15-yahoo \
815
+ --per_device_train_batch_size $BSZ \
816
+ --per_device_eval_batch_size $EVAL_BSZ \
817
+ --gradient_accumulation_steps $GA \
818
+ --learning_rate 0.0003 \
819
+ --num_train_epochs 10 \
820
+ --run_name gen_script_long_order4_t5_small_specroute \
821
+ --max_source_length 512 \
822
+ --max_target_length 50 \
823
+ --generation_max_length 50 \
824
+ --add_task_name False \
825
+ --add_dataset_name False \
826
+ --overwrite_output_dir \
827
+ --overwrite_cache \
828
+ --lr_scheduler_type constant \
829
+ --warmup_steps 0 \
830
+ --logging_strategy steps \
831
+ --logging_steps 10 \
832
+ --metric_for_best_model eval_exact_match_for_yahoo \
833
+ --evaluation_strategy steps \
834
+ --save_strategy steps \
835
+ --save_total_limit 1 \
836
+ --load_best_model_at_end \
837
+ --lora_r 8 \
838
+ --lora_alpha 32 \
839
+ --lora_dropout 0.0 \
840
+ --data_replay_freq -1 \
841
+ --mlp_hidden_dim 100 \
842
+ --model_name specroute \
843
+ --threshold 0.995 \
844
+ --transthreshold 0.995 \
845
+ $FP16_FLAG
846
+
847
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_specroute/outputs/15-yahoo/checkpoint*
848
+
849
+ sleep 5
improve_gainlora/T5_small/gen_script_superni_order1_t5_small_gainlora_inflora.sh ADDED
@@ -0,0 +1,744 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
21
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1572_samsum_summary \
22
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary \
23
+ --per_device_train_batch_size 16 \
24
+ --per_device_eval_batch_size 8 \
25
+ --gradient_accumulation_steps 2 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 100 \
28
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_rougeL \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --lora_r 4 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --load_best_model_at_end \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --kl_ratio 0.5 \
51
+ --attn_temperature 1 \
52
+ --mlp_hidden_dim 100 \
53
+ --model_name gainlora_inflora \
54
+ --threshold 0.995 \
55
+ --transthreshold 0.995
56
+
57
+
58
+
59
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
60
+ --do_train \
61
+ --do_predict \
62
+ --predict_with_generate \
63
+ --model_name_or_path $2 \
64
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights/trans_input.pt \
65
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights \
66
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights/prompts_keys_till_now.pt \
67
+ --data_dir CL_Benchmark \
68
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
69
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
70
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task363_sst2_polarity_classification \
71
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification \
72
+ --per_device_train_batch_size 16 \
73
+ --per_device_eval_batch_size 8 \
74
+ --gradient_accumulation_steps 2 \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 100 \
77
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 4 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --add_instruction_replay \
98
+ --data_replay_freq -1 \
99
+ --replay_after_n_epoch 0 \
100
+ --kl_ratio 0.5 \
101
+ --attn_temperature 1 \
102
+ --mlp_hidden_dim 100 \
103
+ --model_name gainlora_inflora \
104
+ --threshold 0.995 \
105
+ --transthreshold 0.995
106
+
107
+
108
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
109
+ --do_train \
110
+ --do_predict \
111
+ --predict_with_generate \
112
+ --model_name_or_path $2 \
113
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights/trans_input.pt \
114
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights \
115
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights/prompts_keys_till_now.pt \
116
+ --data_dir CL_Benchmark \
117
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
118
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
119
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1290_xsum_summarization \
120
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization \
121
+ --per_device_train_batch_size 16 \
122
+ --per_device_eval_batch_size 8 \
123
+ --gradient_accumulation_steps 2 \
124
+ --learning_rate 0.0003 \
125
+ --num_train_epochs 100 \
126
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
127
+ --max_source_length 512 \
128
+ --max_target_length 50 \
129
+ --generation_max_length 50 \
130
+ --add_task_name False \
131
+ --add_dataset_name False \
132
+ --overwrite_output_dir \
133
+ --overwrite_cache \
134
+ --lr_scheduler_type constant \
135
+ --warmup_steps 0 \
136
+ --logging_strategy steps \
137
+ --logging_steps 10 \
138
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
139
+ --evaluation_strategy steps \
140
+ --save_strategy steps \
141
+ --save_total_limit 1 \
142
+ --load_best_model_at_end \
143
+ --lora_r 4 \
144
+ --lora_alpha 32 \
145
+ --lora_dropout 0.0 \
146
+ --add_instruction_replay \
147
+ --data_replay_freq -1 \
148
+ --replay_after_n_epoch 0 \
149
+ --kl_ratio 0.5 \
150
+ --attn_temperature 1 \
151
+ --mlp_hidden_dim 100 \
152
+ --model_name gainlora_inflora \
153
+ --threshold 0.995 \
154
+ --transthreshold 0.995
155
+
156
+
157
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
158
+ --do_train \
159
+ --do_predict \
160
+ --predict_with_generate \
161
+ --model_name_or_path $2 \
162
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights/trans_input.pt \
163
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights \
164
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights/prompts_keys_till_now.pt \
165
+ --data_dir CL_Benchmark \
166
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
167
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
168
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task181_outcome_extraction \
169
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction \
170
+ --per_device_train_batch_size 16 \
171
+ --per_device_eval_batch_size 8 \
172
+ --gradient_accumulation_steps 2 \
173
+ --learning_rate 0.0003 \
174
+ --num_train_epochs 100 \
175
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
176
+ --max_source_length 512 \
177
+ --max_target_length 50 \
178
+ --generation_max_length 50 \
179
+ --add_task_name False \
180
+ --add_dataset_name False \
181
+ --overwrite_output_dir \
182
+ --overwrite_cache \
183
+ --lr_scheduler_type constant \
184
+ --warmup_steps 0 \
185
+ --logging_strategy steps \
186
+ --logging_steps 10 \
187
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
188
+ --evaluation_strategy steps \
189
+ --save_strategy steps \
190
+ --save_total_limit 1 \
191
+ --load_best_model_at_end \
192
+ --lora_r 4 \
193
+ --lora_alpha 32 \
194
+ --lora_dropout 0.0 \
195
+ --add_instruction_replay \
196
+ --data_replay_freq -1 \
197
+ --replay_after_n_epoch 0 \
198
+ --kl_ratio 0.5 \
199
+ --attn_temperature 1 \
200
+ --mlp_hidden_dim 100 \
201
+ --model_name gainlora_inflora \
202
+ --threshold 0.995 \
203
+ --transthreshold 0.995
204
+
205
+
206
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
207
+ --do_train \
208
+ --do_predict \
209
+ --predict_with_generate \
210
+ --model_name_or_path $2 \
211
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights/trans_input.pt \
212
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights \
213
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights/prompts_keys_till_now.pt \
214
+ --data_dir CL_Benchmark \
215
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
216
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
217
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task002_quoref_answer_generation \
218
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation \
219
+ --per_device_train_batch_size 16 \
220
+ --per_device_eval_batch_size 8 \
221
+ --gradient_accumulation_steps 2 \
222
+ --learning_rate 0.0003 \
223
+ --num_train_epochs 100 \
224
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
225
+ --max_source_length 512 \
226
+ --max_target_length 50 \
227
+ --generation_max_length 50 \
228
+ --add_task_name False \
229
+ --add_dataset_name False \
230
+ --overwrite_output_dir \
231
+ --overwrite_cache \
232
+ --lr_scheduler_type constant \
233
+ --warmup_steps 0 \
234
+ --logging_strategy steps \
235
+ --logging_steps 10 \
236
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
237
+ --evaluation_strategy steps \
238
+ --save_strategy steps \
239
+ --save_total_limit 1 \
240
+ --load_best_model_at_end \
241
+ --lora_r 4 \
242
+ --lora_alpha 32 \
243
+ --lora_dropout 0.0 \
244
+ --add_instruction_replay \
245
+ --data_replay_freq -1 \
246
+ --replay_after_n_epoch 0 \
247
+ --kl_ratio 0.5 \
248
+ --attn_temperature 1 \
249
+ --mlp_hidden_dim 100 \
250
+ --model_name gainlora_inflora \
251
+ --threshold 0.995 \
252
+ --transthreshold 0.995
253
+
254
+
255
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
256
+ --do_train \
257
+ --do_predict \
258
+ --predict_with_generate \
259
+ --model_name_or_path $2 \
260
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights/trans_input.pt \
261
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights \
262
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights/prompts_keys_till_now.pt \
263
+ --data_dir CL_Benchmark \
264
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
265
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
266
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1510_evalution_relation_extraction \
267
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction \
268
+ --per_device_train_batch_size 16 \
269
+ --per_device_eval_batch_size 8 \
270
+ --gradient_accumulation_steps 2 \
271
+ --learning_rate 0.0003 \
272
+ --num_train_epochs 100 \
273
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
274
+ --max_source_length 512 \
275
+ --max_target_length 50 \
276
+ --generation_max_length 50 \
277
+ --add_task_name False \
278
+ --add_dataset_name False \
279
+ --overwrite_output_dir \
280
+ --overwrite_cache \
281
+ --lr_scheduler_type constant \
282
+ --warmup_steps 0 \
283
+ --logging_strategy steps \
284
+ --logging_steps 10 \
285
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
286
+ --evaluation_strategy steps \
287
+ --save_strategy steps \
288
+ --save_total_limit 1 \
289
+ --load_best_model_at_end \
290
+ --lora_r 4 \
291
+ --lora_alpha 32 \
292
+ --lora_dropout 0.0 \
293
+ --add_instruction_replay \
294
+ --data_replay_freq -1 \
295
+ --replay_after_n_epoch 0 \
296
+ --kl_ratio 0.5 \
297
+ --attn_temperature 1 \
298
+ --mlp_hidden_dim 100 \
299
+ --model_name gainlora_inflora \
300
+ --threshold 0.995 \
301
+ --transthreshold 0.995
302
+
303
+
304
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
305
+ --do_train \
306
+ --do_predict \
307
+ --predict_with_generate \
308
+ --model_name_or_path $2 \
309
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights/trans_input.pt \
310
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights \
311
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights/prompts_keys_till_now.pt \
312
+ --data_dir CL_Benchmark \
313
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
314
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
315
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task639_multi_woz_user_utterance_generation \
316
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation \
317
+ --per_device_train_batch_size 16 \
318
+ --per_device_eval_batch_size 8 \
319
+ --gradient_accumulation_steps 2 \
320
+ --learning_rate 0.0003 \
321
+ --num_train_epochs 100 \
322
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
323
+ --max_source_length 512 \
324
+ --max_target_length 50 \
325
+ --generation_max_length 50 \
326
+ --add_task_name False \
327
+ --add_dataset_name False \
328
+ --overwrite_output_dir \
329
+ --overwrite_cache \
330
+ --lr_scheduler_type constant \
331
+ --warmup_steps 0 \
332
+ --logging_strategy steps \
333
+ --logging_steps 10 \
334
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
335
+ --evaluation_strategy steps \
336
+ --save_strategy steps \
337
+ --save_total_limit 1 \
338
+ --load_best_model_at_end \
339
+ --lora_r 4 \
340
+ --lora_alpha 32 \
341
+ --lora_dropout 0.0 \
342
+ --add_instruction_replay \
343
+ --data_replay_freq -1 \
344
+ --replay_after_n_epoch 0 \
345
+ --kl_ratio 0.5 \
346
+ --attn_temperature 1 \
347
+ --mlp_hidden_dim 100 \
348
+ --model_name gainlora_inflora \
349
+ --threshold 0.995 \
350
+ --transthreshold 0.995
351
+
352
+
353
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
354
+ --do_train \
355
+ --do_predict \
356
+ --predict_with_generate \
357
+ --model_name_or_path $2 \
358
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights/trans_input.pt \
359
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights \
360
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights/prompts_keys_till_now.pt \
361
+ --data_dir CL_Benchmark \
362
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
363
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
364
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1729_personachat_generate_next \
365
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next \
366
+ --per_device_train_batch_size 16 \
367
+ --per_device_eval_batch_size 8 \
368
+ --gradient_accumulation_steps 2 \
369
+ --learning_rate 0.0003 \
370
+ --num_train_epochs 100 \
371
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
372
+ --max_source_length 512 \
373
+ --max_target_length 50 \
374
+ --generation_max_length 50 \
375
+ --add_task_name False \
376
+ --add_dataset_name False \
377
+ --overwrite_output_dir \
378
+ --overwrite_cache \
379
+ --lr_scheduler_type constant \
380
+ --warmup_steps 0 \
381
+ --logging_strategy steps \
382
+ --logging_steps 10 \
383
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
384
+ --evaluation_strategy steps \
385
+ --save_strategy steps \
386
+ --save_total_limit 1 \
387
+ --load_best_model_at_end \
388
+ --lora_r 4 \
389
+ --lora_alpha 32 \
390
+ --lora_dropout 0.0 \
391
+ --add_instruction_replay \
392
+ --data_replay_freq -1 \
393
+ --replay_after_n_epoch 0 \
394
+ --kl_ratio 0.5 \
395
+ --attn_temperature 1 \
396
+ --mlp_hidden_dim 100 \
397
+ --model_name gainlora_inflora \
398
+ --threshold 0.995 \
399
+ --transthreshold 0.995
400
+
401
+
402
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
403
+ --do_train \
404
+ --do_predict \
405
+ --predict_with_generate \
406
+ --model_name_or_path $2 \
407
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights/trans_input.pt \
408
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights \
409
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights/prompts_keys_till_now.pt \
410
+ --data_dir CL_Benchmark \
411
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
412
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
413
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task073_commonsenseqa_answer_generation \
414
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation \
415
+ --per_device_train_batch_size 16 \
416
+ --per_device_eval_batch_size 8 \
417
+ --gradient_accumulation_steps 2 \
418
+ --learning_rate 0.0003 \
419
+ --num_train_epochs 100 \
420
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
421
+ --max_source_length 512 \
422
+ --max_target_length 50 \
423
+ --generation_max_length 50 \
424
+ --add_task_name False \
425
+ --add_dataset_name False \
426
+ --overwrite_output_dir \
427
+ --overwrite_cache \
428
+ --lr_scheduler_type constant \
429
+ --warmup_steps 0 \
430
+ --logging_strategy steps \
431
+ --logging_steps 10 \
432
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
433
+ --evaluation_strategy steps \
434
+ --save_strategy steps \
435
+ --save_total_limit 1 \
436
+ --load_best_model_at_end \
437
+ --lora_r 4 \
438
+ --lora_alpha 32 \
439
+ --lora_dropout 0.0 \
440
+ --add_instruction_replay \
441
+ --data_replay_freq -1 \
442
+ --replay_after_n_epoch 0 \
443
+ --kl_ratio 0.5 \
444
+ --attn_temperature 1 \
445
+ --mlp_hidden_dim 100 \
446
+ --model_name gainlora_inflora \
447
+ --threshold 0.995 \
448
+ --transthreshold 0.995
449
+
450
+
451
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
452
+ --do_train \
453
+ --do_predict \
454
+ --predict_with_generate \
455
+ --model_name_or_path $2 \
456
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights/trans_input.pt \
457
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights \
458
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights/prompts_keys_till_now.pt \
459
+ --data_dir CL_Benchmark \
460
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
461
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
462
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1590_diplomacy_text_generation \
463
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation \
464
+ --per_device_train_batch_size 16 \
465
+ --per_device_eval_batch_size 8 \
466
+ --gradient_accumulation_steps 2 \
467
+ --learning_rate 0.0003 \
468
+ --num_train_epochs 100 \
469
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
470
+ --max_source_length 512 \
471
+ --max_target_length 50 \
472
+ --generation_max_length 50 \
473
+ --add_task_name False \
474
+ --add_dataset_name False \
475
+ --overwrite_output_dir \
476
+ --overwrite_cache \
477
+ --lr_scheduler_type constant \
478
+ --warmup_steps 0 \
479
+ --logging_strategy steps \
480
+ --logging_steps 10 \
481
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
482
+ --evaluation_strategy steps \
483
+ --save_strategy steps \
484
+ --save_total_limit 1 \
485
+ --load_best_model_at_end \
486
+ --lora_r 4 \
487
+ --lora_alpha 32 \
488
+ --lora_dropout 0.0 \
489
+ --add_instruction_replay \
490
+ --data_replay_freq -1 \
491
+ --replay_after_n_epoch 0 \
492
+ --kl_ratio 0.5 \
493
+ --attn_temperature 1 \
494
+ --mlp_hidden_dim 100 \
495
+ --model_name gainlora_inflora \
496
+ --threshold 0.995 \
497
+ --transthreshold 0.995
498
+
499
+
500
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
501
+ --do_train \
502
+ --do_predict \
503
+ --predict_with_generate \
504
+ --model_name_or_path $2 \
505
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights/trans_input.pt \
506
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights \
507
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights/prompts_keys_till_now.pt \
508
+ --data_dir CL_Benchmark \
509
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
510
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
511
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task748_glucose_reverse_cause_event_detection \
512
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection \
513
+ --per_device_train_batch_size 16 \
514
+ --per_device_eval_batch_size 8 \
515
+ --gradient_accumulation_steps 2 \
516
+ --learning_rate 0.0003 \
517
+ --num_train_epochs 100 \
518
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
519
+ --max_source_length 512 \
520
+ --max_target_length 50 \
521
+ --generation_max_length 50 \
522
+ --add_task_name False \
523
+ --add_dataset_name False \
524
+ --overwrite_output_dir \
525
+ --overwrite_cache \
526
+ --lr_scheduler_type constant \
527
+ --warmup_steps 0 \
528
+ --logging_strategy steps \
529
+ --logging_steps 10 \
530
+ --metric_for_best_model eval_rougeL_for_task748_glucose_reverse_cause_event_detection \
531
+ --evaluation_strategy steps \
532
+ --save_strategy steps \
533
+ --save_total_limit 1 \
534
+ --load_best_model_at_end \
535
+ --lora_r 4 \
536
+ --lora_alpha 32 \
537
+ --lora_dropout 0.0 \
538
+ --add_instruction_replay \
539
+ --data_replay_freq -1 \
540
+ --replay_after_n_epoch 0 \
541
+ --kl_ratio 0.5 \
542
+ --attn_temperature 1 \
543
+ --mlp_hidden_dim 100 \
544
+ --model_name gainlora_inflora \
545
+ --threshold 0.995 \
546
+ --transthreshold 0.995
547
+
548
+
549
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
550
+ --do_train \
551
+ --do_predict \
552
+ --predict_with_generate \
553
+ --model_name_or_path $2 \
554
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights/trans_input.pt \
555
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights \
556
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights/prompts_keys_till_now.pt \
557
+ --data_dir CL_Benchmark \
558
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
559
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
560
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task511_reddit_tifu_long_text_summarization \
561
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization \
562
+ --per_device_train_batch_size 16 \
563
+ --per_device_eval_batch_size 8 \
564
+ --gradient_accumulation_steps 2 \
565
+ --learning_rate 0.0003 \
566
+ --num_train_epochs 100 \
567
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
568
+ --max_source_length 512 \
569
+ --max_target_length 50 \
570
+ --generation_max_length 50 \
571
+ --add_task_name False \
572
+ --add_dataset_name False \
573
+ --overwrite_output_dir \
574
+ --overwrite_cache \
575
+ --lr_scheduler_type constant \
576
+ --warmup_steps 0 \
577
+ --logging_strategy steps \
578
+ --logging_steps 10 \
579
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
580
+ --evaluation_strategy steps \
581
+ --save_strategy steps \
582
+ --save_total_limit 1 \
583
+ --load_best_model_at_end \
584
+ --lora_r 4 \
585
+ --lora_alpha 32 \
586
+ --lora_dropout 0.0 \
587
+ --add_instruction_replay \
588
+ --data_replay_freq -1 \
589
+ --replay_after_n_epoch 0 \
590
+ --kl_ratio 0.5 \
591
+ --attn_temperature 1 \
592
+ --mlp_hidden_dim 100 \
593
+ --model_name gainlora_inflora \
594
+ --threshold 0.995 \
595
+ --transthreshold 0.995
596
+
597
+
598
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
599
+ --do_train \
600
+ --do_predict \
601
+ --predict_with_generate \
602
+ --model_name_or_path $2 \
603
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/trans_input.pt \
604
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
605
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/prompts_keys_till_now.pt \
606
+ --data_dir CL_Benchmark \
607
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
608
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
609
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task591_sciq_answer_generation \
610
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation \
611
+ --per_device_train_batch_size 16 \
612
+ --per_device_eval_batch_size 8 \
613
+ --gradient_accumulation_steps 2 \
614
+ --learning_rate 0.0003 \
615
+ --num_train_epochs 100 \
616
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
617
+ --max_source_length 512 \
618
+ --max_target_length 50 \
619
+ --generation_max_length 50 \
620
+ --add_task_name False \
621
+ --add_dataset_name False \
622
+ --overwrite_output_dir \
623
+ --overwrite_cache \
624
+ --lr_scheduler_type constant \
625
+ --warmup_steps 0 \
626
+ --logging_strategy steps \
627
+ --logging_steps 10 \
628
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
629
+ --evaluation_strategy steps \
630
+ --save_strategy steps \
631
+ --save_total_limit 1 \
632
+ --load_best_model_at_end \
633
+ --lora_r 4 \
634
+ --lora_alpha 32 \
635
+ --lora_dropout 0.0 \
636
+ --add_instruction_replay \
637
+ --data_replay_freq -1 \
638
+ --replay_after_n_epoch 0 \
639
+ --kl_ratio 0.5 \
640
+ --attn_temperature 1 \
641
+ --mlp_hidden_dim 100 \
642
+ --model_name gainlora_inflora \
643
+ --threshold 0.995 \
644
+ --transthreshold 0.995
645
+
646
+
647
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
648
+ --do_train \
649
+ --do_predict \
650
+ --predict_with_generate \
651
+ --model_name_or_path $2 \
652
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation/saved_weights/trans_input.pt \
653
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation/saved_weights \
654
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation/saved_weights/prompts_keys_till_now.pt \
655
+ --data_dir CL_Benchmark \
656
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
657
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
658
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1687_sentiment140_classification \
659
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/14-task1687_sentiment140_classification \
660
+ --per_device_train_batch_size 16 \
661
+ --per_device_eval_batch_size 8 \
662
+ --gradient_accumulation_steps 2 \
663
+ --learning_rate 0.0003 \
664
+ --num_train_epochs 100 \
665
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
666
+ --max_source_length 512 \
667
+ --max_target_length 50 \
668
+ --generation_max_length 50 \
669
+ --add_task_name False \
670
+ --add_dataset_name False \
671
+ --overwrite_output_dir \
672
+ --overwrite_cache \
673
+ --lr_scheduler_type constant \
674
+ --warmup_steps 0 \
675
+ --logging_strategy steps \
676
+ --logging_steps 10 \
677
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
678
+ --evaluation_strategy steps \
679
+ --save_strategy steps \
680
+ --save_total_limit 1 \
681
+ --load_best_model_at_end \
682
+ --lora_r 4 \
683
+ --lora_alpha 32 \
684
+ --lora_dropout 0.0 \
685
+ --add_instruction_replay \
686
+ --data_replay_freq -1 \
687
+ --replay_after_n_epoch 0 \
688
+ --kl_ratio 0.5 \
689
+ --attn_temperature 1 \
690
+ --mlp_hidden_dim 100 \
691
+ --model_name gainlora_inflora \
692
+ --threshold 0.995 \
693
+ --transthreshold 0.995
694
+
695
+
696
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
697
+ --do_train \
698
+ --do_predict \
699
+ --predict_with_generate \
700
+ --model_name_or_path $2 \
701
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/14-task1687_sentiment140_classification/saved_weights/trans_input.pt \
702
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/14-task1687_sentiment140_classification/saved_weights \
703
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/14-task1687_sentiment140_classification/saved_weights/prompts_keys_till_now.pt \
704
+ --data_dir CL_Benchmark \
705
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
706
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
707
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task875_emotion_classification \
708
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/15-task875_emotion_classification \
709
+ --per_device_train_batch_size 16 \
710
+ --per_device_eval_batch_size 8 \
711
+ --gradient_accumulation_steps 2 \
712
+ --learning_rate 0.0003 \
713
+ --num_train_epochs 100 \
714
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
715
+ --max_source_length 512 \
716
+ --max_target_length 50 \
717
+ --generation_max_length 50 \
718
+ --add_task_name False \
719
+ --add_dataset_name False \
720
+ --overwrite_output_dir \
721
+ --overwrite_cache \
722
+ --lr_scheduler_type constant \
723
+ --warmup_steps 0 \
724
+ --logging_strategy steps \
725
+ --logging_steps 10 \
726
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
727
+ --evaluation_strategy steps \
728
+ --save_strategy steps \
729
+ --save_total_limit 1 \
730
+ --load_best_model_at_end \
731
+ --lora_r 4 \
732
+ --lora_alpha 32 \
733
+ --lora_dropout 0.0 \
734
+ --add_instruction_replay \
735
+ --data_replay_freq -1 \
736
+ --replay_after_n_epoch 0 \
737
+ --kl_ratio 0.5 \
738
+ --attn_temperature 1 \
739
+ --mlp_hidden_dim 100 \
740
+ --model_name gainlora_inflora \
741
+ --threshold 0.995 \
742
+ --transthreshold 0.995
743
+
744
+ python score.py gen_script_superni_order1_t5_small_gainlora_inflora gen_script_superni_order1_t5_small_gainlora_inflora
improve_gainlora/T5_small/gen_script_superni_order1_t5_small_inflora.sh ADDED
@@ -0,0 +1,713 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
21
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1572_samsum_summary \
22
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary \
23
+ --per_device_train_batch_size 16 \
24
+ --per_device_eval_batch_size 8 \
25
+ --gradient_accumulation_steps 2 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 100 \
28
+ --run_name gen_script_superni_order1_t5_small_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_rougeL \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --lora_r 4 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --load_best_model_at_end \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --kl_ratio 0.5 \
51
+ --attn_temperature 1 \
52
+ --model_name inflora \
53
+ --threshold 0.995
54
+
55
+
56
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
57
+ --do_train \
58
+ --do_predict \
59
+ --predict_with_generate \
60
+ --model_name_or_path $2 \
61
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights/trans_input.pt \
62
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights \
63
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights/prompts_keys_till_now.pt \
64
+ --data_dir CL_Benchmark \
65
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
66
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
67
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task363_sst2_polarity_classification \
68
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification \
69
+ --per_device_train_batch_size 16 \
70
+ --per_device_eval_batch_size 8 \
71
+ --gradient_accumulation_steps 2 \
72
+ --learning_rate 0.0003 \
73
+ --num_train_epochs 100 \
74
+ --run_name gen_script_superni_order1_t5_small_inflora \
75
+ --max_source_length 512 \
76
+ --max_target_length 50 \
77
+ --generation_max_length 50 \
78
+ --add_task_name False \
79
+ --add_dataset_name False \
80
+ --overwrite_output_dir \
81
+ --overwrite_cache \
82
+ --lr_scheduler_type constant \
83
+ --warmup_steps 0 \
84
+ --logging_strategy steps \
85
+ --logging_steps 10 \
86
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
87
+ --evaluation_strategy steps \
88
+ --save_strategy steps \
89
+ --save_total_limit 1 \
90
+ --load_best_model_at_end \
91
+ --lora_r 4 \
92
+ --lora_alpha 32 \
93
+ --lora_dropout 0.0 \
94
+ --add_instruction_replay \
95
+ --data_replay_freq -1 \
96
+ --replay_after_n_epoch 0 \
97
+ --kl_ratio 0.5 \
98
+ --attn_temperature 1 \
99
+ --model_name inflora \
100
+ --threshold 0.995
101
+
102
+
103
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
104
+ --do_train \
105
+ --do_predict \
106
+ --predict_with_generate \
107
+ --model_name_or_path $2 \
108
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights/trans_input.pt \
109
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights \
110
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights/prompts_keys_till_now.pt \
111
+ --data_dir CL_Benchmark \
112
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
113
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
114
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1290_xsum_summarization \
115
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization \
116
+ --per_device_train_batch_size 16 \
117
+ --per_device_eval_batch_size 8 \
118
+ --gradient_accumulation_steps 2 \
119
+ --learning_rate 0.0003 \
120
+ --num_train_epochs 100 \
121
+ --run_name gen_script_superni_order1_t5_small_inflora \
122
+ --max_source_length 512 \
123
+ --max_target_length 50 \
124
+ --generation_max_length 50 \
125
+ --add_task_name False \
126
+ --add_dataset_name False \
127
+ --overwrite_output_dir \
128
+ --overwrite_cache \
129
+ --lr_scheduler_type constant \
130
+ --warmup_steps 0 \
131
+ --logging_strategy steps \
132
+ --logging_steps 10 \
133
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
134
+ --evaluation_strategy steps \
135
+ --save_strategy steps \
136
+ --save_total_limit 1 \
137
+ --load_best_model_at_end \
138
+ --lora_r 4 \
139
+ --lora_alpha 32 \
140
+ --lora_dropout 0.0 \
141
+ --add_instruction_replay \
142
+ --data_replay_freq -1 \
143
+ --replay_after_n_epoch 0 \
144
+ --kl_ratio 0.5 \
145
+ --attn_temperature 1 \
146
+ --model_name inflora \
147
+ --threshold 0.995
148
+
149
+
150
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
151
+ --do_train \
152
+ --do_predict \
153
+ --predict_with_generate \
154
+ --model_name_or_path $2 \
155
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights/trans_input.pt \
156
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights \
157
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights/prompts_keys_till_now.pt \
158
+ --data_dir CL_Benchmark \
159
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
160
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
161
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task181_outcome_extraction \
162
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction \
163
+ --per_device_train_batch_size 16 \
164
+ --per_device_eval_batch_size 8 \
165
+ --gradient_accumulation_steps 2 \
166
+ --learning_rate 0.0003 \
167
+ --num_train_epochs 100 \
168
+ --run_name gen_script_superni_order1_t5_small_inflora \
169
+ --max_source_length 512 \
170
+ --max_target_length 50 \
171
+ --generation_max_length 50 \
172
+ --add_task_name False \
173
+ --add_dataset_name False \
174
+ --overwrite_output_dir \
175
+ --overwrite_cache \
176
+ --lr_scheduler_type constant \
177
+ --warmup_steps 0 \
178
+ --logging_strategy steps \
179
+ --logging_steps 10 \
180
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
181
+ --evaluation_strategy steps \
182
+ --save_strategy steps \
183
+ --save_total_limit 1 \
184
+ --load_best_model_at_end \
185
+ --lora_r 4 \
186
+ --lora_alpha 32 \
187
+ --lora_dropout 0.0 \
188
+ --add_instruction_replay \
189
+ --data_replay_freq -1 \
190
+ --replay_after_n_epoch 0 \
191
+ --kl_ratio 0.5 \
192
+ --attn_temperature 1 \
193
+ --model_name inflora \
194
+ --threshold 0.995
195
+
196
+
197
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
198
+ --do_train \
199
+ --do_predict \
200
+ --predict_with_generate \
201
+ --model_name_or_path $2 \
202
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights/trans_input.pt \
203
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights \
204
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights/prompts_keys_till_now.pt \
205
+ --data_dir CL_Benchmark \
206
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
207
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
208
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task002_quoref_answer_generation \
209
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation \
210
+ --per_device_train_batch_size 16 \
211
+ --per_device_eval_batch_size 8 \
212
+ --gradient_accumulation_steps 2 \
213
+ --learning_rate 0.0003 \
214
+ --num_train_epochs 100 \
215
+ --run_name gen_script_superni_order1_t5_small_inflora \
216
+ --max_source_length 512 \
217
+ --max_target_length 50 \
218
+ --generation_max_length 50 \
219
+ --add_task_name False \
220
+ --add_dataset_name False \
221
+ --overwrite_output_dir \
222
+ --overwrite_cache \
223
+ --lr_scheduler_type constant \
224
+ --warmup_steps 0 \
225
+ --logging_strategy steps \
226
+ --logging_steps 10 \
227
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
228
+ --evaluation_strategy steps \
229
+ --save_strategy steps \
230
+ --save_total_limit 1 \
231
+ --load_best_model_at_end \
232
+ --lora_r 4 \
233
+ --lora_alpha 32 \
234
+ --lora_dropout 0.0 \
235
+ --add_instruction_replay \
236
+ --data_replay_freq -1 \
237
+ --replay_after_n_epoch 0 \
238
+ --kl_ratio 0.5 \
239
+ --attn_temperature 1 \
240
+ --model_name inflora \
241
+ --threshold 0.995
242
+
243
+
244
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
245
+ --do_train \
246
+ --do_predict \
247
+ --predict_with_generate \
248
+ --model_name_or_path $2 \
249
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights/trans_input.pt \
250
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights \
251
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights/prompts_keys_till_now.pt \
252
+ --data_dir CL_Benchmark \
253
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
254
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
255
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1510_evalution_relation_extraction \
256
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction \
257
+ --per_device_train_batch_size 16 \
258
+ --per_device_eval_batch_size 8 \
259
+ --gradient_accumulation_steps 2 \
260
+ --learning_rate 0.0003 \
261
+ --num_train_epochs 100 \
262
+ --run_name gen_script_superni_order1_t5_small_inflora \
263
+ --max_source_length 512 \
264
+ --max_target_length 50 \
265
+ --generation_max_length 50 \
266
+ --add_task_name False \
267
+ --add_dataset_name False \
268
+ --overwrite_output_dir \
269
+ --overwrite_cache \
270
+ --lr_scheduler_type constant \
271
+ --warmup_steps 0 \
272
+ --logging_strategy steps \
273
+ --logging_steps 10 \
274
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
275
+ --evaluation_strategy steps \
276
+ --save_strategy steps \
277
+ --save_total_limit 1 \
278
+ --load_best_model_at_end \
279
+ --lora_r 4 \
280
+ --lora_alpha 32 \
281
+ --lora_dropout 0.0 \
282
+ --add_instruction_replay \
283
+ --data_replay_freq -1 \
284
+ --replay_after_n_epoch 0 \
285
+ --kl_ratio 0.5 \
286
+ --attn_temperature 1 \
287
+ --model_name inflora \
288
+ --threshold 0.995
289
+
290
+
291
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
292
+ --do_train \
293
+ --do_predict \
294
+ --predict_with_generate \
295
+ --model_name_or_path $2 \
296
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights/trans_input.pt \
297
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights \
298
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights/prompts_keys_till_now.pt \
299
+ --data_dir CL_Benchmark \
300
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
301
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
302
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task639_multi_woz_user_utterance_generation \
303
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation \
304
+ --per_device_train_batch_size 16 \
305
+ --per_device_eval_batch_size 8 \
306
+ --gradient_accumulation_steps 2 \
307
+ --learning_rate 0.0003 \
308
+ --num_train_epochs 100 \
309
+ --run_name gen_script_superni_order1_t5_small_inflora \
310
+ --max_source_length 512 \
311
+ --max_target_length 50 \
312
+ --generation_max_length 50 \
313
+ --add_task_name False \
314
+ --add_dataset_name False \
315
+ --overwrite_output_dir \
316
+ --overwrite_cache \
317
+ --lr_scheduler_type constant \
318
+ --warmup_steps 0 \
319
+ --logging_strategy steps \
320
+ --logging_steps 10 \
321
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
322
+ --evaluation_strategy steps \
323
+ --save_strategy steps \
324
+ --save_total_limit 1 \
325
+ --load_best_model_at_end \
326
+ --lora_r 4 \
327
+ --lora_alpha 32 \
328
+ --lora_dropout 0.0 \
329
+ --add_instruction_replay \
330
+ --data_replay_freq -1 \
331
+ --replay_after_n_epoch 0 \
332
+ --kl_ratio 0.5 \
333
+ --attn_temperature 1 \
334
+ --model_name inflora \
335
+ --threshold 0.995
336
+
337
+
338
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
339
+ --do_train \
340
+ --do_predict \
341
+ --predict_with_generate \
342
+ --model_name_or_path $2 \
343
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights/trans_input.pt \
344
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights \
345
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights/prompts_keys_till_now.pt \
346
+ --data_dir CL_Benchmark \
347
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
348
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
349
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1729_personachat_generate_next \
350
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next \
351
+ --per_device_train_batch_size 16 \
352
+ --per_device_eval_batch_size 8 \
353
+ --gradient_accumulation_steps 2 \
354
+ --learning_rate 0.0003 \
355
+ --num_train_epochs 100 \
356
+ --run_name gen_script_superni_order1_t5_small_inflora \
357
+ --max_source_length 512 \
358
+ --max_target_length 50 \
359
+ --generation_max_length 50 \
360
+ --add_task_name False \
361
+ --add_dataset_name False \
362
+ --overwrite_output_dir \
363
+ --overwrite_cache \
364
+ --lr_scheduler_type constant \
365
+ --warmup_steps 0 \
366
+ --logging_strategy steps \
367
+ --logging_steps 10 \
368
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
369
+ --evaluation_strategy steps \
370
+ --save_strategy steps \
371
+ --save_total_limit 1 \
372
+ --load_best_model_at_end \
373
+ --lora_r 4 \
374
+ --lora_alpha 32 \
375
+ --lora_dropout 0.0 \
376
+ --add_instruction_replay \
377
+ --data_replay_freq -1 \
378
+ --replay_after_n_epoch 0 \
379
+ --kl_ratio 0.5 \
380
+ --attn_temperature 1 \
381
+ --model_name inflora \
382
+ --threshold 0.995
383
+
384
+
385
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
386
+ --do_train \
387
+ --do_predict \
388
+ --predict_with_generate \
389
+ --model_name_or_path $2 \
390
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights/trans_input.pt \
391
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights \
392
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights/prompts_keys_till_now.pt \
393
+ --data_dir CL_Benchmark \
394
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
395
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
396
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task073_commonsenseqa_answer_generation \
397
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation \
398
+ --per_device_train_batch_size 16 \
399
+ --per_device_eval_batch_size 8 \
400
+ --gradient_accumulation_steps 2 \
401
+ --learning_rate 0.0003 \
402
+ --num_train_epochs 100 \
403
+ --run_name gen_script_superni_order1_t5_small_inflora \
404
+ --max_source_length 512 \
405
+ --max_target_length 50 \
406
+ --generation_max_length 50 \
407
+ --add_task_name False \
408
+ --add_dataset_name False \
409
+ --overwrite_output_dir \
410
+ --overwrite_cache \
411
+ --lr_scheduler_type constant \
412
+ --warmup_steps 0 \
413
+ --logging_strategy steps \
414
+ --logging_steps 10 \
415
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
416
+ --evaluation_strategy steps \
417
+ --save_strategy steps \
418
+ --save_total_limit 1 \
419
+ --load_best_model_at_end \
420
+ --lora_r 4 \
421
+ --lora_alpha 32 \
422
+ --lora_dropout 0.0 \
423
+ --add_instruction_replay \
424
+ --data_replay_freq -1 \
425
+ --replay_after_n_epoch 0 \
426
+ --kl_ratio 0.5 \
427
+ --attn_temperature 1 \
428
+ --model_name inflora \
429
+ --threshold 0.995
430
+
431
+
432
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
433
+ --do_train \
434
+ --do_predict \
435
+ --predict_with_generate \
436
+ --model_name_or_path $2 \
437
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights/trans_input.pt \
438
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights \
439
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights/prompts_keys_till_now.pt \
440
+ --data_dir CL_Benchmark \
441
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
442
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
443
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1590_diplomacy_text_generation \
444
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation \
445
+ --per_device_train_batch_size 16 \
446
+ --per_device_eval_batch_size 8 \
447
+ --gradient_accumulation_steps 2 \
448
+ --learning_rate 0.0003 \
449
+ --num_train_epochs 100 \
450
+ --run_name gen_script_superni_order1_t5_small_inflora \
451
+ --max_source_length 512 \
452
+ --max_target_length 50 \
453
+ --generation_max_length 50 \
454
+ --add_task_name False \
455
+ --add_dataset_name False \
456
+ --overwrite_output_dir \
457
+ --overwrite_cache \
458
+ --lr_scheduler_type constant \
459
+ --warmup_steps 0 \
460
+ --logging_strategy steps \
461
+ --logging_steps 10 \
462
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
463
+ --evaluation_strategy steps \
464
+ --save_strategy steps \
465
+ --save_total_limit 1 \
466
+ --load_best_model_at_end \
467
+ --lora_r 4 \
468
+ --lora_alpha 32 \
469
+ --lora_dropout 0.0 \
470
+ --add_instruction_replay \
471
+ --data_replay_freq -1 \
472
+ --replay_after_n_epoch 0 \
473
+ --kl_ratio 0.5 \
474
+ --attn_temperature 1 \
475
+ --model_name inflora \
476
+ --threshold 0.995
477
+
478
+
479
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
480
+ --do_train \
481
+ --do_predict \
482
+ --predict_with_generate \
483
+ --model_name_or_path $2 \
484
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights/trans_input.pt \
485
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights \
486
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights/prompts_keys_till_now.pt \
487
+ --data_dir CL_Benchmark \
488
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
489
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
490
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task748_glucose_reverse_cause_event_detection \
491
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection \
492
+ --per_device_train_batch_size 16 \
493
+ --per_device_eval_batch_size 8 \
494
+ --gradient_accumulation_steps 2 \
495
+ --learning_rate 0.0003 \
496
+ --num_train_epochs 100 \
497
+ --run_name gen_script_superni_order1_t5_small_inflora \
498
+ --max_source_length 512 \
499
+ --max_target_length 50 \
500
+ --generation_max_length 50 \
501
+ --add_task_name False \
502
+ --add_dataset_name False \
503
+ --overwrite_output_dir \
504
+ --overwrite_cache \
505
+ --lr_scheduler_type constant \
506
+ --warmup_steps 0 \
507
+ --logging_strategy steps \
508
+ --logging_steps 10 \
509
+ --metric_for_best_model eval_rougeL_for_task748_glucose_reverse_cause_event_detection \
510
+ --evaluation_strategy steps \
511
+ --save_strategy steps \
512
+ --save_total_limit 1 \
513
+ --load_best_model_at_end \
514
+ --lora_r 4 \
515
+ --lora_alpha 32 \
516
+ --lora_dropout 0.0 \
517
+ --add_instruction_replay \
518
+ --data_replay_freq -1 \
519
+ --replay_after_n_epoch 0 \
520
+ --kl_ratio 0.5 \
521
+ --attn_temperature 1 \
522
+ --model_name inflora \
523
+ --threshold 0.995
524
+
525
+
526
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
527
+ --do_train \
528
+ --do_predict \
529
+ --predict_with_generate \
530
+ --model_name_or_path $2 \
531
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights/trans_input.pt \
532
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights \
533
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights/prompts_keys_till_now.pt \
534
+ --data_dir CL_Benchmark \
535
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
536
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
537
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task511_reddit_tifu_long_text_summarization \
538
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization \
539
+ --per_device_train_batch_size 16 \
540
+ --per_device_eval_batch_size 8 \
541
+ --gradient_accumulation_steps 2 \
542
+ --learning_rate 0.0003 \
543
+ --num_train_epochs 100 \
544
+ --run_name gen_script_superni_order1_t5_small_inflora \
545
+ --max_source_length 512 \
546
+ --max_target_length 50 \
547
+ --generation_max_length 50 \
548
+ --add_task_name False \
549
+ --add_dataset_name False \
550
+ --overwrite_output_dir \
551
+ --overwrite_cache \
552
+ --lr_scheduler_type constant \
553
+ --warmup_steps 0 \
554
+ --logging_strategy steps \
555
+ --logging_steps 10 \
556
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
557
+ --evaluation_strategy steps \
558
+ --save_strategy steps \
559
+ --save_total_limit 1 \
560
+ --load_best_model_at_end \
561
+ --lora_r 4 \
562
+ --lora_alpha 32 \
563
+ --lora_dropout 0.0 \
564
+ --add_instruction_replay \
565
+ --data_replay_freq -1 \
566
+ --replay_after_n_epoch 0 \
567
+ --kl_ratio 0.5 \
568
+ --attn_temperature 1 \
569
+ --model_name inflora \
570
+ --threshold 0.995
571
+
572
+
573
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
574
+ --do_train \
575
+ --do_predict \
576
+ --predict_with_generate \
577
+ --model_name_or_path $2 \
578
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/trans_input.pt \
579
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
580
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/prompts_keys_till_now.pt \
581
+ --data_dir CL_Benchmark \
582
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
583
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
584
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task591_sciq_answer_generation \
585
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation \
586
+ --per_device_train_batch_size 16 \
587
+ --per_device_eval_batch_size 8 \
588
+ --gradient_accumulation_steps 2 \
589
+ --learning_rate 0.0003 \
590
+ --num_train_epochs 100 \
591
+ --run_name gen_script_superni_order1_t5_small_inflora \
592
+ --max_source_length 512 \
593
+ --max_target_length 50 \
594
+ --generation_max_length 50 \
595
+ --add_task_name False \
596
+ --add_dataset_name False \
597
+ --overwrite_output_dir \
598
+ --overwrite_cache \
599
+ --lr_scheduler_type constant \
600
+ --warmup_steps 0 \
601
+ --logging_strategy steps \
602
+ --logging_steps 10 \
603
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
604
+ --evaluation_strategy steps \
605
+ --save_strategy steps \
606
+ --save_total_limit 1 \
607
+ --load_best_model_at_end \
608
+ --lora_r 4 \
609
+ --lora_alpha 32 \
610
+ --lora_dropout 0.0 \
611
+ --add_instruction_replay \
612
+ --data_replay_freq -1 \
613
+ --replay_after_n_epoch 0 \
614
+ --kl_ratio 0.5 \
615
+ --attn_temperature 1 \
616
+ --model_name inflora \
617
+ --threshold 0.995
618
+
619
+
620
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
621
+ --do_train \
622
+ --do_predict \
623
+ --predict_with_generate \
624
+ --model_name_or_path $2 \
625
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation/saved_weights/trans_input.pt \
626
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation/saved_weights \
627
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation/saved_weights/prompts_keys_till_now.pt \
628
+ --data_dir CL_Benchmark \
629
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
630
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
631
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1687_sentiment140_classification \
632
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/14-task1687_sentiment140_classification \
633
+ --per_device_train_batch_size 16 \
634
+ --per_device_eval_batch_size 8 \
635
+ --gradient_accumulation_steps 2 \
636
+ --learning_rate 0.0003 \
637
+ --num_train_epochs 100 \
638
+ --run_name gen_script_superni_order1_t5_small_inflora \
639
+ --max_source_length 512 \
640
+ --max_target_length 50 \
641
+ --generation_max_length 50 \
642
+ --add_task_name False \
643
+ --add_dataset_name False \
644
+ --overwrite_output_dir \
645
+ --overwrite_cache \
646
+ --lr_scheduler_type constant \
647
+ --warmup_steps 0 \
648
+ --logging_strategy steps \
649
+ --logging_steps 10 \
650
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
651
+ --evaluation_strategy steps \
652
+ --save_strategy steps \
653
+ --save_total_limit 1 \
654
+ --load_best_model_at_end \
655
+ --lora_r 4 \
656
+ --lora_alpha 32 \
657
+ --lora_dropout 0.0 \
658
+ --add_instruction_replay \
659
+ --data_replay_freq -1 \
660
+ --replay_after_n_epoch 0 \
661
+ --kl_ratio 0.5 \
662
+ --attn_temperature 1 \
663
+ --model_name inflora \
664
+ --threshold 0.995
665
+
666
+
667
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
668
+ --do_train \
669
+ --do_predict \
670
+ --predict_with_generate \
671
+ --model_name_or_path $2 \
672
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/14-task1687_sentiment140_classification/saved_weights/trans_input.pt \
673
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/14-task1687_sentiment140_classification/saved_weights \
674
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/14-task1687_sentiment140_classification/saved_weights/prompts_keys_till_now.pt \
675
+ --data_dir CL_Benchmark \
676
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
677
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
678
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task875_emotion_classification \
679
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/15-task875_emotion_classification \
680
+ --per_device_train_batch_size 16 \
681
+ --per_device_eval_batch_size 8 \
682
+ --gradient_accumulation_steps 2 \
683
+ --learning_rate 0.0003 \
684
+ --num_train_epochs 100 \
685
+ --run_name gen_script_superni_order1_t5_small_inflora \
686
+ --max_source_length 512 \
687
+ --max_target_length 50 \
688
+ --generation_max_length 50 \
689
+ --add_task_name False \
690
+ --add_dataset_name False \
691
+ --overwrite_output_dir \
692
+ --overwrite_cache \
693
+ --lr_scheduler_type constant \
694
+ --warmup_steps 0 \
695
+ --logging_strategy steps \
696
+ --logging_steps 10 \
697
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
698
+ --evaluation_strategy steps \
699
+ --save_strategy steps \
700
+ --save_total_limit 1 \
701
+ --load_best_model_at_end \
702
+ --lora_r 4 \
703
+ --lora_alpha 32 \
704
+ --lora_dropout 0.0 \
705
+ --add_instruction_replay \
706
+ --data_replay_freq -1 \
707
+ --replay_after_n_epoch 0 \
708
+ --kl_ratio 0.5 \
709
+ --attn_temperature 1 \
710
+ --model_name inflora \
711
+ --threshold 0.995
712
+
713
+ python score.py gen_script_superni_order1_t5_small_inflora gen_script_superni_order1_t5_small_inflora
improve_gainlora/T5_small/gen_script_superni_order1_t5_small_specroute.sh ADDED
@@ -0,0 +1,821 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:2
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ # ============================================================
15
+ # Auto-detect GPU count and type for optimal parallelism
16
+ # ============================================================
17
+ NUM_GPUS=$(nvidia-smi -L 2>/dev/null | wc -l)
18
+ GPU_MEM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
19
+
20
+ if [ -z "$GPU_MEM" ]; then
21
+ echo "ERROR: No GPU detected!"
22
+ exit 1
23
+ fi
24
+
25
+ # Determine GPU type from VRAM: below 20000 MB is treated as a T4, otherwise as a high-memory GPU
26
+ if [ "$GPU_MEM" -lt 20000 ]; then
27
+ IS_T4=1
28
+ echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
29
+ else
30
+ IS_T4=0
31
+ echo "[GPU] Detected high-memory GPUs (${GPU_MEM}MB VRAM each)"
32
+ fi
33
+
34
+ # Pick the parallelism strategy: sets GPU_MODE, GPU_IDS and FP16_FLAG used by every run below.
35
+ # NOTE: T5 models trained in bfloat16 produce NaN with fp16 (overflow), and T4 GPUs do not
36
+ # support bf16, so every mode stays in fp32 (FP16_FLAG empty). No --gradient_checkpointing is passed.
37
+ if [ "$IS_T4" -eq 1 ] && [ "$NUM_GPUS" -ge 2 ]; then
38
+ GPU_MODE="t4_2gpu"
39
+ GPU_IDS="0,1"  # fixed device pair; the $1 argument is ignored in this mode
40
+ FP16_FLAG=""
41
+ echo "[GPU] Strategy: 2x T4 DataParallel + fp32"
42
+ elif [ "$IS_T4" -eq 1 ]; then
43
+ GPU_MODE="t4_1gpu"
44
+ GPU_IDS="${1:-0}"  # device id from $1, defaulting to GPU 0
45
+ FP16_FLAG=""
46
+ echo "[GPU] Strategy: 1x T4 + fp32"
47
+ else
48
+ GPU_MODE="a100"  # label used for any GPU not classified as T4 above
49
+ GPU_IDS="${1:-0}"  # device id from $1, defaulting to GPU 0
50
+ FP16_FLAG=""
51
+ echo "[GPU] Strategy: A100 (single GPU, fp32)"
52
+ fi
53
+
54
+ echo "[GPU] Using CUDA_VISIBLE_DEVICES=$GPU_IDS"
55
+ echo "============================================================"
56
+ echo ""
57
+
58
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
59
+ BSZ=8; GA=2; EVAL_BSZ=16
60
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
61
+ BSZ=16; GA=2; EVAL_BSZ=16
62
+ else
63
+ BSZ=32; GA=1; EVAL_BSZ=32
64
+ fi
65
+
66
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
67
+ --do_train \
68
+ --do_predict \
69
+ --predict_with_generate \
70
+ --model_name_or_path $2 \
71
+ --data_dir CL_Benchmark \
72
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
73
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1572_samsum_summary \
74
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary \
75
+ --per_device_train_batch_size $BSZ \
76
+ --per_device_eval_batch_size $EVAL_BSZ \
77
+ --gradient_accumulation_steps $GA \
78
+ --learning_rate 0.0003 \
79
+ --num_train_epochs 100 \
80
+ --run_name gen_script_superni_order1_t5_small_specroute \
81
+ --max_source_length 512 \
82
+ --max_target_length 50 \
83
+ --generation_max_length 50 \
84
+ --add_task_name False \
85
+ --add_dataset_name False \
86
+ --overwrite_output_dir \
87
+ --overwrite_cache \
88
+ --lr_scheduler_type constant \
89
+ --warmup_steps 0 \
90
+ --logging_strategy steps \
91
+ --logging_steps 10 \
92
+ --metric_for_best_model eval_rougeL \
93
+ --evaluation_strategy steps \
94
+ --save_strategy steps \
95
+ --save_safetensors false \
96
+ --save_total_limit 1 \
97
+ --load_best_model_at_end \
98
+ --lora_r 4 \
99
+ --lora_alpha 32 \
100
+ --lora_dropout 0.0 \
101
+ --run_single True \
102
+ --data_replay_freq -1 \
103
+ --mlp_hidden_dim 100 \
104
+ --model_name specroute \
105
+ --threshold 0.995 \
106
+ --transthreshold 0.995 \
107
+ $FP16_FLAG
108
+
109
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
110
+ BSZ=8; GA=2; EVAL_BSZ=16
111
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
112
+ BSZ=16; GA=2; EVAL_BSZ=16
113
+ else
114
+ BSZ=32; GA=1; EVAL_BSZ=32
115
+ fi
116
+
117
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
118
+ --do_train \
119
+ --do_predict \
120
+ --predict_with_generate \
121
+ --model_name_or_path $2 \
122
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights \
123
+ --data_dir CL_Benchmark \
124
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
125
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task363_sst2_polarity_classification \
126
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification \
127
+ --per_device_train_batch_size $BSZ \
128
+ --per_device_eval_batch_size $EVAL_BSZ \
129
+ --gradient_accumulation_steps $GA \
130
+ --learning_rate 0.0003 \
131
+ --num_train_epochs 100 \
132
+ --run_name gen_script_superni_order1_t5_small_specroute \
133
+ --max_source_length 512 \
134
+ --max_target_length 50 \
135
+ --generation_max_length 50 \
136
+ --add_task_name False \
137
+ --add_dataset_name False \
138
+ --overwrite_output_dir \
139
+ --overwrite_cache \
140
+ --lr_scheduler_type constant \
141
+ --warmup_steps 0 \
142
+ --logging_strategy steps \
143
+ --logging_steps 10 \
144
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
145
+ --evaluation_strategy steps \
146
+ --save_strategy steps \
147
+ --save_safetensors false \
148
+ --save_total_limit 1 \
149
+ --load_best_model_at_end \
150
+ --lora_r 4 \
151
+ --lora_alpha 32 \
152
+ --lora_dropout 0.0 \
153
+ --data_replay_freq -1 \
154
+ --mlp_hidden_dim 100 \
155
+ --model_name specroute \
156
+ --threshold 0.995 \
157
+ --transthreshold 0.995 \
158
+ $FP16_FLAG
159
+
160
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
161
+ BSZ=8; GA=2; EVAL_BSZ=16
162
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
163
+ BSZ=16; GA=2; EVAL_BSZ=16
164
+ else
165
+ BSZ=32; GA=1; EVAL_BSZ=32
166
+ fi
167
+
168
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
169
+ --do_train \
170
+ --do_predict \
171
+ --predict_with_generate \
172
+ --model_name_or_path $2 \
173
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights \
174
+ --data_dir CL_Benchmark \
175
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
176
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1290_xsum_summarization \
177
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization \
178
+ --per_device_train_batch_size $BSZ \
179
+ --per_device_eval_batch_size $EVAL_BSZ \
180
+ --gradient_accumulation_steps $GA \
181
+ --learning_rate 0.0003 \
182
+ --num_train_epochs 100 \
183
+ --run_name gen_script_superni_order1_t5_small_specroute \
184
+ --max_source_length 512 \
185
+ --max_target_length 50 \
186
+ --generation_max_length 50 \
187
+ --add_task_name False \
188
+ --add_dataset_name False \
189
+ --overwrite_output_dir \
190
+ --overwrite_cache \
191
+ --lr_scheduler_type constant \
192
+ --warmup_steps 0 \
193
+ --logging_strategy steps \
194
+ --logging_steps 10 \
195
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
196
+ --evaluation_strategy steps \
197
+ --save_strategy steps \
198
+ --save_safetensors false \
199
+ --save_total_limit 1 \
200
+ --load_best_model_at_end \
201
+ --lora_r 4 \
202
+ --lora_alpha 32 \
203
+ --lora_dropout 0.0 \
204
+ --data_replay_freq -1 \
205
+ --mlp_hidden_dim 100 \
206
+ --model_name specroute \
207
+ --threshold 0.995 \
208
+ --transthreshold 0.995 \
209
+ $FP16_FLAG
210
+
211
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
212
+ BSZ=8; GA=2; EVAL_BSZ=16
213
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
214
+ BSZ=16; GA=2; EVAL_BSZ=16
215
+ else
216
+ BSZ=32; GA=1; EVAL_BSZ=32
217
+ fi
218
+
219
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
220
+ --do_train \
221
+ --do_predict \
222
+ --predict_with_generate \
223
+ --model_name_or_path $2 \
224
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights \
225
+ --data_dir CL_Benchmark \
226
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
227
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task181_outcome_extraction \
228
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction \
229
+ --per_device_train_batch_size $BSZ \
230
+ --per_device_eval_batch_size $EVAL_BSZ \
231
+ --gradient_accumulation_steps $GA \
232
+ --learning_rate 0.0003 \
233
+ --num_train_epochs 100 \
234
+ --run_name gen_script_superni_order1_t5_small_specroute \
235
+ --max_source_length 512 \
236
+ --max_target_length 50 \
237
+ --generation_max_length 50 \
238
+ --add_task_name False \
239
+ --add_dataset_name False \
240
+ --overwrite_output_dir \
241
+ --overwrite_cache \
242
+ --lr_scheduler_type constant \
243
+ --warmup_steps 0 \
244
+ --logging_strategy steps \
245
+ --logging_steps 10 \
246
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
247
+ --evaluation_strategy steps \
248
+ --save_strategy steps \
249
+ --save_safetensors false \
250
+ --save_total_limit 1 \
251
+ --load_best_model_at_end \
252
+ --lora_r 4 \
253
+ --lora_alpha 32 \
254
+ --lora_dropout 0.0 \
255
+ --data_replay_freq -1 \
256
+ --mlp_hidden_dim 100 \
257
+ --model_name specroute \
258
+ --threshold 0.995 \
259
+ --transthreshold 0.995 \
260
+ $FP16_FLAG
261
+
262
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
263
+ BSZ=8; GA=2; EVAL_BSZ=16
264
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
265
+ BSZ=16; GA=2; EVAL_BSZ=16
266
+ else
267
+ BSZ=32; GA=1; EVAL_BSZ=32
268
+ fi
269
+
270
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
271
+ --do_train \
272
+ --do_predict \
273
+ --predict_with_generate \
274
+ --model_name_or_path $2 \
275
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights \
276
+ --data_dir CL_Benchmark \
277
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
278
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task002_quoref_answer_generation \
279
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation \
280
+ --per_device_train_batch_size $BSZ \
281
+ --per_device_eval_batch_size $EVAL_BSZ \
282
+ --gradient_accumulation_steps $GA \
283
+ --learning_rate 0.0003 \
284
+ --num_train_epochs 100 \
285
+ --run_name gen_script_superni_order1_t5_small_specroute \
286
+ --max_source_length 512 \
287
+ --max_target_length 50 \
288
+ --generation_max_length 50 \
289
+ --add_task_name False \
290
+ --add_dataset_name False \
291
+ --overwrite_output_dir \
292
+ --overwrite_cache \
293
+ --lr_scheduler_type constant \
294
+ --warmup_steps 0 \
295
+ --logging_strategy steps \
296
+ --logging_steps 10 \
297
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
298
+ --evaluation_strategy steps \
299
+ --save_strategy steps \
300
+ --save_safetensors false \
301
+ --save_total_limit 1 \
302
+ --load_best_model_at_end \
303
+ --lora_r 4 \
304
+ --lora_alpha 32 \
305
+ --lora_dropout 0.0 \
306
+ --data_replay_freq -1 \
307
+ --mlp_hidden_dim 100 \
308
+ --model_name specroute \
309
+ --threshold 0.995 \
310
+ --transthreshold 0.995 \
311
+ $FP16_FLAG
312
+
313
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
314
+ BSZ=8; GA=2; EVAL_BSZ=16
315
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
316
+ BSZ=16; GA=2; EVAL_BSZ=16
317
+ else
318
+ BSZ=32; GA=1; EVAL_BSZ=32
319
+ fi
320
+
321
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
322
+ --do_train \
323
+ --do_predict \
324
+ --predict_with_generate \
325
+ --model_name_or_path $2 \
326
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights \
327
+ --data_dir CL_Benchmark \
328
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
329
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1510_evalution_relation_extraction \
330
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction \
331
+ --per_device_train_batch_size $BSZ \
332
+ --per_device_eval_batch_size $EVAL_BSZ \
333
+ --gradient_accumulation_steps $GA \
334
+ --learning_rate 0.0003 \
335
+ --num_train_epochs 100 \
336
+ --run_name gen_script_superni_order1_t5_small_specroute \
337
+ --max_source_length 512 \
338
+ --max_target_length 50 \
339
+ --generation_max_length 50 \
340
+ --add_task_name False \
341
+ --add_dataset_name False \
342
+ --overwrite_output_dir \
343
+ --overwrite_cache \
344
+ --lr_scheduler_type constant \
345
+ --warmup_steps 0 \
346
+ --logging_strategy steps \
347
+ --logging_steps 10 \
348
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
349
+ --evaluation_strategy steps \
350
+ --save_strategy steps \
351
+ --save_safetensors false \
352
+ --save_total_limit 1 \
353
+ --load_best_model_at_end \
354
+ --lora_r 4 \
355
+ --lora_alpha 32 \
356
+ --lora_dropout 0.0 \
357
+ --data_replay_freq -1 \
358
+ --mlp_hidden_dim 100 \
359
+ --model_name specroute \
360
+ --threshold 0.995 \
361
+ --transthreshold 0.995 \
362
+ $FP16_FLAG
363
+
364
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
365
+ BSZ=8; GA=2; EVAL_BSZ=16
366
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
367
+ BSZ=16; GA=2; EVAL_BSZ=16
368
+ else
369
+ BSZ=32; GA=1; EVAL_BSZ=32
370
+ fi
371
+
372
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
373
+ --do_train \
374
+ --do_predict \
375
+ --predict_with_generate \
376
+ --model_name_or_path $2 \
377
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights \
378
+ --data_dir CL_Benchmark \
379
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
380
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task639_multi_woz_user_utterance_generation \
381
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation \
382
+ --per_device_train_batch_size $BSZ \
383
+ --per_device_eval_batch_size $EVAL_BSZ \
384
+ --gradient_accumulation_steps $GA \
385
+ --learning_rate 0.0003 \
386
+ --num_train_epochs 100 \
387
+ --run_name gen_script_superni_order1_t5_small_specroute \
388
+ --max_source_length 512 \
389
+ --max_target_length 50 \
390
+ --generation_max_length 50 \
391
+ --add_task_name False \
392
+ --add_dataset_name False \
393
+ --overwrite_output_dir \
394
+ --overwrite_cache \
395
+ --lr_scheduler_type constant \
396
+ --warmup_steps 0 \
397
+ --logging_strategy steps \
398
+ --logging_steps 10 \
399
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
400
+ --evaluation_strategy steps \
401
+ --save_strategy steps \
402
+ --save_safetensors false \
403
+ --save_total_limit 1 \
404
+ --load_best_model_at_end \
405
+ --lora_r 4 \
406
+ --lora_alpha 32 \
407
+ --lora_dropout 0.0 \
408
+ --data_replay_freq -1 \
409
+ --mlp_hidden_dim 100 \
410
+ --model_name specroute \
411
+ --threshold 0.995 \
412
+ --transthreshold 0.995 \
413
+ $FP16_FLAG
414
+
415
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
416
+ BSZ=8; GA=2; EVAL_BSZ=16
417
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
418
+ BSZ=16; GA=2; EVAL_BSZ=16
419
+ else
420
+ BSZ=32; GA=1; EVAL_BSZ=32
421
+ fi
422
+
423
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
424
+ --do_train \
425
+ --do_predict \
426
+ --predict_with_generate \
427
+ --model_name_or_path $2 \
428
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights \
429
+ --data_dir CL_Benchmark \
430
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
431
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1729_personachat_generate_next \
432
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/8-task1729_personachat_generate_next \
433
+ --per_device_train_batch_size $BSZ \
434
+ --per_device_eval_batch_size $EVAL_BSZ \
435
+ --gradient_accumulation_steps $GA \
436
+ --learning_rate 0.0003 \
437
+ --num_train_epochs 100 \
438
+ --run_name gen_script_superni_order1_t5_small_specroute \
439
+ --max_source_length 512 \
440
+ --max_target_length 50 \
441
+ --generation_max_length 50 \
442
+ --add_task_name False \
443
+ --add_dataset_name False \
444
+ --overwrite_output_dir \
445
+ --overwrite_cache \
446
+ --lr_scheduler_type constant \
447
+ --warmup_steps 0 \
448
+ --logging_strategy steps \
449
+ --logging_steps 10 \
450
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
451
+ --evaluation_strategy steps \
452
+ --save_strategy steps \
453
+ --save_safetensors false \
454
+ --save_total_limit 1 \
455
+ --load_best_model_at_end \
456
+ --lora_r 4 \
457
+ --lora_alpha 32 \
458
+ --lora_dropout 0.0 \
459
+ --data_replay_freq -1 \
460
+ --mlp_hidden_dim 100 \
461
+ --model_name specroute \
462
+ --threshold 0.995 \
463
+ --transthreshold 0.995 \
464
+ $FP16_FLAG
465
+
466
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
467
+ BSZ=8; GA=2; EVAL_BSZ=16
468
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
469
+ BSZ=16; GA=2; EVAL_BSZ=16
470
+ else
471
+ BSZ=32; GA=1; EVAL_BSZ=32
472
+ fi
473
+
474
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
475
+ --do_train \
476
+ --do_predict \
477
+ --predict_with_generate \
478
+ --model_name_or_path $2 \
479
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/8-task1729_personachat_generate_next/saved_weights \
480
+ --data_dir CL_Benchmark \
481
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
482
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task073_commonsenseqa_answer_generation \
483
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/9-task073_commonsenseqa_answer_generation \
484
+ --per_device_train_batch_size $BSZ \
485
+ --per_device_eval_batch_size $EVAL_BSZ \
486
+ --gradient_accumulation_steps $GA \
487
+ --learning_rate 0.0003 \
488
+ --num_train_epochs 100 \
489
+ --run_name gen_script_superni_order1_t5_small_specroute \
490
+ --max_source_length 512 \
491
+ --max_target_length 50 \
492
+ --generation_max_length 50 \
493
+ --add_task_name False \
494
+ --add_dataset_name False \
495
+ --overwrite_output_dir \
496
+ --overwrite_cache \
497
+ --lr_scheduler_type constant \
498
+ --warmup_steps 0 \
499
+ --logging_strategy steps \
500
+ --logging_steps 10 \
501
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
502
+ --evaluation_strategy steps \
503
+ --save_strategy steps \
504
+ --save_safetensors false \
505
+ --save_total_limit 1 \
506
+ --load_best_model_at_end \
507
+ --lora_r 4 \
508
+ --lora_alpha 32 \
509
+ --lora_dropout 0.0 \
510
+ --data_replay_freq -1 \
511
+ --mlp_hidden_dim 100 \
512
+ --model_name specroute \
513
+ --threshold 0.995 \
514
+ --transthreshold 0.995 \
515
+ $FP16_FLAG
516
+
517
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
518
+ BSZ=8; GA=2; EVAL_BSZ=16
519
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
520
+ BSZ=16; GA=2; EVAL_BSZ=16
521
+ else
522
+ BSZ=32; GA=1; EVAL_BSZ=32
523
+ fi
524
+
525
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
526
+ --do_train \
527
+ --do_predict \
528
+ --predict_with_generate \
529
+ --model_name_or_path $2 \
530
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/9-task073_commonsenseqa_answer_generation/saved_weights \
531
+ --data_dir CL_Benchmark \
532
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
533
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1590_diplomacy_text_generation \
534
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/10-task1590_diplomacy_text_generation \
535
+ --per_device_train_batch_size $BSZ \
536
+ --per_device_eval_batch_size $EVAL_BSZ \
537
+ --gradient_accumulation_steps $GA \
538
+ --learning_rate 0.0003 \
539
+ --num_train_epochs 100 \
540
+ --run_name gen_script_superni_order1_t5_small_specroute \
541
+ --max_source_length 512 \
542
+ --max_target_length 50 \
543
+ --generation_max_length 50 \
544
+ --add_task_name False \
545
+ --add_dataset_name False \
546
+ --overwrite_output_dir \
547
+ --overwrite_cache \
548
+ --lr_scheduler_type constant \
549
+ --warmup_steps 0 \
550
+ --logging_strategy steps \
551
+ --logging_steps 10 \
552
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
553
+ --evaluation_strategy steps \
554
+ --save_strategy steps \
555
+ --save_safetensors false \
556
+ --save_total_limit 1 \
557
+ --load_best_model_at_end \
558
+ --lora_r 4 \
559
+ --lora_alpha 32 \
560
+ --lora_dropout 0.0 \
561
+ --data_replay_freq -1 \
562
+ --mlp_hidden_dim 100 \
563
+ --model_name specroute \
564
+ --threshold 0.995 \
565
+ --transthreshold 0.995 \
566
+ $FP16_FLAG
567
+
568
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
569
+ BSZ=8; GA=2; EVAL_BSZ=16
570
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
571
+ BSZ=16; GA=2; EVAL_BSZ=16
572
+ else
573
+ BSZ=32; GA=1; EVAL_BSZ=32
574
+ fi
575
+
576
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
577
+ --do_train \
578
+ --do_predict \
579
+ --predict_with_generate \
580
+ --model_name_or_path $2 \
581
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/10-task1590_diplomacy_text_generation/saved_weights \
582
+ --data_dir CL_Benchmark \
583
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
584
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task748_glucose_reverse_cause_event_detection \
585
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/11-task748_glucose_reverse_cause_event_detection \
586
+ --per_device_train_batch_size $BSZ \
587
+ --per_device_eval_batch_size $EVAL_BSZ \
588
+ --gradient_accumulation_steps $GA \
589
+ --learning_rate 0.0003 \
590
+ --num_train_epochs 100 \
591
+ --run_name gen_script_superni_order1_t5_small_specroute \
592
+ --max_source_length 512 \
593
+ --max_target_length 50 \
594
+ --generation_max_length 50 \
595
+ --add_task_name False \
596
+ --add_dataset_name False \
597
+ --overwrite_output_dir \
598
+ --overwrite_cache \
599
+ --lr_scheduler_type constant \
600
+ --warmup_steps 0 \
601
+ --logging_strategy steps \
602
+ --logging_steps 10 \
603
+ --metric_for_best_model eval_rougeL_for_task748_glucose_reverse_cause_event_detection \
604
+ --evaluation_strategy steps \
605
+ --save_strategy steps \
606
+ --save_safetensors false \
607
+ --save_total_limit 1 \
608
+ --load_best_model_at_end \
609
+ --lora_r 4 \
610
+ --lora_alpha 32 \
611
+ --lora_dropout 0.0 \
612
+ --data_replay_freq -1 \
613
+ --mlp_hidden_dim 100 \
614
+ --model_name specroute \
615
+ --threshold 0.995 \
616
+ --transthreshold 0.995 \
617
+ $FP16_FLAG
618
+
619
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
620
+ BSZ=8; GA=2; EVAL_BSZ=16
621
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
622
+ BSZ=16; GA=2; EVAL_BSZ=16
623
+ else
624
+ BSZ=32; GA=1; EVAL_BSZ=32
625
+ fi
626
+
627
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
628
+ --do_train \
629
+ --do_predict \
630
+ --predict_with_generate \
631
+ --model_name_or_path $2 \
632
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights \
633
+ --data_dir CL_Benchmark \
634
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
635
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task511_reddit_tifu_long_text_summarization \
636
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/12-task511_reddit_tifu_long_text_summarization \
637
+ --per_device_train_batch_size $BSZ \
638
+ --per_device_eval_batch_size $EVAL_BSZ \
639
+ --gradient_accumulation_steps $GA \
640
+ --learning_rate 0.0003 \
641
+ --num_train_epochs 100 \
642
+ --run_name gen_script_superni_order1_t5_small_specroute \
643
+ --max_source_length 512 \
644
+ --max_target_length 50 \
645
+ --generation_max_length 50 \
646
+ --add_task_name False \
647
+ --add_dataset_name False \
648
+ --overwrite_output_dir \
649
+ --overwrite_cache \
650
+ --lr_scheduler_type constant \
651
+ --warmup_steps 0 \
652
+ --logging_strategy steps \
653
+ --logging_steps 10 \
654
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
655
+ --evaluation_strategy steps \
656
+ --save_strategy steps \
657
+ --save_safetensors false \
658
+ --save_total_limit 1 \
659
+ --load_best_model_at_end \
660
+ --lora_r 4 \
661
+ --lora_alpha 32 \
662
+ --lora_dropout 0.0 \
663
+ --data_replay_freq -1 \
664
+ --mlp_hidden_dim 100 \
665
+ --model_name specroute \
666
+ --threshold 0.995 \
667
+ --transthreshold 0.995 \
668
+ $FP16_FLAG
669
+
670
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
671
+ BSZ=8; GA=2; EVAL_BSZ=16
672
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
673
+ BSZ=16; GA=2; EVAL_BSZ=16
674
+ else
675
+ BSZ=32; GA=1; EVAL_BSZ=32
676
+ fi
677
+
678
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
679
+ --do_train \
680
+ --do_predict \
681
+ --predict_with_generate \
682
+ --model_name_or_path $2 \
683
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
684
+ --data_dir CL_Benchmark \
685
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
686
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task591_sciq_answer_generation \
687
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/13-task591_sciq_answer_generation \
688
+ --per_device_train_batch_size $BSZ \
689
+ --per_device_eval_batch_size $EVAL_BSZ \
690
+ --gradient_accumulation_steps $GA \
691
+ --learning_rate 0.0003 \
692
+ --num_train_epochs 100 \
693
+ --run_name gen_script_superni_order1_t5_small_specroute \
694
+ --max_source_length 512 \
695
+ --max_target_length 50 \
696
+ --generation_max_length 50 \
697
+ --add_task_name False \
698
+ --add_dataset_name False \
699
+ --overwrite_output_dir \
700
+ --overwrite_cache \
701
+ --lr_scheduler_type constant \
702
+ --warmup_steps 0 \
703
+ --logging_strategy steps \
704
+ --logging_steps 10 \
705
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
706
+ --evaluation_strategy steps \
707
+ --save_strategy steps \
708
+ --save_safetensors false \
709
+ --save_total_limit 1 \
710
+ --load_best_model_at_end \
711
+ --lora_r 4 \
712
+ --lora_alpha 32 \
713
+ --lora_dropout 0.0 \
714
+ --data_replay_freq -1 \
715
+ --mlp_hidden_dim 100 \
716
+ --model_name specroute \
717
+ --threshold 0.995 \
718
+ --transthreshold 0.995 \
719
+ $FP16_FLAG
720
+
721
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
722
+ BSZ=8; GA=2; EVAL_BSZ=16
723
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
724
+ BSZ=16; GA=2; EVAL_BSZ=16
725
+ else
726
+ BSZ=32; GA=1; EVAL_BSZ=32
727
+ fi
728
+
729
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
730
+ --do_train \
731
+ --do_predict \
732
+ --predict_with_generate \
733
+ --model_name_or_path $2 \
734
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/13-task591_sciq_answer_generation/saved_weights \
735
+ --data_dir CL_Benchmark \
736
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
737
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1687_sentiment140_classification \
738
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/14-task1687_sentiment140_classification \
739
+ --per_device_train_batch_size $BSZ \
740
+ --per_device_eval_batch_size $EVAL_BSZ \
741
+ --gradient_accumulation_steps $GA \
742
+ --learning_rate 0.0003 \
743
+ --num_train_epochs 100 \
744
+ --run_name gen_script_superni_order1_t5_small_specroute \
745
+ --max_source_length 512 \
746
+ --max_target_length 50 \
747
+ --generation_max_length 50 \
748
+ --add_task_name False \
749
+ --add_dataset_name False \
750
+ --overwrite_output_dir \
751
+ --overwrite_cache \
752
+ --lr_scheduler_type constant \
753
+ --warmup_steps 0 \
754
+ --logging_strategy steps \
755
+ --logging_steps 10 \
756
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
757
+ --evaluation_strategy steps \
758
+ --save_strategy steps \
759
+ --save_safetensors false \
760
+ --save_total_limit 1 \
761
+ --load_best_model_at_end \
762
+ --lora_r 4 \
763
+ --lora_alpha 32 \
764
+ --lora_dropout 0.0 \
765
+ --data_replay_freq -1 \
766
+ --mlp_hidden_dim 100 \
767
+ --model_name specroute \
768
+ --threshold 0.995 \
769
+ --transthreshold 0.995 \
770
+ $FP16_FLAG
771
+
772
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
773
+ BSZ=8; GA=2; EVAL_BSZ=16
774
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
775
+ BSZ=16; GA=2; EVAL_BSZ=16
776
+ else
777
+ BSZ=32; GA=1; EVAL_BSZ=32
778
+ fi
779
+
780
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
781
+ --do_train \
782
+ --do_predict \
783
+ --predict_with_generate \
784
+ --model_name_or_path $2 \
785
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/13-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/14-task1687_sentiment140_classification/saved_weights \
786
+ --data_dir CL_Benchmark \
787
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
788
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task875_emotion_classification \
789
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_specroute/outputs/15-task875_emotion_classification \
790
+ --per_device_train_batch_size $BSZ \
791
+ --per_device_eval_batch_size $EVAL_BSZ \
792
+ --gradient_accumulation_steps $GA \
793
+ --learning_rate 0.0003 \
794
+ --num_train_epochs 100 \
795
+ --run_name gen_script_superni_order1_t5_small_specroute \
796
+ --max_source_length 512 \
797
+ --max_target_length 50 \
798
+ --generation_max_length 50 \
799
+ --add_task_name False \
800
+ --add_dataset_name False \
801
+ --overwrite_output_dir \
802
+ --overwrite_cache \
803
+ --lr_scheduler_type constant \
804
+ --warmup_steps 0 \
805
+ --logging_strategy steps \
806
+ --logging_steps 10 \
807
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
808
+ --evaluation_strategy steps \
809
+ --save_strategy steps \
810
+ --save_safetensors false \
811
+ --save_total_limit 1 \
812
+ --load_best_model_at_end \
813
+ --lora_r 4 \
814
+ --lora_alpha 32 \
815
+ --lora_dropout 0.0 \
816
+ --data_replay_freq -1 \
817
+ --mlp_hidden_dim 100 \
818
+ --model_name specroute \
819
+ --threshold 0.995 \
820
+ --transthreshold 0.995 \
821
+ $FP16_FLAG
improve_gainlora/T5_small/gen_script_superni_order2_t5_small_gainlora_inflora.sh ADDED
@@ -0,0 +1,743 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
21
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task748_glucose_reverse_cause_event_detection \
22
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection \
23
+ --per_device_train_batch_size 16 \
24
+ --per_device_eval_batch_size 8 \
25
+ --gradient_accumulation_steps 2 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 100 \
28
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_rougeL \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --lora_r 4 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --load_best_model_at_end \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --model_name gainlora_inflora \
51
+ --mlp_hidden_dim 100 \
52
+ --threshold 0.995 \
53
+ --transthreshold 0.995 \
54
+ --kl_ratio 0.5 \
55
+ --attn_temperature 1
56
+
57
+
58
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
59
+ --do_train \
60
+ --do_predict \
61
+ --predict_with_generate \
62
+ --model_name_or_path $2 \
63
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights/trans_input.pt \
64
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights \
65
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights/prompts_keys_till_now.pt \
66
+ --data_dir CL_Benchmark \
67
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
68
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
69
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task073_commonsenseqa_answer_generation \
70
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation \
71
+ --per_device_train_batch_size 16 \
72
+ --per_device_eval_batch_size 8 \
73
+ --gradient_accumulation_steps 2 \
74
+ --learning_rate 0.0003 \
75
+ --num_train_epochs 100 \
76
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
77
+ --max_source_length 512 \
78
+ --max_target_length 50 \
79
+ --generation_max_length 50 \
80
+ --add_task_name False \
81
+ --add_dataset_name False \
82
+ --overwrite_output_dir \
83
+ --overwrite_cache \
84
+ --lr_scheduler_type constant \
85
+ --warmup_steps 0 \
86
+ --logging_strategy steps \
87
+ --logging_steps 10 \
88
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
89
+ --evaluation_strategy steps \
90
+ --save_strategy steps \
91
+ --save_total_limit 1 \
92
+ --load_best_model_at_end \
93
+ --lora_r 4 \
94
+ --lora_alpha 32 \
95
+ --lora_dropout 0.0 \
96
+ --add_instruction_replay \
97
+ --data_replay_freq -1 \
98
+ --replay_after_n_epoch 0 \
99
+ --model_name gainlora_inflora \
100
+ --mlp_hidden_dim 100 \
101
+ --threshold 0.995 \
102
+ --transthreshold 0.995 \
103
+ --kl_ratio 0.5 \
104
+ --attn_temperature 1
105
+
106
+
107
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
108
+ --do_train \
109
+ --do_predict \
110
+ --predict_with_generate \
111
+ --model_name_or_path $2 \
112
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights/trans_input.pt \
113
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights \
114
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights/prompts_keys_till_now.pt \
115
+ --data_dir CL_Benchmark \
116
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
117
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
118
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1590_diplomacy_text_generation \
119
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation \
120
+ --per_device_train_batch_size 16 \
121
+ --per_device_eval_batch_size 8 \
122
+ --gradient_accumulation_steps 2 \
123
+ --learning_rate 0.0003 \
124
+ --num_train_epochs 100 \
125
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
126
+ --max_source_length 512 \
127
+ --max_target_length 50 \
128
+ --generation_max_length 50 \
129
+ --add_task_name False \
130
+ --add_dataset_name False \
131
+ --overwrite_output_dir \
132
+ --overwrite_cache \
133
+ --lr_scheduler_type constant \
134
+ --warmup_steps 0 \
135
+ --logging_strategy steps \
136
+ --logging_steps 10 \
137
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
138
+ --evaluation_strategy steps \
139
+ --save_strategy steps \
140
+ --save_total_limit 1 \
141
+ --load_best_model_at_end \
142
+ --lora_r 4 \
143
+ --lora_alpha 32 \
144
+ --lora_dropout 0.0 \
145
+ --add_instruction_replay \
146
+ --data_replay_freq -1 \
147
+ --replay_after_n_epoch 0 \
148
+ --model_name gainlora_inflora \
149
+ --mlp_hidden_dim 100 \
150
+ --threshold 0.995 \
151
+ --transthreshold 0.995 \
152
+ --kl_ratio 0.5 \
153
+ --attn_temperature 1
154
+
155
+
156
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
157
+ --do_train \
158
+ --do_predict \
159
+ --predict_with_generate \
160
+ --model_name_or_path $2 \
161
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights/trans_input.pt \
162
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights \
163
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights/prompts_keys_till_now.pt \
164
+ --data_dir CL_Benchmark \
165
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
166
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
167
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task639_multi_woz_user_utterance_generation \
168
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation \
169
+ --per_device_train_batch_size 16 \
170
+ --per_device_eval_batch_size 8 \
171
+ --gradient_accumulation_steps 2 \
172
+ --learning_rate 0.0003 \
173
+ --num_train_epochs 100 \
174
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
175
+ --max_source_length 512 \
176
+ --max_target_length 50 \
177
+ --generation_max_length 50 \
178
+ --add_task_name False \
179
+ --add_dataset_name False \
180
+ --overwrite_output_dir \
181
+ --overwrite_cache \
182
+ --lr_scheduler_type constant \
183
+ --warmup_steps 0 \
184
+ --logging_strategy steps \
185
+ --logging_steps 10 \
186
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
187
+ --evaluation_strategy steps \
188
+ --save_strategy steps \
189
+ --save_total_limit 1 \
190
+ --load_best_model_at_end \
191
+ --lora_r 4 \
192
+ --lora_alpha 32 \
193
+ --lora_dropout 0.0 \
194
+ --add_instruction_replay \
195
+ --data_replay_freq -1 \
196
+ --replay_after_n_epoch 0 \
197
+ --model_name gainlora_inflora \
198
+ --mlp_hidden_dim 100 \
199
+ --threshold 0.995 \
200
+ --transthreshold 0.995 \
201
+ --kl_ratio 0.5 \
202
+ --attn_temperature 1
203
+
204
+
205
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
206
+ --do_train \
207
+ --do_predict \
208
+ --predict_with_generate \
209
+ --model_name_or_path $2 \
210
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights/trans_input.pt \
211
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights \
212
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights/prompts_keys_till_now.pt \
213
+ --data_dir CL_Benchmark \
214
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
215
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
216
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1572_samsum_summary \
217
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary \
218
+ --per_device_train_batch_size 16 \
219
+ --per_device_eval_batch_size 8 \
220
+ --gradient_accumulation_steps 2 \
221
+ --learning_rate 0.0003 \
222
+ --num_train_epochs 100 \
223
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
224
+ --max_source_length 512 \
225
+ --max_target_length 50 \
226
+ --generation_max_length 50 \
227
+ --add_task_name False \
228
+ --add_dataset_name False \
229
+ --overwrite_output_dir \
230
+ --overwrite_cache \
231
+ --lr_scheduler_type constant \
232
+ --warmup_steps 0 \
233
+ --logging_strategy steps \
234
+ --logging_steps 10 \
235
+ --metric_for_best_model eval_rougeL_for_task1572_samsum_summary \
236
+ --evaluation_strategy steps \
237
+ --save_strategy steps \
238
+ --save_total_limit 1 \
239
+ --load_best_model_at_end \
240
+ --lora_r 4 \
241
+ --lora_alpha 32 \
242
+ --lora_dropout 0.0 \
243
+ --add_instruction_replay \
244
+ --data_replay_freq -1 \
245
+ --replay_after_n_epoch 0 \
246
+ --model_name gainlora_inflora \
247
+ --mlp_hidden_dim 100 \
248
+ --threshold 0.995 \
249
+ --transthreshold 0.995 \
250
+ --kl_ratio 0.5 \
251
+ --attn_temperature 1
252
+
253
+
254
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
255
+ --do_train \
256
+ --do_predict \
257
+ --predict_with_generate \
258
+ --model_name_or_path $2 \
259
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights/trans_input.pt \
260
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights \
261
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights/prompts_keys_till_now.pt \
262
+ --data_dir CL_Benchmark \
263
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
264
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
265
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1687_sentiment140_classification \
266
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification \
267
+ --per_device_train_batch_size 16 \
268
+ --per_device_eval_batch_size 8 \
269
+ --gradient_accumulation_steps 2 \
270
+ --learning_rate 0.0003 \
271
+ --num_train_epochs 100 \
272
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
273
+ --max_source_length 512 \
274
+ --max_target_length 50 \
275
+ --generation_max_length 50 \
276
+ --add_task_name False \
277
+ --add_dataset_name False \
278
+ --overwrite_output_dir \
279
+ --overwrite_cache \
280
+ --lr_scheduler_type constant \
281
+ --warmup_steps 0 \
282
+ --logging_strategy steps \
283
+ --logging_steps 10 \
284
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
285
+ --evaluation_strategy steps \
286
+ --save_strategy steps \
287
+ --save_total_limit 1 \
288
+ --load_best_model_at_end \
289
+ --lora_r 4 \
290
+ --lora_alpha 32 \
291
+ --lora_dropout 0.0 \
292
+ --add_instruction_replay \
293
+ --data_replay_freq -1 \
294
+ --replay_after_n_epoch 0 \
295
+ --model_name gainlora_inflora \
296
+ --mlp_hidden_dim 100 \
297
+ --threshold 0.995 \
298
+ --transthreshold 0.995 \
299
+ --kl_ratio 0.5 \
300
+ --attn_temperature 1
301
+
302
+
303
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
304
+ --do_train \
305
+ --do_predict \
306
+ --predict_with_generate \
307
+ --model_name_or_path $2 \
308
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights/trans_input.pt \
309
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights \
310
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights/prompts_keys_till_now.pt \
311
+ --data_dir CL_Benchmark \
312
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
313
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
314
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task591_sciq_answer_generation \
315
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation \
316
+ --per_device_train_batch_size 16 \
317
+ --per_device_eval_batch_size 8 \
318
+ --gradient_accumulation_steps 2 \
319
+ --learning_rate 0.0003 \
320
+ --num_train_epochs 100 \
321
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
322
+ --max_source_length 512 \
323
+ --max_target_length 50 \
324
+ --generation_max_length 50 \
325
+ --add_task_name False \
326
+ --add_dataset_name False \
327
+ --overwrite_output_dir \
328
+ --overwrite_cache \
329
+ --lr_scheduler_type constant \
330
+ --warmup_steps 0 \
331
+ --logging_strategy steps \
332
+ --logging_steps 10 \
333
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
334
+ --evaluation_strategy steps \
335
+ --save_strategy steps \
336
+ --save_total_limit 1 \
337
+ --load_best_model_at_end \
338
+ --lora_r 4 \
339
+ --lora_alpha 32 \
340
+ --lora_dropout 0.0 \
341
+ --add_instruction_replay \
342
+ --data_replay_freq -1 \
343
+ --replay_after_n_epoch 0 \
344
+ --model_name gainlora_inflora \
345
+ --mlp_hidden_dim 100 \
346
+ --threshold 0.995 \
347
+ --transthreshold 0.995 \
348
+ --kl_ratio 0.5 \
349
+ --attn_temperature 1
350
+
351
+
352
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
353
+ --do_train \
354
+ --do_predict \
355
+ --predict_with_generate \
356
+ --model_name_or_path $2 \
357
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights/trans_input.pt \
358
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights \
359
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights/prompts_keys_till_now.pt \
360
+ --data_dir CL_Benchmark \
361
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
362
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
363
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task363_sst2_polarity_classification \
364
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification \
365
+ --per_device_train_batch_size 16 \
366
+ --per_device_eval_batch_size 8 \
367
+ --gradient_accumulation_steps 2 \
368
+ --learning_rate 0.0003 \
369
+ --num_train_epochs 100 \
370
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
371
+ --max_source_length 512 \
372
+ --max_target_length 50 \
373
+ --generation_max_length 50 \
374
+ --add_task_name False \
375
+ --add_dataset_name False \
376
+ --overwrite_output_dir \
377
+ --overwrite_cache \
378
+ --lr_scheduler_type constant \
379
+ --warmup_steps 0 \
380
+ --logging_strategy steps \
381
+ --logging_steps 10 \
382
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
383
+ --evaluation_strategy steps \
384
+ --save_strategy steps \
385
+ --save_total_limit 1 \
386
+ --load_best_model_at_end \
387
+ --lora_r 4 \
388
+ --lora_alpha 32 \
389
+ --lora_dropout 0.0 \
390
+ --add_instruction_replay \
391
+ --data_replay_freq -1 \
392
+ --replay_after_n_epoch 0 \
393
+ --model_name gainlora_inflora \
394
+ --mlp_hidden_dim 100 \
395
+ --threshold 0.995 \
396
+ --transthreshold 0.995 \
397
+ --kl_ratio 0.5 \
398
+ --attn_temperature 1
399
+
400
+
401
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
402
+ --do_train \
403
+ --do_predict \
404
+ --predict_with_generate \
405
+ --model_name_or_path $2 \
406
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights/trans_input.pt \
407
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights \
408
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights/prompts_keys_till_now.pt \
409
+ --data_dir CL_Benchmark \
410
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
411
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
412
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1510_evalution_relation_extraction \
413
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction \
414
+ --per_device_train_batch_size 16 \
415
+ --per_device_eval_batch_size 8 \
416
+ --gradient_accumulation_steps 2 \
417
+ --learning_rate 0.0003 \
418
+ --num_train_epochs 100 \
419
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
420
+ --max_source_length 512 \
421
+ --max_target_length 50 \
422
+ --generation_max_length 50 \
423
+ --add_task_name False \
424
+ --add_dataset_name False \
425
+ --overwrite_output_dir \
426
+ --overwrite_cache \
427
+ --lr_scheduler_type constant \
428
+ --warmup_steps 0 \
429
+ --logging_strategy steps \
430
+ --logging_steps 10 \
431
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
432
+ --evaluation_strategy steps \
433
+ --save_strategy steps \
434
+ --save_total_limit 1 \
435
+ --load_best_model_at_end \
436
+ --lora_r 4 \
437
+ --lora_alpha 32 \
438
+ --lora_dropout 0.0 \
439
+ --add_instruction_replay \
440
+ --data_replay_freq -1 \
441
+ --replay_after_n_epoch 0 \
442
+ --model_name gainlora_inflora \
443
+ --mlp_hidden_dim 100 \
444
+ --threshold 0.995 \
445
+ --transthreshold 0.995 \
446
+ --kl_ratio 0.5 \
447
+ --attn_temperature 1
448
+
449
+
450
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
451
+ --do_train \
452
+ --do_predict \
453
+ --predict_with_generate \
454
+ --model_name_or_path $2 \
455
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights/trans_input.pt \
456
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights \
457
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights/prompts_keys_till_now.pt \
458
+ --data_dir CL_Benchmark \
459
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
460
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
461
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1729_personachat_generate_next \
462
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next \
463
+ --per_device_train_batch_size 16 \
464
+ --per_device_eval_batch_size 8 \
465
+ --gradient_accumulation_steps 2 \
466
+ --learning_rate 0.0003 \
467
+ --num_train_epochs 100 \
468
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
469
+ --max_source_length 512 \
470
+ --max_target_length 50 \
471
+ --generation_max_length 50 \
472
+ --add_task_name False \
473
+ --add_dataset_name False \
474
+ --overwrite_output_dir \
475
+ --overwrite_cache \
476
+ --lr_scheduler_type constant \
477
+ --warmup_steps 0 \
478
+ --logging_strategy steps \
479
+ --logging_steps 10 \
480
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
481
+ --evaluation_strategy steps \
482
+ --save_strategy steps \
483
+ --save_total_limit 1 \
484
+ --load_best_model_at_end \
485
+ --lora_r 4 \
486
+ --lora_alpha 32 \
487
+ --lora_dropout 0.0 \
488
+ --add_instruction_replay \
489
+ --data_replay_freq -1 \
490
+ --replay_after_n_epoch 0 \
491
+ --model_name gainlora_inflora \
492
+ --mlp_hidden_dim 100 \
493
+ --threshold 0.995 \
494
+ --transthreshold 0.995 \
495
+ --kl_ratio 0.5 \
496
+ --attn_temperature 1
497
+
498
+
499
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
500
+ --do_train \
501
+ --do_predict \
502
+ --predict_with_generate \
503
+ --model_name_or_path $2 \
504
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights/trans_input.pt \
505
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights \
506
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights/prompts_keys_till_now.pt \
507
+ --data_dir CL_Benchmark \
508
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
509
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
510
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task181_outcome_extraction \
511
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction \
512
+ --per_device_train_batch_size 16 \
513
+ --per_device_eval_batch_size 8 \
514
+ --gradient_accumulation_steps 2 \
515
+ --learning_rate 0.0003 \
516
+ --num_train_epochs 100 \
517
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
518
+ --max_source_length 512 \
519
+ --max_target_length 50 \
520
+ --generation_max_length 50 \
521
+ --add_task_name False \
522
+ --add_dataset_name False \
523
+ --overwrite_output_dir \
524
+ --overwrite_cache \
525
+ --lr_scheduler_type constant \
526
+ --warmup_steps 0 \
527
+ --logging_strategy steps \
528
+ --logging_steps 10 \
529
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
530
+ --evaluation_strategy steps \
531
+ --save_strategy steps \
532
+ --save_total_limit 1 \
533
+ --load_best_model_at_end \
534
+ --lora_r 4 \
535
+ --lora_alpha 32 \
536
+ --lora_dropout 0.0 \
537
+ --add_instruction_replay \
538
+ --data_replay_freq -1 \
539
+ --replay_after_n_epoch 0 \
540
+ --model_name gainlora_inflora \
541
+ --mlp_hidden_dim 100 \
542
+ --threshold 0.995 \
543
+ --transthreshold 0.995 \
544
+ --kl_ratio 0.5 \
545
+ --attn_temperature 1
546
+
547
+
548
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
549
+ --do_train \
550
+ --do_predict \
551
+ --predict_with_generate \
552
+ --model_name_or_path $2 \
553
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights/trans_input.pt \
554
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights \
555
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights/prompts_keys_till_now.pt \
556
+ --data_dir CL_Benchmark \
557
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
558
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
559
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task511_reddit_tifu_long_text_summarization \
560
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization \
561
+ --per_device_train_batch_size 16 \
562
+ --per_device_eval_batch_size 8 \
563
+ --gradient_accumulation_steps 2 \
564
+ --learning_rate 0.0003 \
565
+ --num_train_epochs 100 \
566
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
567
+ --max_source_length 512 \
568
+ --max_target_length 50 \
569
+ --generation_max_length 50 \
570
+ --add_task_name False \
571
+ --add_dataset_name False \
572
+ --overwrite_output_dir \
573
+ --overwrite_cache \
574
+ --lr_scheduler_type constant \
575
+ --warmup_steps 0 \
576
+ --logging_strategy steps \
577
+ --logging_steps 10 \
578
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
579
+ --evaluation_strategy steps \
580
+ --save_strategy steps \
581
+ --save_total_limit 1 \
582
+ --load_best_model_at_end \
583
+ --lora_r 4 \
584
+ --lora_alpha 32 \
585
+ --lora_dropout 0.0 \
586
+ --add_instruction_replay \
587
+ --data_replay_freq -1 \
588
+ --replay_after_n_epoch 0 \
589
+ --model_name gainlora_inflora \
590
+ --mlp_hidden_dim 100 \
591
+ --threshold 0.995 \
592
+ --transthreshold 0.995 \
593
+ --kl_ratio 0.5 \
594
+ --attn_temperature 1
595
+
596
+
597
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
598
+ --do_train \
599
+ --do_predict \
600
+ --predict_with_generate \
601
+ --model_name_or_path $2 \
602
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/trans_input.pt \
603
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
604
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/prompts_keys_till_now.pt \
605
+ --data_dir CL_Benchmark \
606
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
607
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
608
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task002_quoref_answer_generation \
609
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation \
610
+ --per_device_train_batch_size 16 \
611
+ --per_device_eval_batch_size 8 \
612
+ --gradient_accumulation_steps 2 \
613
+ --learning_rate 0.0003 \
614
+ --num_train_epochs 100 \
615
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
616
+ --max_source_length 512 \
617
+ --max_target_length 50 \
618
+ --generation_max_length 50 \
619
+ --add_task_name False \
620
+ --add_dataset_name False \
621
+ --overwrite_output_dir \
622
+ --overwrite_cache \
623
+ --lr_scheduler_type constant \
624
+ --warmup_steps 0 \
625
+ --logging_strategy steps \
626
+ --logging_steps 10 \
627
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
628
+ --evaluation_strategy steps \
629
+ --save_strategy steps \
630
+ --save_total_limit 1 \
631
+ --load_best_model_at_end \
632
+ --lora_r 4 \
633
+ --lora_alpha 32 \
634
+ --lora_dropout 0.0 \
635
+ --add_instruction_replay \
636
+ --data_replay_freq -1 \
637
+ --replay_after_n_epoch 0 \
638
+ --model_name gainlora_inflora \
639
+ --mlp_hidden_dim 100 \
640
+ --threshold 0.995 \
641
+ --transthreshold 0.995 \
642
+ --kl_ratio 0.5 \
643
+ --attn_temperature 1
644
+
645
+
646
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
647
+ --do_train \
648
+ --do_predict \
649
+ --predict_with_generate \
650
+ --model_name_or_path $2 \
651
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation/saved_weights/trans_input.pt \
652
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation/saved_weights \
653
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation/saved_weights/prompts_keys_till_now.pt \
654
+ --data_dir CL_Benchmark \
655
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
656
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
657
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1290_xsum_summarization \
658
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/14-task1290_xsum_summarization \
659
+ --per_device_train_batch_size 16 \
660
+ --per_device_eval_batch_size 8 \
661
+ --gradient_accumulation_steps 2 \
662
+ --learning_rate 0.0003 \
663
+ --num_train_epochs 100 \
664
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
665
+ --max_source_length 512 \
666
+ --max_target_length 50 \
667
+ --generation_max_length 50 \
668
+ --add_task_name False \
669
+ --add_dataset_name False \
670
+ --overwrite_output_dir \
671
+ --overwrite_cache \
672
+ --lr_scheduler_type constant \
673
+ --warmup_steps 0 \
674
+ --logging_strategy steps \
675
+ --logging_steps 10 \
676
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
677
+ --evaluation_strategy steps \
678
+ --save_strategy steps \
679
+ --save_total_limit 1 \
680
+ --load_best_model_at_end \
681
+ --lora_r 4 \
682
+ --lora_alpha 32 \
683
+ --lora_dropout 0.0 \
684
+ --add_instruction_replay \
685
+ --data_replay_freq -1 \
686
+ --replay_after_n_epoch 0 \
687
+ --model_name gainlora_inflora \
688
+ --mlp_hidden_dim 100 \
689
+ --threshold 0.995 \
690
+ --transthreshold 0.995 \
691
+ --kl_ratio 0.5 \
692
+ --attn_temperature 1
693
+
694
+
695
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
696
+ --do_train \
697
+ --do_predict \
698
+ --predict_with_generate \
699
+ --model_name_or_path $2 \
700
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/14-task1290_xsum_summarization/saved_weights/trans_input.pt \
701
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/14-task1290_xsum_summarization/saved_weights \
702
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/14-task1290_xsum_summarization/saved_weights/prompts_keys_till_now.pt \
703
+ --data_dir CL_Benchmark \
704
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
705
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
706
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task875_emotion_classification \
707
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/15-task875_emotion_classification \
708
+ --per_device_train_batch_size 16 \
709
+ --per_device_eval_batch_size 8 \
710
+ --gradient_accumulation_steps 2 \
711
+ --learning_rate 0.0003 \
712
+ --num_train_epochs 100 \
713
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
714
+ --max_source_length 512 \
715
+ --max_target_length 50 \
716
+ --generation_max_length 50 \
717
+ --add_task_name False \
718
+ --add_dataset_name False \
719
+ --overwrite_output_dir \
720
+ --overwrite_cache \
721
+ --lr_scheduler_type constant \
722
+ --warmup_steps 0 \
723
+ --logging_strategy steps \
724
+ --logging_steps 10 \
725
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
726
+ --evaluation_strategy steps \
727
+ --save_strategy steps \
728
+ --save_total_limit 1 \
729
+ --load_best_model_at_end \
730
+ --lora_r 4 \
731
+ --lora_alpha 32 \
732
+ --lora_dropout 0.0 \
733
+ --add_instruction_replay \
734
+ --data_replay_freq -1 \
735
+ --replay_after_n_epoch 0 \
736
+ --model_name gainlora_inflora \
737
+ --mlp_hidden_dim 100 \
738
+ --threshold 0.995 \
739
+ --transthreshold 0.995 \
740
+ --kl_ratio 0.5 \
741
+ --attn_temperature 1
742
+
743
+ python score.py gen_script_superni_order2_t5_small_gainlora_inflora gen_script_superni_order2_t5_small_gainlora_inflora
improve_gainlora/T5_small/gen_script_superni_order2_t5_small_inflora.sh ADDED
@@ -0,0 +1,713 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
21
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task748_glucose_reverse_cause_event_detection \
22
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection \
23
+ --per_device_train_batch_size 16 \
24
+ --per_device_eval_batch_size 8 \
25
+ --gradient_accumulation_steps 2 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 100 \
28
+ --run_name gen_script_superni_order2_t5_small_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_rougeL \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --lora_r 4 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --load_best_model_at_end \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --model_name inflora \
51
+ --threshold 0.995 \
52
+ --kl_ratio 0.5 \
53
+ --attn_temperature 1
54
+
55
+
56
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
57
+ --do_train \
58
+ --do_predict \
59
+ --predict_with_generate \
60
+ --model_name_or_path $2 \
61
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights/trans_input.pt \
62
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights \
63
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights/prompts_keys_till_now.pt \
64
+ --data_dir CL_Benchmark \
65
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
66
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
67
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task073_commonsenseqa_answer_generation \
68
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation \
69
+ --per_device_train_batch_size 16 \
70
+ --per_device_eval_batch_size 8 \
71
+ --gradient_accumulation_steps 2 \
72
+ --learning_rate 0.0003 \
73
+ --num_train_epochs 100 \
74
+ --run_name gen_script_superni_order2_t5_small_inflora \
75
+ --max_source_length 512 \
76
+ --max_target_length 50 \
77
+ --generation_max_length 50 \
78
+ --add_task_name False \
79
+ --add_dataset_name False \
80
+ --overwrite_output_dir \
81
+ --overwrite_cache \
82
+ --lr_scheduler_type constant \
83
+ --warmup_steps 0 \
84
+ --logging_strategy steps \
85
+ --logging_steps 10 \
86
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
87
+ --evaluation_strategy steps \
88
+ --save_strategy steps \
89
+ --save_total_limit 1 \
90
+ --load_best_model_at_end \
91
+ --lora_r 4 \
92
+ --lora_alpha 32 \
93
+ --lora_dropout 0.0 \
94
+ --add_instruction_replay \
95
+ --data_replay_freq -1 \
96
+ --replay_after_n_epoch 0 \
97
+ --model_name inflora \
98
+ --threshold 0.995 \
99
+ --kl_ratio 0.5 \
100
+ --attn_temperature 1
101
+
102
+
103
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
104
+ --do_train \
105
+ --do_predict \
106
+ --predict_with_generate \
107
+ --model_name_or_path $2 \
108
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights/trans_input.pt \
109
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights \
110
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights/prompts_keys_till_now.pt \
111
+ --data_dir CL_Benchmark \
112
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
113
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
114
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1590_diplomacy_text_generation \
115
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation \
116
+ --per_device_train_batch_size 16 \
117
+ --per_device_eval_batch_size 8 \
118
+ --gradient_accumulation_steps 2 \
119
+ --learning_rate 0.0003 \
120
+ --num_train_epochs 100 \
121
+ --run_name gen_script_superni_order2_t5_small_inflora \
122
+ --max_source_length 512 \
123
+ --max_target_length 50 \
124
+ --generation_max_length 50 \
125
+ --add_task_name False \
126
+ --add_dataset_name False \
127
+ --overwrite_output_dir \
128
+ --overwrite_cache \
129
+ --lr_scheduler_type constant \
130
+ --warmup_steps 0 \
131
+ --logging_strategy steps \
132
+ --logging_steps 10 \
133
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
134
+ --evaluation_strategy steps \
135
+ --save_strategy steps \
136
+ --save_total_limit 1 \
137
+ --load_best_model_at_end \
138
+ --lora_r 4 \
139
+ --lora_alpha 32 \
140
+ --lora_dropout 0.0 \
141
+ --add_instruction_replay \
142
+ --data_replay_freq -1 \
143
+ --replay_after_n_epoch 0 \
144
+ --model_name inflora \
145
+ --threshold 0.995 \
146
+ --kl_ratio 0.5 \
147
+ --attn_temperature 1
148
+
149
+
150
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
151
+ --do_train \
152
+ --do_predict \
153
+ --predict_with_generate \
154
+ --model_name_or_path $2 \
155
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights/trans_input.pt \
156
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights \
157
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights/prompts_keys_till_now.pt \
158
+ --data_dir CL_Benchmark \
159
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
160
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
161
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task639_multi_woz_user_utterance_generation \
162
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation \
163
+ --per_device_train_batch_size 16 \
164
+ --per_device_eval_batch_size 8 \
165
+ --gradient_accumulation_steps 2 \
166
+ --learning_rate 0.0003 \
167
+ --num_train_epochs 100 \
168
+ --run_name gen_script_superni_order2_t5_small_inflora \
169
+ --max_source_length 512 \
170
+ --max_target_length 50 \
171
+ --generation_max_length 50 \
172
+ --add_task_name False \
173
+ --add_dataset_name False \
174
+ --overwrite_output_dir \
175
+ --overwrite_cache \
176
+ --lr_scheduler_type constant \
177
+ --warmup_steps 0 \
178
+ --logging_strategy steps \
179
+ --logging_steps 10 \
180
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
181
+ --evaluation_strategy steps \
182
+ --save_strategy steps \
183
+ --save_total_limit 1 \
184
+ --load_best_model_at_end \
185
+ --lora_r 4 \
186
+ --lora_alpha 32 \
187
+ --lora_dropout 0.0 \
188
+ --add_instruction_replay \
189
+ --data_replay_freq -1 \
190
+ --replay_after_n_epoch 0 \
191
+ --model_name inflora \
192
+ --threshold 0.995 \
193
+ --kl_ratio 0.5 \
194
+ --attn_temperature 1
195
+
196
+
197
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
198
+ --do_train \
199
+ --do_predict \
200
+ --predict_with_generate \
201
+ --model_name_or_path $2 \
202
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights/trans_input.pt \
203
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights \
204
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights/prompts_keys_till_now.pt \
205
+ --data_dir CL_Benchmark \
206
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
207
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
208
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1572_samsum_summary \
209
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary \
210
+ --per_device_train_batch_size 16 \
211
+ --per_device_eval_batch_size 8 \
212
+ --gradient_accumulation_steps 2 \
213
+ --learning_rate 0.0003 \
214
+ --num_train_epochs 100 \
215
+ --run_name gen_script_superni_order2_t5_small_inflora \
216
+ --max_source_length 512 \
217
+ --max_target_length 50 \
218
+ --generation_max_length 50 \
219
+ --add_task_name False \
220
+ --add_dataset_name False \
221
+ --overwrite_output_dir \
222
+ --overwrite_cache \
223
+ --lr_scheduler_type constant \
224
+ --warmup_steps 0 \
225
+ --logging_strategy steps \
226
+ --logging_steps 10 \
227
+ --metric_for_best_model eval_rougeL_for_task1572_samsum_summary \
228
+ --evaluation_strategy steps \
229
+ --save_strategy steps \
230
+ --save_total_limit 1 \
231
+ --load_best_model_at_end \
232
+ --lora_r 4 \
233
+ --lora_alpha 32 \
234
+ --lora_dropout 0.0 \
235
+ --add_instruction_replay \
236
+ --data_replay_freq -1 \
237
+ --replay_after_n_epoch 0 \
238
+ --model_name inflora \
239
+ --threshold 0.995 \
240
+ --kl_ratio 0.5 \
241
+ --attn_temperature 1
242
+
243
+
244
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
245
+ --do_train \
246
+ --do_predict \
247
+ --predict_with_generate \
248
+ --model_name_or_path $2 \
249
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights/trans_input.pt \
250
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights \
251
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights/prompts_keys_till_now.pt \
252
+ --data_dir CL_Benchmark \
253
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
254
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
255
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1687_sentiment140_classification \
256
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification \
257
+ --per_device_train_batch_size 16 \
258
+ --per_device_eval_batch_size 8 \
259
+ --gradient_accumulation_steps 2 \
260
+ --learning_rate 0.0003 \
261
+ --num_train_epochs 100 \
262
+ --run_name gen_script_superni_order2_t5_small_inflora \
263
+ --max_source_length 512 \
264
+ --max_target_length 50 \
265
+ --generation_max_length 50 \
266
+ --add_task_name False \
267
+ --add_dataset_name False \
268
+ --overwrite_output_dir \
269
+ --overwrite_cache \
270
+ --lr_scheduler_type constant \
271
+ --warmup_steps 0 \
272
+ --logging_strategy steps \
273
+ --logging_steps 10 \
274
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
275
+ --evaluation_strategy steps \
276
+ --save_strategy steps \
277
+ --save_total_limit 1 \
278
+ --load_best_model_at_end \
279
+ --lora_r 4 \
280
+ --lora_alpha 32 \
281
+ --lora_dropout 0.0 \
282
+ --add_instruction_replay \
283
+ --data_replay_freq -1 \
284
+ --replay_after_n_epoch 0 \
285
+ --model_name inflora \
286
+ --threshold 0.995 \
287
+ --kl_ratio 0.5 \
288
+ --attn_temperature 1
289
+
290
+
291
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
292
+ --do_train \
293
+ --do_predict \
294
+ --predict_with_generate \
295
+ --model_name_or_path $2 \
296
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights/trans_input.pt \
297
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights \
298
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights/prompts_keys_till_now.pt \
299
+ --data_dir CL_Benchmark \
300
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
301
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
302
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task591_sciq_answer_generation \
303
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation \
304
+ --per_device_train_batch_size 16 \
305
+ --per_device_eval_batch_size 8 \
306
+ --gradient_accumulation_steps 2 \
307
+ --learning_rate 0.0003 \
308
+ --num_train_epochs 100 \
309
+ --run_name gen_script_superni_order2_t5_small_inflora \
310
+ --max_source_length 512 \
311
+ --max_target_length 50 \
312
+ --generation_max_length 50 \
313
+ --add_task_name False \
314
+ --add_dataset_name False \
315
+ --overwrite_output_dir \
316
+ --overwrite_cache \
317
+ --lr_scheduler_type constant \
318
+ --warmup_steps 0 \
319
+ --logging_strategy steps \
320
+ --logging_steps 10 \
321
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
322
+ --evaluation_strategy steps \
323
+ --save_strategy steps \
324
+ --save_total_limit 1 \
325
+ --load_best_model_at_end \
326
+ --lora_r 4 \
327
+ --lora_alpha 32 \
328
+ --lora_dropout 0.0 \
329
+ --add_instruction_replay \
330
+ --data_replay_freq -1 \
331
+ --replay_after_n_epoch 0 \
332
+ --model_name inflora \
333
+ --threshold 0.995 \
334
+ --kl_ratio 0.5 \
335
+ --attn_temperature 1
336
+
337
+
338
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
339
+ --do_train \
340
+ --do_predict \
341
+ --predict_with_generate \
342
+ --model_name_or_path $2 \
343
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights/trans_input.pt \
344
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights \
345
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights/prompts_keys_till_now.pt \
346
+ --data_dir CL_Benchmark \
347
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
348
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
349
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task363_sst2_polarity_classification \
350
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification \
351
+ --per_device_train_batch_size 16 \
352
+ --per_device_eval_batch_size 8 \
353
+ --gradient_accumulation_steps 2 \
354
+ --learning_rate 0.0003 \
355
+ --num_train_epochs 100 \
356
+ --run_name gen_script_superni_order2_t5_small_inflora \
357
+ --max_source_length 512 \
358
+ --max_target_length 50 \
359
+ --generation_max_length 50 \
360
+ --add_task_name False \
361
+ --add_dataset_name False \
362
+ --overwrite_output_dir \
363
+ --overwrite_cache \
364
+ --lr_scheduler_type constant \
365
+ --warmup_steps 0 \
366
+ --logging_strategy steps \
367
+ --logging_steps 10 \
368
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
369
+ --evaluation_strategy steps \
370
+ --save_strategy steps \
371
+ --save_total_limit 1 \
372
+ --load_best_model_at_end \
373
+ --lora_r 4 \
374
+ --lora_alpha 32 \
375
+ --lora_dropout 0.0 \
376
+ --add_instruction_replay \
377
+ --data_replay_freq -1 \
378
+ --replay_after_n_epoch 0 \
379
+ --model_name inflora \
380
+ --threshold 0.995 \
381
+ --kl_ratio 0.5 \
382
+ --attn_temperature 1
383
+
384
+
385
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
386
+ --do_train \
387
+ --do_predict \
388
+ --predict_with_generate \
389
+ --model_name_or_path $2 \
390
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights/trans_input.pt \
391
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights \
392
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights/prompts_keys_till_now.pt \
393
+ --data_dir CL_Benchmark \
394
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
395
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
396
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1510_evalution_relation_extraction \
397
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction \
398
+ --per_device_train_batch_size 16 \
399
+ --per_device_eval_batch_size 8 \
400
+ --gradient_accumulation_steps 2 \
401
+ --learning_rate 0.0003 \
402
+ --num_train_epochs 100 \
403
+ --run_name gen_script_superni_order2_t5_small_inflora \
404
+ --max_source_length 512 \
405
+ --max_target_length 50 \
406
+ --generation_max_length 50 \
407
+ --add_task_name False \
408
+ --add_dataset_name False \
409
+ --overwrite_output_dir \
410
+ --overwrite_cache \
411
+ --lr_scheduler_type constant \
412
+ --warmup_steps 0 \
413
+ --logging_strategy steps \
414
+ --logging_steps 10 \
415
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
416
+ --evaluation_strategy steps \
417
+ --save_strategy steps \
418
+ --save_total_limit 1 \
419
+ --load_best_model_at_end \
420
+ --lora_r 4 \
421
+ --lora_alpha 32 \
422
+ --lora_dropout 0.0 \
423
+ --add_instruction_replay \
424
+ --data_replay_freq -1 \
425
+ --replay_after_n_epoch 0 \
426
+ --model_name inflora \
427
+ --threshold 0.995 \
428
+ --kl_ratio 0.5 \
429
+ --attn_temperature 1
430
+
431
+
432
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
433
+ --do_train \
434
+ --do_predict \
435
+ --predict_with_generate \
436
+ --model_name_or_path $2 \
437
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights/trans_input.pt \
438
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights \
439
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights/prompts_keys_till_now.pt \
440
+ --data_dir CL_Benchmark \
441
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
442
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
443
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1729_personachat_generate_next \
444
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next \
445
+ --per_device_train_batch_size 16 \
446
+ --per_device_eval_batch_size 8 \
447
+ --gradient_accumulation_steps 2 \
448
+ --learning_rate 0.0003 \
449
+ --num_train_epochs 100 \
450
+ --run_name gen_script_superni_order2_t5_small_inflora \
451
+ --max_source_length 512 \
452
+ --max_target_length 50 \
453
+ --generation_max_length 50 \
454
+ --add_task_name False \
455
+ --add_dataset_name False \
456
+ --overwrite_output_dir \
457
+ --overwrite_cache \
458
+ --lr_scheduler_type constant \
459
+ --warmup_steps 0 \
460
+ --logging_strategy steps \
461
+ --logging_steps 10 \
462
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
463
+ --evaluation_strategy steps \
464
+ --save_strategy steps \
465
+ --save_total_limit 1 \
466
+ --load_best_model_at_end \
467
+ --lora_r 4 \
468
+ --lora_alpha 32 \
469
+ --lora_dropout 0.0 \
470
+ --add_instruction_replay \
471
+ --data_replay_freq -1 \
472
+ --replay_after_n_epoch 0 \
473
+ --model_name inflora \
474
+ --threshold 0.995 \
475
+ --kl_ratio 0.5 \
476
+ --attn_temperature 1
477
+
478
+
479
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
480
+ --do_train \
481
+ --do_predict \
482
+ --predict_with_generate \
483
+ --model_name_or_path $2 \
484
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights/trans_input.pt \
485
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights \
486
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights/prompts_keys_till_now.pt \
487
+ --data_dir CL_Benchmark \
488
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
489
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
490
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task181_outcome_extraction \
491
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction \
492
+ --per_device_train_batch_size 16 \
493
+ --per_device_eval_batch_size 8 \
494
+ --gradient_accumulation_steps 2 \
495
+ --learning_rate 0.0003 \
496
+ --num_train_epochs 100 \
497
+ --run_name gen_script_superni_order2_t5_small_inflora \
498
+ --max_source_length 512 \
499
+ --max_target_length 50 \
500
+ --generation_max_length 50 \
501
+ --add_task_name False \
502
+ --add_dataset_name False \
503
+ --overwrite_output_dir \
504
+ --overwrite_cache \
505
+ --lr_scheduler_type constant \
506
+ --warmup_steps 0 \
507
+ --logging_strategy steps \
508
+ --logging_steps 10 \
509
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
510
+ --evaluation_strategy steps \
511
+ --save_strategy steps \
512
+ --save_total_limit 1 \
513
+ --load_best_model_at_end \
514
+ --lora_r 4 \
515
+ --lora_alpha 32 \
516
+ --lora_dropout 0.0 \
517
+ --add_instruction_replay \
518
+ --data_replay_freq -1 \
519
+ --replay_after_n_epoch 0 \
520
+ --model_name inflora \
521
+ --threshold 0.995 \
522
+ --kl_ratio 0.5 \
523
+ --attn_temperature 1
524
+
525
+
526
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
527
+ --do_train \
528
+ --do_predict \
529
+ --predict_with_generate \
530
+ --model_name_or_path $2 \
531
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights/trans_input.pt \
532
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights \
533
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights/prompts_keys_till_now.pt \
534
+ --data_dir CL_Benchmark \
535
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
536
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
537
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task511_reddit_tifu_long_text_summarization \
538
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization \
539
+ --per_device_train_batch_size 16 \
540
+ --per_device_eval_batch_size 8 \
541
+ --gradient_accumulation_steps 2 \
542
+ --learning_rate 0.0003 \
543
+ --num_train_epochs 100 \
544
+ --run_name gen_script_superni_order2_t5_small_inflora \
545
+ --max_source_length 512 \
546
+ --max_target_length 50 \
547
+ --generation_max_length 50 \
548
+ --add_task_name False \
549
+ --add_dataset_name False \
550
+ --overwrite_output_dir \
551
+ --overwrite_cache \
552
+ --lr_scheduler_type constant \
553
+ --warmup_steps 0 \
554
+ --logging_strategy steps \
555
+ --logging_steps 10 \
556
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
557
+ --evaluation_strategy steps \
558
+ --save_strategy steps \
559
+ --save_total_limit 1 \
560
+ --load_best_model_at_end \
561
+ --lora_r 4 \
562
+ --lora_alpha 32 \
563
+ --lora_dropout 0.0 \
564
+ --add_instruction_replay \
565
+ --data_replay_freq -1 \
566
+ --replay_after_n_epoch 0 \
567
+ --model_name inflora \
568
+ --threshold 0.995 \
569
+ --kl_ratio 0.5 \
570
+ --attn_temperature 1
571
+
572
+
573
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
574
+ --do_train \
575
+ --do_predict \
576
+ --predict_with_generate \
577
+ --model_name_or_path $2 \
578
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/trans_input.pt \
579
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
580
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/prompts_keys_till_now.pt \
581
+ --data_dir CL_Benchmark \
582
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
583
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
584
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task002_quoref_answer_generation \
585
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation \
586
+ --per_device_train_batch_size 16 \
587
+ --per_device_eval_batch_size 8 \
588
+ --gradient_accumulation_steps 2 \
589
+ --learning_rate 0.0003 \
590
+ --num_train_epochs 100 \
591
+ --run_name gen_script_superni_order2_t5_small_inflora \
592
+ --max_source_length 512 \
593
+ --max_target_length 50 \
594
+ --generation_max_length 50 \
595
+ --add_task_name False \
596
+ --add_dataset_name False \
597
+ --overwrite_output_dir \
598
+ --overwrite_cache \
599
+ --lr_scheduler_type constant \
600
+ --warmup_steps 0 \
601
+ --logging_strategy steps \
602
+ --logging_steps 10 \
603
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
604
+ --evaluation_strategy steps \
605
+ --save_strategy steps \
606
+ --save_total_limit 1 \
607
+ --load_best_model_at_end \
608
+ --lora_r 4 \
609
+ --lora_alpha 32 \
610
+ --lora_dropout 0.0 \
611
+ --add_instruction_replay \
612
+ --data_replay_freq -1 \
613
+ --replay_after_n_epoch 0 \
614
+ --model_name inflora \
615
+ --threshold 0.995 \
616
+ --kl_ratio 0.5 \
617
+ --attn_temperature 1
618
+
619
+
620
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
621
+ --do_train \
622
+ --do_predict \
623
+ --predict_with_generate \
624
+ --model_name_or_path $2 \
625
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation/saved_weights/trans_input.pt \
626
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation/saved_weights \
627
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation/saved_weights/prompts_keys_till_now.pt \
628
+ --data_dir CL_Benchmark \
629
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
630
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
631
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1290_xsum_summarization \
632
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/14-task1290_xsum_summarization \
633
+ --per_device_train_batch_size 16 \
634
+ --per_device_eval_batch_size 8 \
635
+ --gradient_accumulation_steps 2 \
636
+ --learning_rate 0.0003 \
637
+ --num_train_epochs 100 \
638
+ --run_name gen_script_superni_order2_t5_small_inflora \
639
+ --max_source_length 512 \
640
+ --max_target_length 50 \
641
+ --generation_max_length 50 \
642
+ --add_task_name False \
643
+ --add_dataset_name False \
644
+ --overwrite_output_dir \
645
+ --overwrite_cache \
646
+ --lr_scheduler_type constant \
647
+ --warmup_steps 0 \
648
+ --logging_strategy steps \
649
+ --logging_steps 10 \
650
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
651
+ --evaluation_strategy steps \
652
+ --save_strategy steps \
653
+ --save_total_limit 1 \
654
+ --load_best_model_at_end \
655
+ --lora_r 4 \
656
+ --lora_alpha 32 \
657
+ --lora_dropout 0.0 \
658
+ --add_instruction_replay \
659
+ --data_replay_freq -1 \
660
+ --replay_after_n_epoch 0 \
661
+ --model_name inflora \
662
+ --threshold 0.995 \
663
+ --kl_ratio 0.5 \
664
+ --attn_temperature 1
665
+
666
+
667
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
668
+ --do_train \
669
+ --do_predict \
670
+ --predict_with_generate \
671
+ --model_name_or_path $2 \
672
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/14-task1290_xsum_summarization/saved_weights/trans_input.pt \
673
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/14-task1290_xsum_summarization/saved_weights \
674
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/14-task1290_xsum_summarization/saved_weights/prompts_keys_till_now.pt \
675
+ --data_dir CL_Benchmark \
676
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
677
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
678
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task875_emotion_classification \
679
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/15-task875_emotion_classification \
680
+ --per_device_train_batch_size 16 \
681
+ --per_device_eval_batch_size 8 \
682
+ --gradient_accumulation_steps 2 \
683
+ --learning_rate 0.0003 \
684
+ --num_train_epochs 100 \
685
+ --run_name gen_script_superni_order2_t5_small_inflora \
686
+ --max_source_length 512 \
687
+ --max_target_length 50 \
688
+ --generation_max_length 50 \
689
+ --add_task_name False \
690
+ --add_dataset_name False \
691
+ --overwrite_output_dir \
692
+ --overwrite_cache \
693
+ --lr_scheduler_type constant \
694
+ --warmup_steps 0 \
695
+ --logging_strategy steps \
696
+ --logging_steps 10 \
697
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
698
+ --evaluation_strategy steps \
699
+ --save_strategy steps \
700
+ --save_total_limit 1 \
701
+ --load_best_model_at_end \
702
+ --lora_r 4 \
703
+ --lora_alpha 32 \
704
+ --lora_dropout 0.0 \
705
+ --add_instruction_replay \
706
+ --data_replay_freq -1 \
707
+ --replay_after_n_epoch 0 \
708
+ --model_name inflora \
709
+ --threshold 0.995 \
710
+ --kl_ratio 0.5 \
711
+ --attn_temperature 1
712
+
713
+ python score.py gen_script_superni_order2_t5_small_inflora gen_script_superni_order2_t5_small_inflora
improve_gainlora/T5_small/gen_script_superni_order2_t5_small_specroute.sh ADDED
@@ -0,0 +1,804 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:2
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ # ============================================================
15
+ # Auto-detect GPU count and type for optimal parallelism
16
+ # ============================================================
17
+ NUM_GPUS=$(nvidia-smi -L 2>/dev/null | wc -l)
18
+ GPU_MEM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
19
+
20
+ if [ -z "$GPU_MEM" ]; then
21
+ echo "ERROR: No GPU detected!"
22
+ exit 1
23
+ fi
24
+
25
+ # Determine GPU type
26
+ if [ "$GPU_MEM" -lt 20000 ]; then
27
+ IS_T4=1
28
+ echo "[GPU] Detected T4 GPUs (${GPU_MEM}MB VRAM each)"
29
+ else
30
+ IS_T4=0
31
+ echo "[GPU] Detected high-memory GPUs (${GPU_MEM}MB VRAM each)"
32
+ fi
33
+
34
+ # Determine parallelism strategy
35
+ if [ "$IS_T4" -eq 1 ] && [ "$NUM_GPUS" -ge 2 ]; then
36
+ GPU_MODE="t4_2gpu"
37
+ GPU_IDS="0,1"
38
+ FP16_FLAG=""
39
+ echo "[GPU] Strategy: 2x T4 DataParallel + fp32 + gradient_checkpointing"
40
+ elif [ "$IS_T4" -eq 1 ]; then
41
+ GPU_MODE="t4_1gpu"
42
+ GPU_IDS="${1:-0}"
43
+ FP16_FLAG=""
44
+ echo "[GPU] Strategy: 1x T4 + fp32 + gradient_checkpointing"
45
+ else
46
+ GPU_MODE="a100"
47
+ GPU_IDS="${1:-0}"
48
+ FP16_FLAG=""
49
+ echo "[GPU] Strategy: A100 (single GPU, fp32)"
50
+ fi
51
+
52
+ echo "[GPU] Using CUDA_VISIBLE_DEVICES=$GPU_IDS"
53
+ echo "============================================================"
54
+ echo ""
55
+
56
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
57
+ BSZ=8; GA=2; EVAL_BSZ=16
58
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
59
+ BSZ=16; GA=2; EVAL_BSZ=16
60
+ else
61
+ BSZ=32; GA=1; EVAL_BSZ=32
62
+ fi
63
+
64
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
65
+ --do_train \
66
+ --do_predict \
67
+ --predict_with_generate \
68
+ --model_name_or_path $2 \
69
+ --data_dir CL_Benchmark \
70
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
71
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task748_glucose_reverse_cause_event_detection \
72
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection \
73
+ --per_device_train_batch_size $BSZ \
74
+ --per_device_eval_batch_size $EVAL_BSZ \
75
+ --gradient_accumulation_steps $GA \
76
+ --learning_rate 0.0003 \
77
+ --num_train_epochs 100 \
78
+ --run_name gen_script_superni_order2_t5_small_specroute \
79
+ --max_source_length 512 \
80
+ --max_target_length 50 \
81
+ --generation_max_length 50 \
82
+ --add_task_name False \
83
+ --add_dataset_name False \
84
+ --overwrite_output_dir \
85
+ --overwrite_cache \
86
+ --lr_scheduler_type constant \
87
+ --warmup_steps 0 \
88
+ --logging_strategy steps \
89
+ --logging_steps 10 \
90
+ --metric_for_best_model eval_rougeL \
91
+ --evaluation_strategy steps \
92
+ --save_strategy steps \
93
+ --save_total_limit 1 \
94
+ --load_best_model_at_end \
95
+ --lora_r 4 \
96
+ --lora_alpha 32 \
97
+ --lora_dropout 0.0 \
98
+ --run_single True \
99
+ --data_replay_freq -1 \
100
+ --mlp_hidden_dim 100 \
101
+ --model_name specroute \
102
+ --threshold 0.995 \
103
+ --transthreshold 0.995 \
104
+ $FP16_FLAG
105
+
106
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
107
+ BSZ=8; GA=2; EVAL_BSZ=16
108
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
109
+ BSZ=16; GA=2; EVAL_BSZ=16
110
+ else
111
+ BSZ=32; GA=1; EVAL_BSZ=32
112
+ fi
113
+
114
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
115
+ --do_train \
116
+ --do_predict \
117
+ --predict_with_generate \
118
+ --model_name_or_path $2 \
119
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights \
120
+ --data_dir CL_Benchmark \
121
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
122
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task073_commonsenseqa_answer_generation \
123
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation \
124
+ --per_device_train_batch_size $BSZ \
125
+ --per_device_eval_batch_size $EVAL_BSZ \
126
+ --gradient_accumulation_steps $GA \
127
+ --learning_rate 0.0003 \
128
+ --num_train_epochs 100 \
129
+ --run_name gen_script_superni_order2_t5_small_specroute \
130
+ --max_source_length 512 \
131
+ --max_target_length 50 \
132
+ --generation_max_length 50 \
133
+ --add_task_name False \
134
+ --add_dataset_name False \
135
+ --overwrite_output_dir \
136
+ --overwrite_cache \
137
+ --lr_scheduler_type constant \
138
+ --warmup_steps 0 \
139
+ --logging_strategy steps \
140
+ --logging_steps 10 \
141
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
142
+ --evaluation_strategy steps \
143
+ --save_strategy steps \
144
+ --save_total_limit 1 \
145
+ --load_best_model_at_end \
146
+ --lora_r 4 \
147
+ --lora_alpha 32 \
148
+ --lora_dropout 0.0 \
149
+ --data_replay_freq -1 \
150
+ --mlp_hidden_dim 100 \
151
+ --model_name specroute \
152
+ --threshold 0.995 \
153
+ --transthreshold 0.995 \
154
+ $FP16_FLAG
155
+
156
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
157
+ BSZ=8; GA=2; EVAL_BSZ=16
158
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
159
+ BSZ=16; GA=2; EVAL_BSZ=16
160
+ else
161
+ BSZ=32; GA=1; EVAL_BSZ=32
162
+ fi
163
+
164
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
165
+ --do_train \
166
+ --do_predict \
167
+ --predict_with_generate \
168
+ --model_name_or_path $2 \
169
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights \
170
+ --data_dir CL_Benchmark \
171
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
172
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task875_emotion_classification \
173
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification \
174
+ --per_device_train_batch_size $BSZ \
175
+ --per_device_eval_batch_size $EVAL_BSZ \
176
+ --gradient_accumulation_steps $GA \
177
+ --learning_rate 0.0003 \
178
+ --num_train_epochs 100 \
179
+ --run_name gen_script_superni_order2_t5_small_specroute \
180
+ --max_source_length 512 \
181
+ --max_target_length 50 \
182
+ --generation_max_length 50 \
183
+ --add_task_name False \
184
+ --add_dataset_name False \
185
+ --overwrite_output_dir \
186
+ --overwrite_cache \
187
+ --lr_scheduler_type constant \
188
+ --warmup_steps 0 \
189
+ --logging_strategy steps \
190
+ --logging_steps 10 \
191
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
192
+ --evaluation_strategy steps \
193
+ --save_strategy steps \
194
+ --save_total_limit 1 \
195
+ --load_best_model_at_end \
196
+ --lora_r 4 \
197
+ --lora_alpha 32 \
198
+ --lora_dropout 0.0 \
199
+ --data_replay_freq -1 \
200
+ --mlp_hidden_dim 100 \
201
+ --model_name specroute \
202
+ --threshold 0.995 \
203
+ --transthreshold 0.995 \
204
+ $FP16_FLAG
205
+
206
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
207
+ BSZ=8; GA=2; EVAL_BSZ=16
208
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
209
+ BSZ=16; GA=2; EVAL_BSZ=16
210
+ else
211
+ BSZ=32; GA=1; EVAL_BSZ=32
212
+ fi
213
+
214
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
215
+ --do_train \
216
+ --do_predict \
217
+ --predict_with_generate \
218
+ --model_name_or_path $2 \
219
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights \
220
+ --data_dir CL_Benchmark \
221
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
222
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task002_quoref_answer_generation \
223
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation \
224
+ --per_device_train_batch_size $BSZ \
225
+ --per_device_eval_batch_size $EVAL_BSZ \
226
+ --gradient_accumulation_steps $GA \
227
+ --learning_rate 0.0003 \
228
+ --num_train_epochs 100 \
229
+ --run_name gen_script_superni_order2_t5_small_specroute \
230
+ --max_source_length 512 \
231
+ --max_target_length 50 \
232
+ --generation_max_length 50 \
233
+ --add_task_name False \
234
+ --add_dataset_name False \
235
+ --overwrite_output_dir \
236
+ --overwrite_cache \
237
+ --lr_scheduler_type constant \
238
+ --warmup_steps 0 \
239
+ --logging_strategy steps \
240
+ --logging_steps 10 \
241
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
242
+ --evaluation_strategy steps \
243
+ --save_strategy steps \
244
+ --save_total_limit 1 \
245
+ --load_best_model_at_end \
246
+ --lora_r 4 \
247
+ --lora_alpha 32 \
248
+ --lora_dropout 0.0 \
249
+ --data_replay_freq -1 \
250
+ --mlp_hidden_dim 100 \
251
+ --model_name specroute \
252
+ --threshold 0.995 \
253
+ --transthreshold 0.995 \
254
+ $FP16_FLAG
255
+
256
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
257
+ BSZ=8; GA=2; EVAL_BSZ=16
258
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
259
+ BSZ=16; GA=2; EVAL_BSZ=16
260
+ else
261
+ BSZ=32; GA=1; EVAL_BSZ=32
262
+ fi
263
+
264
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
265
+ --do_train \
266
+ --do_predict \
267
+ --predict_with_generate \
268
+ --model_name_or_path $2 \
269
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights \
270
+ --data_dir CL_Benchmark \
271
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
272
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1687_sentiment140_classification \
273
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification \
274
+ --per_device_train_batch_size $BSZ \
275
+ --per_device_eval_batch_size $EVAL_BSZ \
276
+ --gradient_accumulation_steps $GA \
277
+ --learning_rate 0.0003 \
278
+ --num_train_epochs 100 \
279
+ --run_name gen_script_superni_order2_t5_small_specroute \
280
+ --max_source_length 512 \
281
+ --max_target_length 50 \
282
+ --generation_max_length 50 \
283
+ --add_task_name False \
284
+ --add_dataset_name False \
285
+ --overwrite_output_dir \
286
+ --overwrite_cache \
287
+ --lr_scheduler_type constant \
288
+ --warmup_steps 0 \
289
+ --logging_strategy steps \
290
+ --logging_steps 10 \
291
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
292
+ --evaluation_strategy steps \
293
+ --save_strategy steps \
294
+ --save_total_limit 1 \
295
+ --load_best_model_at_end \
296
+ --lora_r 4 \
297
+ --lora_alpha 32 \
298
+ --lora_dropout 0.0 \
299
+ --data_replay_freq -1 \
300
+ --mlp_hidden_dim 100 \
301
+ --model_name specroute \
302
+ --threshold 0.995 \
303
+ --transthreshold 0.995 \
304
+ $FP16_FLAG
305
+
306
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
307
+ BSZ=8; GA=2; EVAL_BSZ=16
308
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
309
+ BSZ=16; GA=2; EVAL_BSZ=16
310
+ else
311
+ BSZ=32; GA=1; EVAL_BSZ=32
312
+ fi
313
+
314
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
315
+ --do_train \
316
+ --do_predict \
317
+ --predict_with_generate \
318
+ --model_name_or_path $2 \
319
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights \
320
+ --data_dir CL_Benchmark \
321
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
322
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task591_sciq_answer_generation \
323
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation \
324
+ --per_device_train_batch_size $BSZ \
325
+ --per_device_eval_batch_size $EVAL_BSZ \
326
+ --gradient_accumulation_steps $GA \
327
+ --learning_rate 0.0003 \
328
+ --num_train_epochs 100 \
329
+ --run_name gen_script_superni_order2_t5_small_specroute \
330
+ --max_source_length 512 \
331
+ --max_target_length 50 \
332
+ --generation_max_length 50 \
333
+ --add_task_name False \
334
+ --add_dataset_name False \
335
+ --overwrite_output_dir \
336
+ --overwrite_cache \
337
+ --lr_scheduler_type constant \
338
+ --warmup_steps 0 \
339
+ --logging_strategy steps \
340
+ --logging_steps 10 \
341
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
342
+ --evaluation_strategy steps \
343
+ --save_strategy steps \
344
+ --save_total_limit 1 \
345
+ --load_best_model_at_end \
346
+ --lora_r 4 \
347
+ --lora_alpha 32 \
348
+ --lora_dropout 0.0 \
349
+ --data_replay_freq -1 \
350
+ --mlp_hidden_dim 100 \
351
+ --model_name specroute \
352
+ --threshold 0.995 \
353
+ --transthreshold 0.995 \
354
+ $FP16_FLAG
355
+
356
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
357
+ BSZ=8; GA=2; EVAL_BSZ=16
358
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
359
+ BSZ=16; GA=2; EVAL_BSZ=16
360
+ else
361
+ BSZ=32; GA=1; EVAL_BSZ=32
362
+ fi
363
+
364
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
365
+ --do_train \
366
+ --do_predict \
367
+ --predict_with_generate \
368
+ --model_name_or_path $2 \
369
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights \
370
+ --data_dir CL_Benchmark \
371
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
372
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task363_sst2_polarity_classification \
373
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification \
374
+ --per_device_train_batch_size $BSZ \
375
+ --per_device_eval_batch_size $EVAL_BSZ \
376
+ --gradient_accumulation_steps $GA \
377
+ --learning_rate 0.0003 \
378
+ --num_train_epochs 100 \
379
+ --run_name gen_script_superni_order2_t5_small_specroute \
380
+ --max_source_length 512 \
381
+ --max_target_length 50 \
382
+ --generation_max_length 50 \
383
+ --add_task_name False \
384
+ --add_dataset_name False \
385
+ --overwrite_output_dir \
386
+ --overwrite_cache \
387
+ --lr_scheduler_type constant \
388
+ --warmup_steps 0 \
389
+ --logging_strategy steps \
390
+ --logging_steps 10 \
391
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
392
+ --evaluation_strategy steps \
393
+ --save_strategy steps \
394
+ --save_total_limit 1 \
395
+ --load_best_model_at_end \
396
+ --lora_r 4 \
397
+ --lora_alpha 32 \
398
+ --lora_dropout 0.0 \
399
+ --data_replay_freq -1 \
400
+ --mlp_hidden_dim 100 \
401
+ --model_name specroute \
402
+ --threshold 0.995 \
403
+ --transthreshold 0.995 \
404
+ $FP16_FLAG
405
+
406
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
407
+ BSZ=8; GA=2; EVAL_BSZ=16
408
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
409
+ BSZ=16; GA=2; EVAL_BSZ=16
410
+ else
411
+ BSZ=32; GA=1; EVAL_BSZ=32
412
+ fi
413
+
414
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
415
+ --do_train \
416
+ --do_predict \
417
+ --predict_with_generate \
418
+ --model_name_or_path $2 \
419
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification/saved_weights \
420
+ --data_dir CL_Benchmark \
421
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
422
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1572_samsum_summary \
423
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/8-task1572_samsum_summary \
424
+ --per_device_train_batch_size $BSZ \
425
+ --per_device_eval_batch_size $EVAL_BSZ \
426
+ --gradient_accumulation_steps $GA \
427
+ --learning_rate 0.0003 \
428
+ --num_train_epochs 100 \
429
+ --run_name gen_script_superni_order2_t5_small_specroute \
430
+ --max_source_length 512 \
431
+ --max_target_length 50 \
432
+ --generation_max_length 50 \
433
+ --add_task_name False \
434
+ --add_dataset_name False \
435
+ --overwrite_output_dir \
436
+ --overwrite_cache \
437
+ --lr_scheduler_type constant \
438
+ --warmup_steps 0 \
439
+ --logging_strategy steps \
440
+ --logging_steps 10 \
441
+ --metric_for_best_model eval_rougeL_for_task1572_samsum_summary \
442
+ --evaluation_strategy steps \
443
+ --save_strategy steps \
444
+ --save_total_limit 1 \
445
+ --load_best_model_at_end \
446
+ --lora_r 4 \
447
+ --lora_alpha 32 \
448
+ --lora_dropout 0.0 \
449
+ --data_replay_freq -1 \
450
+ --mlp_hidden_dim 100 \
451
+ --model_name specroute \
452
+ --threshold 0.995 \
453
+ --transthreshold 0.995 \
454
+ $FP16_FLAG
455
+
456
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
457
+ BSZ=8; GA=2; EVAL_BSZ=16
458
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
459
+ BSZ=16; GA=2; EVAL_BSZ=16
460
+ else
461
+ BSZ=32; GA=1; EVAL_BSZ=32
462
+ fi
463
+
464
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
465
+ --do_train \
466
+ --do_predict \
467
+ --predict_with_generate \
468
+ --model_name_or_path $2 \
469
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/8-task1572_samsum_summary/saved_weights \
470
+ --data_dir CL_Benchmark \
471
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
472
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task511_reddit_tifu_long_text_summarization \
473
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/9-task511_reddit_tifu_long_text_summarization \
474
+ --per_device_train_batch_size $BSZ \
475
+ --per_device_eval_batch_size $EVAL_BSZ \
476
+ --gradient_accumulation_steps $GA \
477
+ --learning_rate 0.0003 \
478
+ --num_train_epochs 100 \
479
+ --run_name gen_script_superni_order2_t5_small_specroute \
480
+ --max_source_length 512 \
481
+ --max_target_length 50 \
482
+ --generation_max_length 50 \
483
+ --add_task_name False \
484
+ --add_dataset_name False \
485
+ --overwrite_output_dir \
486
+ --overwrite_cache \
487
+ --lr_scheduler_type constant \
488
+ --warmup_steps 0 \
489
+ --logging_strategy steps \
490
+ --logging_steps 10 \
491
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
492
+ --evaluation_strategy steps \
493
+ --save_strategy steps \
494
+ --save_total_limit 1 \
495
+ --load_best_model_at_end \
496
+ --lora_r 4 \
497
+ --lora_alpha 32 \
498
+ --lora_dropout 0.0 \
499
+ --data_replay_freq -1 \
500
+ --mlp_hidden_dim 100 \
501
+ --model_name specroute \
502
+ --threshold 0.995 \
503
+ --transthreshold 0.995 \
504
+ $FP16_FLAG
505
+
506
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
507
+ BSZ=8; GA=2; EVAL_BSZ=16
508
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
509
+ BSZ=16; GA=2; EVAL_BSZ=16
510
+ else
511
+ BSZ=32; GA=1; EVAL_BSZ=32
512
+ fi
513
+
514
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
515
+ --do_train \
516
+ --do_predict \
517
+ --predict_with_generate \
518
+ --model_name_or_path $2 \
519
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/8-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/9-task511_reddit_tifu_long_text_summarization/saved_weights \
520
+ --data_dir CL_Benchmark \
521
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
522
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1290_xsum_summarization \
523
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/10-task1290_xsum_summarization \
524
+ --per_device_train_batch_size $BSZ \
525
+ --per_device_eval_batch_size $EVAL_BSZ \
526
+ --gradient_accumulation_steps $GA \
527
+ --learning_rate 0.0003 \
528
+ --num_train_epochs 100 \
529
+ --run_name gen_script_superni_order2_t5_small_specroute \
530
+ --max_source_length 512 \
531
+ --max_target_length 50 \
532
+ --generation_max_length 50 \
533
+ --add_task_name False \
534
+ --add_dataset_name False \
535
+ --overwrite_output_dir \
536
+ --overwrite_cache \
537
+ --lr_scheduler_type constant \
538
+ --warmup_steps 0 \
539
+ --logging_strategy steps \
540
+ --logging_steps 10 \
541
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
542
+ --evaluation_strategy steps \
543
+ --save_strategy steps \
544
+ --save_total_limit 1 \
545
+ --load_best_model_at_end \
546
+ --lora_r 4 \
547
+ --lora_alpha 32 \
548
+ --lora_dropout 0.0 \
549
+ --data_replay_freq -1 \
550
+ --mlp_hidden_dim 100 \
551
+ --model_name specroute \
552
+ --threshold 0.995 \
553
+ --transthreshold 0.995 \
554
+ $FP16_FLAG
555
+
556
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
557
+ BSZ=8; GA=2; EVAL_BSZ=16
558
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
559
+ BSZ=16; GA=2; EVAL_BSZ=16
560
+ else
561
+ BSZ=32; GA=1; EVAL_BSZ=32
562
+ fi
563
+
564
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
565
+ --do_train \
566
+ --do_predict \
567
+ --predict_with_generate \
568
+ --model_name_or_path $2 \
569
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/8-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/9-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/10-task1290_xsum_summarization/saved_weights \
570
+ --data_dir CL_Benchmark \
571
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
572
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task639_multi_woz_user_utterance_generation \
573
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/11-task639_multi_woz_user_utterance_generation \
574
+ --per_device_train_batch_size $BSZ \
575
+ --per_device_eval_batch_size $EVAL_BSZ \
576
+ --gradient_accumulation_steps $GA \
577
+ --learning_rate 0.0003 \
578
+ --num_train_epochs 100 \
579
+ --run_name gen_script_superni_order2_t5_small_specroute \
580
+ --max_source_length 512 \
581
+ --max_target_length 50 \
582
+ --generation_max_length 50 \
583
+ --add_task_name False \
584
+ --add_dataset_name False \
585
+ --overwrite_output_dir \
586
+ --overwrite_cache \
587
+ --lr_scheduler_type constant \
588
+ --warmup_steps 0 \
589
+ --logging_strategy steps \
590
+ --logging_steps 10 \
591
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
592
+ --evaluation_strategy steps \
593
+ --save_strategy steps \
594
+ --save_total_limit 1 \
595
+ --load_best_model_at_end \
596
+ --lora_r 4 \
597
+ --lora_alpha 32 \
598
+ --lora_dropout 0.0 \
599
+ --data_replay_freq -1 \
600
+ --mlp_hidden_dim 100 \
601
+ --model_name specroute \
602
+ --threshold 0.995 \
603
+ --transthreshold 0.995 \
604
+ $FP16_FLAG
605
+
606
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
607
+ BSZ=8; GA=2; EVAL_BSZ=16
608
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
609
+ BSZ=16; GA=2; EVAL_BSZ=16
610
+ else
611
+ BSZ=32; GA=1; EVAL_BSZ=32
612
+ fi
613
+
614
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
615
+ --do_train \
616
+ --do_predict \
617
+ --predict_with_generate \
618
+ --model_name_or_path $2 \
619
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/8-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/9-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/10-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/11-task639_multi_woz_user_utterance_generation/saved_weights \
620
+ --data_dir CL_Benchmark \
621
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
622
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1510_evalution_relation_extraction \
623
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/12-task1510_evalution_relation_extraction \
624
+ --per_device_train_batch_size $BSZ \
625
+ --per_device_eval_batch_size $EVAL_BSZ \
626
+ --gradient_accumulation_steps $GA \
627
+ --learning_rate 0.0003 \
628
+ --num_train_epochs 100 \
629
+ --run_name gen_script_superni_order2_t5_small_specroute \
630
+ --max_source_length 512 \
631
+ --max_target_length 50 \
632
+ --generation_max_length 50 \
633
+ --add_task_name False \
634
+ --add_dataset_name False \
635
+ --overwrite_output_dir \
636
+ --overwrite_cache \
637
+ --lr_scheduler_type constant \
638
+ --warmup_steps 0 \
639
+ --logging_strategy steps \
640
+ --logging_steps 10 \
641
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
642
+ --evaluation_strategy steps \
643
+ --save_strategy steps \
644
+ --save_total_limit 1 \
645
+ --load_best_model_at_end \
646
+ --lora_r 4 \
647
+ --lora_alpha 32 \
648
+ --lora_dropout 0.0 \
649
+ --data_replay_freq -1 \
650
+ --mlp_hidden_dim 100 \
651
+ --model_name specroute \
652
+ --threshold 0.995 \
653
+ --transthreshold 0.995 \
654
+ $FP16_FLAG
655
+
656
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
657
+ BSZ=8; GA=2; EVAL_BSZ=16
658
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
659
+ BSZ=16; GA=2; EVAL_BSZ=16
660
+ else
661
+ BSZ=32; GA=1; EVAL_BSZ=32
662
+ fi
663
+
664
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
665
+ --do_train \
666
+ --do_predict \
667
+ --predict_with_generate \
668
+ --model_name_or_path $2 \
669
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/8-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/9-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/10-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/11-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/12-task1510_evalution_relation_extraction/saved_weights \
670
+ --data_dir CL_Benchmark \
671
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
672
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task181_outcome_extraction \
673
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/13-task181_outcome_extraction \
674
+ --per_device_train_batch_size $BSZ \
675
+ --per_device_eval_batch_size $EVAL_BSZ \
676
+ --gradient_accumulation_steps $GA \
677
+ --learning_rate 0.0003 \
678
+ --num_train_epochs 100 \
679
+ --run_name gen_script_superni_order2_t5_small_specroute \
680
+ --max_source_length 512 \
681
+ --max_target_length 50 \
682
+ --generation_max_length 50 \
683
+ --add_task_name False \
684
+ --add_dataset_name False \
685
+ --overwrite_output_dir \
686
+ --overwrite_cache \
687
+ --lr_scheduler_type constant \
688
+ --warmup_steps 0 \
689
+ --logging_strategy steps \
690
+ --logging_steps 10 \
691
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
692
+ --evaluation_strategy steps \
693
+ --save_strategy steps \
694
+ --save_total_limit 1 \
695
+ --load_best_model_at_end \
696
+ --lora_r 4 \
697
+ --lora_alpha 32 \
698
+ --lora_dropout 0.0 \
699
+ --data_replay_freq -1 \
700
+ --mlp_hidden_dim 100 \
701
+ --model_name specroute \
702
+ --threshold 0.995 \
703
+ --transthreshold 0.995 \
704
+ $FP16_FLAG
705
+
706
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
707
+ BSZ=8; GA=2; EVAL_BSZ=16
708
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
709
+ BSZ=16; GA=2; EVAL_BSZ=16
710
+ else
711
+ BSZ=32; GA=1; EVAL_BSZ=32
712
+ fi
713
+
714
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
715
+ --do_train \
716
+ --do_predict \
717
+ --predict_with_generate \
718
+ --model_name_or_path $2 \
719
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/8-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/9-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/10-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/11-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/12-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/13-task181_outcome_extraction/saved_weights \
720
+ --data_dir CL_Benchmark \
721
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
722
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1729_personachat_generate_next \
723
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/14-task1729_personachat_generate_next \
724
+ --per_device_train_batch_size $BSZ \
725
+ --per_device_eval_batch_size $EVAL_BSZ \
726
+ --gradient_accumulation_steps $GA \
727
+ --learning_rate 0.0003 \
728
+ --num_train_epochs 100 \
729
+ --run_name gen_script_superni_order2_t5_small_specroute \
730
+ --max_source_length 512 \
731
+ --max_target_length 50 \
732
+ --generation_max_length 50 \
733
+ --add_task_name False \
734
+ --add_dataset_name False \
735
+ --overwrite_output_dir \
736
+ --overwrite_cache \
737
+ --lr_scheduler_type constant \
738
+ --warmup_steps 0 \
739
+ --logging_strategy steps \
740
+ --logging_steps 10 \
741
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
742
+ --evaluation_strategy steps \
743
+ --save_strategy steps \
744
+ --save_total_limit 1 \
745
+ --load_best_model_at_end \
746
+ --lora_r 4 \
747
+ --lora_alpha 32 \
748
+ --lora_dropout 0.0 \
749
+ --data_replay_freq -1 \
750
+ --mlp_hidden_dim 100 \
751
+ --model_name specroute \
752
+ --threshold 0.995 \
753
+ --transthreshold 0.995 \
754
+ $FP16_FLAG
755
+
756
+ if [ "$GPU_MODE" = "t4_2gpu" ]; then
757
+ BSZ=8; GA=2; EVAL_BSZ=16
758
+ elif [ "$GPU_MODE" = "t4_1gpu" ]; then
759
+ BSZ=16; GA=2; EVAL_BSZ=16
760
+ else
761
+ BSZ=32; GA=1; EVAL_BSZ=32
762
+ fi
763
+
764
+ CUDA_VISIBLE_DEVICES=$GPU_IDS python src/run_t5.py \
765
+ --do_train \
766
+ --do_predict \
767
+ --predict_with_generate \
768
+ --model_name_or_path $2 \
769
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/3-task875_emotion_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/4-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/5-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/6-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/7-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/8-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/9-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/10-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/11-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/12-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/13-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/14-task1729_personachat_generate_next/saved_weights \
770
+ --data_dir CL_Benchmark \
771
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task875_emotion_classification,task002_quoref_answer_generation,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1572_samsum_summary,task511_reddit_tifu_long_text_summarization,task1290_xsum_summarization,task639_multi_woz_user_utterance_generation,task1510_evalution_relation_extraction,task181_outcome_extraction,task1729_personachat_generate_next,task1590_diplomacy_text_generation \
772
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1590_diplomacy_text_generation \
773
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_specroute/outputs/15-task1590_diplomacy_text_generation \
774
+ --per_device_train_batch_size $BSZ \
775
+ --per_device_eval_batch_size $EVAL_BSZ \
776
+ --gradient_accumulation_steps $GA \
777
+ --learning_rate 0.0003 \
778
+ --num_train_epochs 100 \
779
+ --run_name gen_script_superni_order2_t5_small_specroute \
780
+ --max_source_length 512 \
781
+ --max_target_length 50 \
782
+ --generation_max_length 50 \
783
+ --add_task_name False \
784
+ --add_dataset_name False \
785
+ --overwrite_output_dir \
786
+ --overwrite_cache \
787
+ --lr_scheduler_type constant \
788
+ --warmup_steps 0 \
789
+ --logging_strategy steps \
790
+ --logging_steps 10 \
791
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
792
+ --evaluation_strategy steps \
793
+ --save_strategy steps \
794
+ --save_total_limit 1 \
795
+ --load_best_model_at_end \
796
+ --lora_r 4 \
797
+ --lora_alpha 32 \
798
+ --lora_dropout 0.0 \
799
+ --data_replay_freq -1 \
800
+ --mlp_hidden_dim 100 \
801
+ --model_name specroute \
802
+ --threshold 0.995 \
803
+ --transthreshold 0.995 \
804
+ $FP16_FLAG
root_gainlora/T5_small/gen_script_long_order3_t5_small_gainlora_inflora.sh ADDED
@@ -0,0 +1,763 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --predict_with_generate \
17
+ --model_name_or_path $2 \
18
+ --data_dir CL_Benchmark \
19
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
20
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yelp \
21
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp \
22
+ --per_device_train_batch_size 32 \
23
+ --per_device_eval_batch_size 256 \
24
+ --gradient_accumulation_steps 1 \
25
+ --learning_rate 0.0003 \
26
+ --num_train_epochs 10 \
27
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
28
+ --max_source_length 512 \
29
+ --max_target_length 50 \
30
+ --generation_max_length 50 \
31
+ --add_task_name False \
32
+ --add_dataset_name False \
33
+ --overwrite_output_dir \
34
+ --overwrite_cache \
35
+ --lr_scheduler_type constant \
36
+ --warmup_steps 0 \
37
+ --logging_strategy steps \
38
+ --logging_steps 10 \
39
+ --metric_for_best_model eval_exact_match \
40
+ --evaluation_strategy steps \
41
+ --save_strategy steps \
42
+ --save_total_limit 1 \
43
+ --load_best_model_at_end \
44
+ --lora_r 8 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --add_instruction_replay \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --mlp_hidden_dim 100 \
51
+ --model_name gainlora_inflora \
52
+ --threshold 0.995 \
53
+ --transthreshold 0.995
54
+
55
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/checkpoint*
56
+
57
+ sleep 5
58
+
59
+
60
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
61
+ --do_train \
62
+ --predict_with_generate \
63
+ --model_name_or_path $2 \
64
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights/trans_input.pt \
65
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights \
66
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights/prompts_keys_till_now.pt \
67
+ --data_dir CL_Benchmark \
68
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
69
+ --gen_data_dir generated_data/lora_gen_long_t5 \
70
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/amazon \
71
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon \
72
+ --per_device_train_batch_size 32 \
73
+ --per_device_eval_batch_size 256 \
74
+ --gradient_accumulation_steps 1 \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 10 \
77
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_exact_match_for_amazon \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 8 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --data_replay_freq -1 \
98
+ --kl_ratio 0.1 \
99
+ --attn_temperature 1 \
100
+ --mlp_hidden_dim 100 \
101
+ --model_name gainlora_inflora \
102
+ --threshold 0.995 \
103
+ --transthreshold 0.995
104
+
105
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/checkpoint*
106
+
107
+ sleep 5
108
+
109
+
110
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
111
+ --do_train \
112
+ --predict_with_generate \
113
+ --model_name_or_path $2 \
114
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights/trans_input.pt \
115
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights \
116
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights/prompts_keys_till_now.pt \
117
+ --data_dir CL_Benchmark \
118
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
119
+ --gen_data_dir generated_data/lora_gen_long_t5 \
120
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/mnli \
121
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli \
122
+ --per_device_train_batch_size 32 \
123
+ --per_device_eval_batch_size 256 \
124
+ --gradient_accumulation_steps 1 \
125
+ --learning_rate 0.0003 \
126
+ --num_train_epochs 10 \
127
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
128
+ --max_source_length 512 \
129
+ --max_target_length 50 \
130
+ --generation_max_length 50 \
131
+ --add_task_name False \
132
+ --add_dataset_name False \
133
+ --overwrite_output_dir \
134
+ --overwrite_cache \
135
+ --lr_scheduler_type constant \
136
+ --warmup_steps 0 \
137
+ --logging_strategy steps \
138
+ --logging_steps 10 \
139
+ --metric_for_best_model eval_exact_match_for_mnli \
140
+ --evaluation_strategy steps \
141
+ --save_strategy steps \
142
+ --save_total_limit 1 \
143
+ --load_best_model_at_end \
144
+ --lora_r 8 \
145
+ --lora_alpha 32 \
146
+ --lora_dropout 0.0 \
147
+ --data_replay_freq -1 \
148
+ --kl_ratio 0.1 \
149
+ --attn_temperature 1 \
150
+ --mlp_hidden_dim 100 \
151
+ --model_name gainlora_inflora \
152
+ --threshold 0.995 \
153
+ --transthreshold 0.995
154
+
155
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/checkpoint*
156
+
157
+ sleep 5
158
+
159
+
160
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
161
+ --do_train \
162
+ --predict_with_generate \
163
+ --model_name_or_path $2 \
164
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights/trans_input.pt \
165
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights \
166
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights/prompts_keys_till_now.pt \
167
+ --data_dir CL_Benchmark \
168
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
169
+ --gen_data_dir generated_data/lora_gen_long_t5 \
170
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/cb \
171
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb \
172
+ --per_device_train_batch_size 32 \
173
+ --per_device_eval_batch_size 256 \
174
+ --gradient_accumulation_steps 1 \
175
+ --learning_rate 0.0003 \
176
+ --num_train_epochs 10 \
177
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
178
+ --max_source_length 512 \
179
+ --max_target_length 50 \
180
+ --generation_max_length 50 \
181
+ --add_task_name False \
182
+ --add_dataset_name False \
183
+ --overwrite_output_dir \
184
+ --overwrite_cache \
185
+ --lr_scheduler_type constant \
186
+ --warmup_steps 0 \
187
+ --logging_strategy steps \
188
+ --logging_steps 10 \
189
+ --metric_for_best_model eval_exact_match_for_cb \
190
+ --evaluation_strategy steps \
191
+ --save_strategy steps \
192
+ --save_total_limit 1 \
193
+ --load_best_model_at_end \
194
+ --lora_r 8 \
195
+ --lora_alpha 32 \
196
+ --lora_dropout 0.0 \
197
+ --data_replay_freq -1 \
198
+ --kl_ratio 0.1 \
199
+ --attn_temperature 1 \
200
+ --mlp_hidden_dim 100 \
201
+ --model_name gainlora_inflora \
202
+ --threshold 0.995 \
203
+ --transthreshold 0.995
204
+
205
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/checkpoint*
206
+
207
+ sleep 5
208
+
209
+
210
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
211
+ --do_train \
212
+ --predict_with_generate \
213
+ --model_name_or_path $2 \
214
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights/trans_input.pt \
215
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights \
216
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights/prompts_keys_till_now.pt \
217
+ --data_dir CL_Benchmark \
218
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
219
+ --gen_data_dir generated_data/lora_gen_long_t5 \
220
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/copa \
221
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa \
222
+ --per_device_train_batch_size 32 \
223
+ --per_device_eval_batch_size 256 \
224
+ --gradient_accumulation_steps 1 \
225
+ --learning_rate 0.0003 \
226
+ --num_train_epochs 10 \
227
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
228
+ --max_source_length 512 \
229
+ --max_target_length 50 \
230
+ --generation_max_length 50 \
231
+ --add_task_name False \
232
+ --add_dataset_name False \
233
+ --overwrite_output_dir \
234
+ --overwrite_cache \
235
+ --lr_scheduler_type constant \
236
+ --warmup_steps 0 \
237
+ --logging_strategy steps \
238
+ --logging_steps 10 \
239
+ --metric_for_best_model eval_exact_match_for_copa \
240
+ --evaluation_strategy steps \
241
+ --save_strategy steps \
242
+ --save_total_limit 1 \
243
+ --load_best_model_at_end \
244
+ --lora_r 8 \
245
+ --lora_alpha 32 \
246
+ --lora_dropout 0.0 \
247
+ --data_replay_freq -1 \
248
+ --kl_ratio 0.1 \
249
+ --attn_temperature 1 \
250
+ --mlp_hidden_dim 100 \
251
+ --model_name gainlora_inflora \
252
+ --threshold 0.995 \
253
+ --transthreshold 0.995
254
+
255
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/checkpoint*
256
+
257
+ sleep 5
258
+
259
+
260
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
261
+ --do_train \
262
+ --predict_with_generate \
263
+ --model_name_or_path $2 \
264
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights/trans_input.pt \
265
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights \
266
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights/prompts_keys_till_now.pt \
267
+ --data_dir CL_Benchmark \
268
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
269
+ --gen_data_dir generated_data/lora_gen_long_t5 \
270
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/qqp \
271
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp \
272
+ --per_device_train_batch_size 32 \
273
+ --per_device_eval_batch_size 256 \
274
+ --gradient_accumulation_steps 1 \
275
+ --learning_rate 0.0003 \
276
+ --num_train_epochs 10 \
277
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
278
+ --max_source_length 512 \
279
+ --max_target_length 50 \
280
+ --generation_max_length 50 \
281
+ --add_task_name False \
282
+ --add_dataset_name False \
283
+ --overwrite_output_dir \
284
+ --overwrite_cache \
285
+ --lr_scheduler_type constant \
286
+ --warmup_steps 0 \
287
+ --logging_strategy steps \
288
+ --logging_steps 10 \
289
+ --metric_for_best_model eval_exact_match_for_qqp \
290
+ --evaluation_strategy steps \
291
+ --save_strategy steps \
292
+ --save_total_limit 1 \
293
+ --load_best_model_at_end \
294
+ --lora_r 8 \
295
+ --lora_alpha 32 \
296
+ --lora_dropout 0.0 \
297
+ --data_replay_freq -1 \
298
+ --kl_ratio 0.1 \
299
+ --attn_temperature 1 \
300
+ --mlp_hidden_dim 100 \
301
+ --model_name gainlora_inflora \
302
+ --threshold 0.995 \
303
+ --transthreshold 0.995
304
+
305
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/checkpoint*
306
+
307
+ sleep 5
308
+
309
+
310
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
311
+ --do_train \
312
+ --predict_with_generate \
313
+ --model_name_or_path $2 \
314
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights/trans_input.pt \
315
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights \
316
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights/prompts_keys_till_now.pt \
317
+ --data_dir CL_Benchmark \
318
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
319
+ --gen_data_dir generated_data/lora_gen_long_t5 \
320
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/rte \
321
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte \
322
+ --per_device_train_batch_size 32 \
323
+ --per_device_eval_batch_size 256 \
324
+ --gradient_accumulation_steps 1 \
325
+ --learning_rate 0.0003 \
326
+ --num_train_epochs 10 \
327
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
328
+ --max_source_length 512 \
329
+ --max_target_length 50 \
330
+ --generation_max_length 50 \
331
+ --add_task_name False \
332
+ --add_dataset_name False \
333
+ --overwrite_output_dir \
334
+ --overwrite_cache \
335
+ --lr_scheduler_type constant \
336
+ --warmup_steps 0 \
337
+ --logging_strategy steps \
338
+ --logging_steps 10 \
339
+ --metric_for_best_model eval_exact_match_for_rte \
340
+ --evaluation_strategy steps \
341
+ --save_strategy steps \
342
+ --save_total_limit 1 \
343
+ --load_best_model_at_end \
344
+ --lora_r 8 \
345
+ --lora_alpha 32 \
346
+ --lora_dropout 0.0 \
347
+ --data_replay_freq -1 \
348
+ --kl_ratio 0.1 \
349
+ --attn_temperature 1 \
350
+ --mlp_hidden_dim 100 \
351
+ --model_name gainlora_inflora \
352
+ --threshold 0.995 \
353
+ --transthreshold 0.995
354
+
355
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/checkpoint*
356
+
357
+ sleep 5
358
+
359
+
360
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
361
+ --do_train \
362
+ --predict_with_generate \
363
+ --model_name_or_path $2 \
364
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights/trans_input.pt \
365
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights \
366
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights/prompts_keys_till_now.pt \
367
+ --data_dir CL_Benchmark \
368
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
369
+ --gen_data_dir generated_data/lora_gen_long_t5 \
370
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/imdb \
371
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb \
372
+ --per_device_train_batch_size 32 \
373
+ --per_device_eval_batch_size 256 \
374
+ --gradient_accumulation_steps 1 \
375
+ --learning_rate 0.0003 \
376
+ --num_train_epochs 10 \
377
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
378
+ --max_source_length 512 \
379
+ --max_target_length 50 \
380
+ --generation_max_length 50 \
381
+ --add_task_name False \
382
+ --add_dataset_name False \
383
+ --overwrite_output_dir \
384
+ --overwrite_cache \
385
+ --lr_scheduler_type constant \
386
+ --warmup_steps 0 \
387
+ --logging_strategy steps \
388
+ --logging_steps 10 \
389
+ --metric_for_best_model eval_exact_match_for_imdb \
390
+ --evaluation_strategy steps \
391
+ --save_strategy steps \
392
+ --save_total_limit 1 \
393
+ --load_best_model_at_end \
394
+ --lora_r 8 \
395
+ --lora_alpha 32 \
396
+ --lora_dropout 0.0 \
397
+ --data_replay_freq -1 \
398
+ --kl_ratio 0.1 \
399
+ --attn_temperature 1 \
400
+ --mlp_hidden_dim 100 \
401
+ --model_name gainlora_inflora \
402
+ --threshold 0.995 \
403
+ --transthreshold 0.995
404
+
405
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/checkpoint*
406
+
407
+ sleep 5
408
+
409
+
410
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
411
+ --do_train \
412
+ --predict_with_generate \
413
+ --model_name_or_path $2 \
414
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights/trans_input.pt \
415
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights \
416
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights/prompts_keys_till_now.pt \
417
+ --data_dir CL_Benchmark \
418
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
419
+ --gen_data_dir generated_data/lora_gen_long_t5 \
420
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/sst2 \
421
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2 \
422
+ --per_device_train_batch_size 32 \
423
+ --per_device_eval_batch_size 256 \
424
+ --gradient_accumulation_steps 1 \
425
+ --learning_rate 0.0003 \
426
+ --num_train_epochs 10 \
427
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
428
+ --max_source_length 512 \
429
+ --max_target_length 50 \
430
+ --generation_max_length 50 \
431
+ --add_task_name False \
432
+ --add_dataset_name False \
433
+ --overwrite_output_dir \
434
+ --overwrite_cache \
435
+ --lr_scheduler_type constant \
436
+ --warmup_steps 0 \
437
+ --logging_strategy steps \
438
+ --logging_steps 10 \
439
+ --metric_for_best_model eval_exact_match_for_sst2 \
440
+ --evaluation_strategy steps \
441
+ --save_strategy steps \
442
+ --save_total_limit 1 \
443
+ --load_best_model_at_end \
444
+ --lora_r 8 \
445
+ --lora_alpha 32 \
446
+ --lora_dropout 0.0 \
447
+ --data_replay_freq -1 \
448
+ --kl_ratio 0.1 \
449
+ --attn_temperature 1 \
450
+ --mlp_hidden_dim 100 \
451
+ --model_name gainlora_inflora \
452
+ --threshold 0.995 \
453
+ --transthreshold 0.995
454
+
455
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/checkpoint*
456
+
457
+ sleep 5
458
+
459
+
460
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
461
+ --do_train \
462
+ --predict_with_generate \
463
+ --model_name_or_path $2 \
464
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights/trans_input.pt \
465
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights \
466
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights/prompts_keys_till_now.pt \
467
+ --data_dir CL_Benchmark \
468
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
469
+ --gen_data_dir generated_data/lora_gen_long_t5 \
470
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/dbpedia \
471
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia \
472
+ --per_device_train_batch_size 32 \
473
+ --per_device_eval_batch_size 256 \
474
+ --gradient_accumulation_steps 1 \
475
+ --learning_rate 0.0003 \
476
+ --num_train_epochs 10 \
477
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
478
+ --max_source_length 512 \
479
+ --max_target_length 50 \
480
+ --generation_max_length 50 \
481
+ --add_task_name False \
482
+ --add_dataset_name False \
483
+ --overwrite_output_dir \
484
+ --overwrite_cache \
485
+ --lr_scheduler_type constant \
486
+ --warmup_steps 0 \
487
+ --logging_strategy steps \
488
+ --logging_steps 10 \
489
+ --metric_for_best_model eval_exact_match_for_dbpedia \
490
+ --evaluation_strategy steps \
491
+ --save_strategy steps \
492
+ --save_total_limit 1 \
493
+ --load_best_model_at_end \
494
+ --lora_r 8 \
495
+ --lora_alpha 32 \
496
+ --lora_dropout 0.0 \
497
+ --data_replay_freq -1 \
498
+ --kl_ratio 0.1 \
499
+ --attn_temperature 1 \
500
+ --mlp_hidden_dim 100 \
501
+ --model_name gainlora_inflora \
502
+ --threshold 0.995 \
503
+ --transthreshold 0.995
504
+
505
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/checkpoint*
506
+
507
+ sleep 5
508
+
509
+
510
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
511
+ --do_train \
512
+ --predict_with_generate \
513
+ --model_name_or_path $2 \
514
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights/trans_input.pt \
515
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights \
516
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights/prompts_keys_till_now.pt \
517
+ --data_dir CL_Benchmark \
518
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
519
+ --gen_data_dir generated_data/lora_gen_long_t5 \
520
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/agnews \
521
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews \
522
+ --per_device_train_batch_size 32 \
523
+ --per_device_eval_batch_size 256 \
524
+ --gradient_accumulation_steps 1 \
525
+ --learning_rate 0.0003 \
526
+ --num_train_epochs 10 \
527
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
528
+ --max_source_length 512 \
529
+ --max_target_length 50 \
530
+ --generation_max_length 50 \
531
+ --add_task_name False \
532
+ --add_dataset_name False \
533
+ --overwrite_output_dir \
534
+ --overwrite_cache \
535
+ --lr_scheduler_type constant \
536
+ --warmup_steps 0 \
537
+ --logging_strategy steps \
538
+ --logging_steps 10 \
539
+ --metric_for_best_model eval_exact_match_for_agnews \
540
+ --evaluation_strategy steps \
541
+ --save_strategy steps \
542
+ --save_total_limit 1 \
543
+ --load_best_model_at_end \
544
+ --lora_r 8 \
545
+ --lora_alpha 32 \
546
+ --lora_dropout 0.0 \
547
+ --data_replay_freq -1 \
548
+ --kl_ratio 0.1 \
549
+ --attn_temperature 1 \
550
+ --mlp_hidden_dim 100 \
551
+ --model_name gainlora_inflora \
552
+ --threshold 0.995 \
553
+ --transthreshold 0.995
554
+
555
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/checkpoint*
556
+
557
+ sleep 5
558
+
559
+
560
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
561
+ --do_train \
562
+ --predict_with_generate \
563
+ --model_name_or_path $2 \
564
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights/trans_input.pt \
565
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights \
566
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights/prompts_keys_till_now.pt \
567
+ --data_dir CL_Benchmark \
568
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
569
+ --gen_data_dir generated_data/lora_gen_long_t5 \
570
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yahoo \
571
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo \
572
+ --per_device_train_batch_size 32 \
573
+ --per_device_eval_batch_size 256 \
574
+ --gradient_accumulation_steps 1 \
575
+ --learning_rate 0.0003 \
576
+ --num_train_epochs 10 \
577
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
578
+ --max_source_length 512 \
579
+ --max_target_length 50 \
580
+ --generation_max_length 50 \
581
+ --add_task_name False \
582
+ --add_dataset_name False \
583
+ --overwrite_output_dir \
584
+ --overwrite_cache \
585
+ --lr_scheduler_type constant \
586
+ --warmup_steps 0 \
587
+ --logging_strategy steps \
588
+ --logging_steps 10 \
589
+ --metric_for_best_model eval_exact_match_for_yahoo \
590
+ --evaluation_strategy steps \
591
+ --save_strategy steps \
592
+ --save_total_limit 1 \
593
+ --load_best_model_at_end \
594
+ --lora_r 8 \
595
+ --lora_alpha 32 \
596
+ --lora_dropout 0.0 \
597
+ --data_replay_freq -1 \
598
+ --kl_ratio 0.1 \
599
+ --attn_temperature 1 \
600
+ --mlp_hidden_dim 100 \
601
+ --model_name gainlora_inflora \
602
+ --threshold 0.995 \
603
+ --transthreshold 0.995
604
+
605
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/checkpoint*
606
+
607
+ sleep 5
608
+
609
+
610
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
611
+ --do_train \
612
+ --predict_with_generate \
613
+ --model_name_or_path $2 \
614
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights/trans_input.pt \
615
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights \
616
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights/prompts_keys_till_now.pt \
617
+ --data_dir CL_Benchmark \
618
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
619
+ --gen_data_dir generated_data/lora_gen_long_t5 \
620
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/multirc \
621
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc \
622
+ --per_device_train_batch_size 32 \
623
+ --per_device_eval_batch_size 256 \
624
+ --gradient_accumulation_steps 1 \
625
+ --learning_rate 0.0003 \
626
+ --num_train_epochs 10 \
627
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
628
+ --max_source_length 512 \
629
+ --max_target_length 50 \
630
+ --generation_max_length 50 \
631
+ --add_task_name False \
632
+ --add_dataset_name False \
633
+ --overwrite_output_dir \
634
+ --overwrite_cache \
635
+ --lr_scheduler_type constant \
636
+ --warmup_steps 0 \
637
+ --logging_strategy steps \
638
+ --logging_steps 10 \
639
+ --metric_for_best_model eval_exact_match_for_multirc \
640
+ --evaluation_strategy steps \
641
+ --save_strategy steps \
642
+ --save_total_limit 1 \
643
+ --load_best_model_at_end \
644
+ --lora_r 8 \
645
+ --lora_alpha 32 \
646
+ --lora_dropout 0.0 \
647
+ --data_replay_freq -1 \
648
+ --kl_ratio 0.1 \
649
+ --attn_temperature 1 \
650
+ --mlp_hidden_dim 100 \
651
+ --model_name gainlora_inflora \
652
+ --threshold 0.995 \
653
+ --transthreshold 0.995
654
+
655
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/checkpoint*
656
+
657
+ sleep 5
658
+
659
+
660
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
661
+ --do_train \
662
+ --predict_with_generate \
663
+ --model_name_or_path $2 \
664
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/saved_weights/trans_input.pt \
665
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/saved_weights \
666
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/saved_weights/prompts_keys_till_now.pt \
667
+ --data_dir CL_Benchmark \
668
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
669
+ --gen_data_dir generated_data/lora_gen_long_t5 \
670
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/boolq \
671
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq \
672
+ --per_device_train_batch_size 32 \
673
+ --per_device_eval_batch_size 256 \
674
+ --gradient_accumulation_steps 1 \
675
+ --learning_rate 0.0003 \
676
+ --num_train_epochs 10 \
677
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
678
+ --max_source_length 512 \
679
+ --max_target_length 50 \
680
+ --generation_max_length 50 \
681
+ --add_task_name False \
682
+ --add_dataset_name False \
683
+ --overwrite_output_dir \
684
+ --overwrite_cache \
685
+ --lr_scheduler_type constant \
686
+ --warmup_steps 0 \
687
+ --logging_strategy steps \
688
+ --logging_steps 10 \
689
+ --metric_for_best_model eval_exact_match_for_boolq \
690
+ --evaluation_strategy steps \
691
+ --save_strategy steps \
692
+ --save_total_limit 1 \
693
+ --load_best_model_at_end \
694
+ --lora_r 8 \
695
+ --lora_alpha 32 \
696
+ --lora_dropout 0.0 \
697
+ --data_replay_freq -1 \
698
+ --kl_ratio 0.1 \
699
+ --attn_temperature 1 \
700
+ --mlp_hidden_dim 100 \
701
+ --model_name gainlora_inflora \
702
+ --threshold 0.995 \
703
+ --transthreshold 0.995
704
+
705
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq/checkpoint*
706
+
707
+ sleep 5
708
+
709
+
710
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
711
+ --do_train \
712
+ --do_predict \
713
+ --predict_with_generate \
714
+ --model_name_or_path $2 \
715
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq/saved_weights/trans_input.pt \
716
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/13-multirc/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq/saved_weights \
717
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/14-boolq/saved_weights/prompts_keys_till_now.pt \
718
+ --data_dir CL_Benchmark \
719
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
720
+ --gen_data_dir generated_data/lora_gen_long_t5 \
721
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/wic \
722
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/15-wic \
723
+ --per_device_train_batch_size 32 \
724
+ --per_device_eval_batch_size 256 \
725
+ --gradient_accumulation_steps 1 \
726
+ --learning_rate 0.0003 \
727
+ --num_train_epochs 10 \
728
+ --run_name gen_script_long_order3_t5_small_gainlora_inflora \
729
+ --max_source_length 512 \
730
+ --max_target_length 50 \
731
+ --generation_max_length 50 \
732
+ --add_task_name False \
733
+ --add_dataset_name False \
734
+ --overwrite_output_dir \
735
+ --overwrite_cache \
736
+ --lr_scheduler_type constant \
737
+ --warmup_steps 0 \
738
+ --logging_strategy steps \
739
+ --logging_steps 10 \
740
+ --metric_for_best_model eval_exact_match_for_wic \
741
+ --evaluation_strategy steps \
742
+ --save_strategy steps \
743
+ --save_total_limit 1 \
744
+ --load_best_model_at_end \
745
+ --lora_r 8 \
746
+ --lora_alpha 32 \
747
+ --lora_dropout 0.0 \
748
+ --data_replay_freq -1 \
749
+ --kl_ratio 0.1 \
750
+ --attn_temperature 1 \
751
+ --mlp_hidden_dim 100 \
752
+ --model_name gainlora_inflora \
753
+ --threshold 0.995 \
754
+ --transthreshold 0.995
755
+
756
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_gainlora_inflora/outputs/15-wic/checkpoint*
757
+
758
+ sleep 5
759
+
760
+ CUDA_VISIBLE_DEVICES=$1 python score.py gen_script_long_order3_t5_small_gainlora_inflora gen_script_long_order3_t5_small_gainlora_inflora
761
+
762
+
763
+
root_gainlora/T5_small/gen_script_long_order3_t5_small_inflora.sh ADDED
@@ -0,0 +1,744 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
21
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yelp \
22
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp \
23
+ --per_device_train_batch_size 32 \
24
+ --per_device_eval_batch_size 256 \
25
+ --gradient_accumulation_steps 1 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 10 \
28
+ --run_name gen_script_long_order3_t5_small_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_exact_match \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --load_best_model_at_end \
45
+ --lora_r 8 \
46
+ --lora_alpha 32 \
47
+ --lora_dropout 0.0 \
48
+ --add_instruction_replay \
49
+ --data_replay_freq -1 \
50
+ --replay_after_n_epoch 0 \
51
+ --model_name inflora \
52
+ --threshold 0.995
53
+
54
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/checkpoint*
55
+
56
+ sleep 5
57
+
58
+
59
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
60
+ --do_train \
61
+ --do_predict \
62
+ --predict_with_generate \
63
+ --model_name_or_path $2 \
64
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights/trans_input.pt \
65
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights \
66
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights/prompts_keys_till_now.pt \
67
+ --data_dir CL_Benchmark \
68
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
69
+ --gen_data_dir generated_data/lora_gen_long_t5 \
70
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/amazon \
71
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon \
72
+ --per_device_train_batch_size 32 \
73
+ --per_device_eval_batch_size 256 \
74
+ --gradient_accumulation_steps 1 \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 10 \
77
+ --run_name gen_script_long_order3_t5_small_inflora \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_exact_match_for_amazon \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 8 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --data_replay_freq -1 \
98
+ --kl_ratio 0.1 \
99
+ --attn_temperature 1 \
100
+ --model_name inflora \
101
+ --threshold 0.995
102
+
103
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/checkpoint*
104
+
105
+ sleep 5
106
+
107
+
108
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
109
+ --do_train \
110
+ --do_predict \
111
+ --predict_with_generate \
112
+ --model_name_or_path $2 \
113
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights/trans_input.pt \
114
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights \
115
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights/prompts_keys_till_now.pt \
116
+ --data_dir CL_Benchmark \
117
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
118
+ --gen_data_dir generated_data/lora_gen_long_t5 \
119
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/mnli \
120
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli \
121
+ --per_device_train_batch_size 32 \
122
+ --per_device_eval_batch_size 256 \
123
+ --gradient_accumulation_steps 1 \
124
+ --learning_rate 0.0003 \
125
+ --num_train_epochs 10 \
126
+ --run_name gen_script_long_order3_t5_small_inflora \
127
+ --max_source_length 512 \
128
+ --max_target_length 50 \
129
+ --generation_max_length 50 \
130
+ --add_task_name False \
131
+ --add_dataset_name False \
132
+ --overwrite_output_dir \
133
+ --overwrite_cache \
134
+ --lr_scheduler_type constant \
135
+ --warmup_steps 0 \
136
+ --logging_strategy steps \
137
+ --logging_steps 10 \
138
+ --metric_for_best_model eval_exact_match_for_mnli \
139
+ --evaluation_strategy steps \
140
+ --save_strategy steps \
141
+ --save_total_limit 1 \
142
+ --load_best_model_at_end \
143
+ --lora_r 8 \
144
+ --lora_alpha 32 \
145
+ --lora_dropout 0.0 \
146
+ --data_replay_freq -1 \
147
+ --kl_ratio 0.1 \
148
+ --attn_temperature 1 \
149
+ --model_name inflora \
150
+ --threshold 0.995
151
+
152
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/checkpoint*
153
+
154
+ sleep 5
155
+
156
+
157
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
158
+ --do_train \
159
+ --do_predict \
160
+ --predict_with_generate \
161
+ --model_name_or_path $2 \
162
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights/trans_input.pt \
163
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights \
164
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights/prompts_keys_till_now.pt \
165
+ --data_dir CL_Benchmark \
166
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
167
+ --gen_data_dir generated_data/lora_gen_long_t5 \
168
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/cb \
169
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb \
170
+ --per_device_train_batch_size 32 \
171
+ --per_device_eval_batch_size 256 \
172
+ --gradient_accumulation_steps 1 \
173
+ --learning_rate 0.0003 \
174
+ --num_train_epochs 10 \
175
+ --run_name gen_script_long_order3_t5_small_inflora \
176
+ --max_source_length 512 \
177
+ --max_target_length 50 \
178
+ --generation_max_length 50 \
179
+ --add_task_name False \
180
+ --add_dataset_name False \
181
+ --overwrite_output_dir \
182
+ --overwrite_cache \
183
+ --lr_scheduler_type constant \
184
+ --warmup_steps 0 \
185
+ --logging_strategy steps \
186
+ --logging_steps 10 \
187
+ --metric_for_best_model eval_exact_match_for_cb \
188
+ --evaluation_strategy steps \
189
+ --save_strategy steps \
190
+ --save_total_limit 1 \
191
+ --load_best_model_at_end \
192
+ --lora_r 8 \
193
+ --lora_alpha 32 \
194
+ --lora_dropout 0.0 \
195
+ --data_replay_freq -1 \
196
+ --kl_ratio 0.1 \
197
+ --attn_temperature 1 \
198
+ --model_name inflora \
199
+ --threshold 0.995
200
+
201
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/checkpoint*
202
+
203
+ sleep 5
204
+
205
+
206
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
207
+ --do_train \
208
+ --do_predict \
209
+ --predict_with_generate \
210
+ --model_name_or_path $2 \
211
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights/trans_input.pt \
212
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights \
213
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights/prompts_keys_till_now.pt \
214
+ --data_dir CL_Benchmark \
215
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
216
+ --gen_data_dir generated_data/lora_gen_long_t5 \
217
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/copa \
218
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa \
219
+ --per_device_train_batch_size 32 \
220
+ --per_device_eval_batch_size 256 \
221
+ --gradient_accumulation_steps 1 \
222
+ --learning_rate 0.0003 \
223
+ --num_train_epochs 10 \
224
+ --run_name gen_script_long_order3_t5_small_inflora \
225
+ --max_source_length 512 \
226
+ --max_target_length 50 \
227
+ --generation_max_length 50 \
228
+ --add_task_name False \
229
+ --add_dataset_name False \
230
+ --overwrite_output_dir \
231
+ --overwrite_cache \
232
+ --lr_scheduler_type constant \
233
+ --warmup_steps 0 \
234
+ --logging_strategy steps \
235
+ --logging_steps 10 \
236
+ --metric_for_best_model eval_exact_match_for_copa \
237
+ --evaluation_strategy steps \
238
+ --save_strategy steps \
239
+ --save_total_limit 1 \
240
+ --load_best_model_at_end \
241
+ --lora_r 8 \
242
+ --lora_alpha 32 \
243
+ --lora_dropout 0.0 \
244
+ --data_replay_freq -1 \
245
+ --kl_ratio 0.1 \
246
+ --attn_temperature 1 \
247
+ --model_name inflora \
248
+ --threshold 0.995
249
+
250
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/checkpoint*
251
+
252
+ sleep 5
253
+
254
+
255
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
256
+ --do_train \
257
+ --do_predict \
258
+ --predict_with_generate \
259
+ --model_name_or_path $2 \
260
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights/trans_input.pt \
261
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights \
262
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights/prompts_keys_till_now.pt \
263
+ --data_dir CL_Benchmark \
264
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
265
+ --gen_data_dir generated_data/lora_gen_long_t5 \
266
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/qqp \
267
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp \
268
+ --per_device_train_batch_size 32 \
269
+ --per_device_eval_batch_size 256 \
270
+ --gradient_accumulation_steps 1 \
271
+ --learning_rate 0.0003 \
272
+ --num_train_epochs 10 \
273
+ --run_name gen_script_long_order3_t5_small_inflora \
274
+ --max_source_length 512 \
275
+ --max_target_length 50 \
276
+ --generation_max_length 50 \
277
+ --add_task_name False \
278
+ --add_dataset_name False \
279
+ --overwrite_output_dir \
280
+ --overwrite_cache \
281
+ --lr_scheduler_type constant \
282
+ --warmup_steps 0 \
283
+ --logging_strategy steps \
284
+ --logging_steps 10 \
285
+ --metric_for_best_model eval_exact_match_for_qqp \
286
+ --evaluation_strategy steps \
287
+ --save_strategy steps \
288
+ --save_total_limit 1 \
289
+ --load_best_model_at_end \
290
+ --lora_r 8 \
291
+ --lora_alpha 32 \
292
+ --lora_dropout 0.0 \
293
+ --data_replay_freq -1 \
294
+ --kl_ratio 0.1 \
295
+ --attn_temperature 1 \
296
+ --model_name inflora \
297
+ --threshold 0.995
298
+
299
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/checkpoint*
300
+
301
+ sleep 5
302
+
303
+
304
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
305
+ --do_train \
306
+ --do_predict \
307
+ --predict_with_generate \
308
+ --model_name_or_path $2 \
309
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights/trans_input.pt \
310
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights \
311
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights/prompts_keys_till_now.pt \
312
+ --data_dir CL_Benchmark \
313
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
314
+ --gen_data_dir generated_data/lora_gen_long_t5 \
315
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/rte \
316
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte \
317
+ --per_device_train_batch_size 32 \
318
+ --per_device_eval_batch_size 256 \
319
+ --gradient_accumulation_steps 1 \
320
+ --learning_rate 0.0003 \
321
+ --num_train_epochs 10 \
322
+ --run_name gen_script_long_order3_t5_small_inflora \
323
+ --max_source_length 512 \
324
+ --max_target_length 50 \
325
+ --generation_max_length 50 \
326
+ --add_task_name False \
327
+ --add_dataset_name False \
328
+ --overwrite_output_dir \
329
+ --overwrite_cache \
330
+ --lr_scheduler_type constant \
331
+ --warmup_steps 0 \
332
+ --logging_strategy steps \
333
+ --logging_steps 10 \
334
+ --metric_for_best_model eval_exact_match_for_rte \
335
+ --evaluation_strategy steps \
336
+ --save_strategy steps \
337
+ --save_total_limit 1 \
338
+ --load_best_model_at_end \
339
+ --lora_r 8 \
340
+ --lora_alpha 32 \
341
+ --lora_dropout 0.0 \
342
+ --data_replay_freq -1 \
343
+ --kl_ratio 0.1 \
344
+ --attn_temperature 1 \
345
+ --model_name inflora \
346
+ --threshold 0.995
347
+
348
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/checkpoint*
349
+
350
+ sleep 5
351
+
352
+
353
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
354
+ --do_train \
355
+ --do_predict \
356
+ --predict_with_generate \
357
+ --model_name_or_path $2 \
358
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights/trans_input.pt \
359
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights \
360
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights/prompts_keys_till_now.pt \
361
+ --data_dir CL_Benchmark \
362
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
363
+ --gen_data_dir generated_data/lora_gen_long_t5 \
364
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/imdb \
365
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb \
366
+ --per_device_train_batch_size 32 \
367
+ --per_device_eval_batch_size 256 \
368
+ --gradient_accumulation_steps 1 \
369
+ --learning_rate 0.0003 \
370
+ --num_train_epochs 10 \
371
+ --run_name gen_script_long_order3_t5_small_inflora \
372
+ --max_source_length 512 \
373
+ --max_target_length 50 \
374
+ --generation_max_length 50 \
375
+ --add_task_name False \
376
+ --add_dataset_name False \
377
+ --overwrite_output_dir \
378
+ --overwrite_cache \
379
+ --lr_scheduler_type constant \
380
+ --warmup_steps 0 \
381
+ --logging_strategy steps \
382
+ --logging_steps 10 \
383
+ --metric_for_best_model eval_exact_match_for_imdb \
384
+ --evaluation_strategy steps \
385
+ --save_strategy steps \
386
+ --save_total_limit 1 \
387
+ --load_best_model_at_end \
388
+ --lora_r 8 \
389
+ --lora_alpha 32 \
390
+ --lora_dropout 0.0 \
391
+ --data_replay_freq -1 \
392
+ --kl_ratio 0.1 \
393
+ --attn_temperature 1 \
394
+ --model_name inflora \
395
+ --threshold 0.995
396
+
397
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/checkpoint*
398
+
399
+ sleep 5
400
+
401
+
402
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
403
+ --do_train \
404
+ --do_predict \
405
+ --predict_with_generate \
406
+ --model_name_or_path $2 \
407
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights/trans_input.pt \
408
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights \
409
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights/prompts_keys_till_now.pt \
410
+ --data_dir CL_Benchmark \
411
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
412
+ --gen_data_dir generated_data/lora_gen_long_t5 \
413
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/sst2 \
414
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2 \
415
+ --per_device_train_batch_size 32 \
416
+ --per_device_eval_batch_size 256 \
417
+ --gradient_accumulation_steps 1 \
418
+ --learning_rate 0.0003 \
419
+ --num_train_epochs 10 \
420
+ --run_name gen_script_long_order3_t5_small_inflora \
421
+ --max_source_length 512 \
422
+ --max_target_length 50 \
423
+ --generation_max_length 50 \
424
+ --add_task_name False \
425
+ --add_dataset_name False \
426
+ --overwrite_output_dir \
427
+ --overwrite_cache \
428
+ --lr_scheduler_type constant \
429
+ --warmup_steps 0 \
430
+ --logging_strategy steps \
431
+ --logging_steps 10 \
432
+ --metric_for_best_model eval_exact_match_for_sst2 \
433
+ --evaluation_strategy steps \
434
+ --save_strategy steps \
435
+ --save_total_limit 1 \
436
+ --load_best_model_at_end \
437
+ --lora_r 8 \
438
+ --lora_alpha 32 \
439
+ --lora_dropout 0.0 \
440
+ --data_replay_freq -1 \
441
+ --kl_ratio 0.1 \
442
+ --attn_temperature 1 \
443
+ --model_name inflora \
444
+ --threshold 0.995
445
+
446
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/checkpoint*
447
+
448
+ sleep 5
449
+
450
+
451
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
452
+ --do_train \
453
+ --do_predict \
454
+ --predict_with_generate \
455
+ --model_name_or_path $2 \
456
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights/trans_input.pt \
457
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights \
458
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights/prompts_keys_till_now.pt \
459
+ --data_dir CL_Benchmark \
460
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
461
+ --gen_data_dir generated_data/lora_gen_long_t5 \
462
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/dbpedia \
463
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia \
464
+ --per_device_train_batch_size 32 \
465
+ --per_device_eval_batch_size 256 \
466
+ --gradient_accumulation_steps 1 \
467
+ --learning_rate 0.0003 \
468
+ --num_train_epochs 10 \
469
+ --run_name gen_script_long_order3_t5_small_inflora \
470
+ --max_source_length 512 \
471
+ --max_target_length 50 \
472
+ --generation_max_length 50 \
473
+ --add_task_name False \
474
+ --add_dataset_name False \
475
+ --overwrite_output_dir \
476
+ --overwrite_cache \
477
+ --lr_scheduler_type constant \
478
+ --warmup_steps 0 \
479
+ --logging_strategy steps \
480
+ --logging_steps 10 \
481
+ --metric_for_best_model eval_exact_match_for_dbpedia \
482
+ --evaluation_strategy steps \
483
+ --save_strategy steps \
484
+ --save_total_limit 1 \
485
+ --load_best_model_at_end \
486
+ --lora_r 8 \
487
+ --lora_alpha 32 \
488
+ --lora_dropout 0.0 \
489
+ --data_replay_freq -1 \
490
+ --kl_ratio 0.1 \
491
+ --attn_temperature 1 \
492
+ --model_name inflora \
493
+ --threshold 0.995
494
+
495
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/checkpoint*
496
+
497
+ sleep 5
498
+
499
+
500
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
501
+ --do_train \
502
+ --do_predict \
503
+ --predict_with_generate \
504
+ --model_name_or_path $2 \
505
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights/trans_input.pt \
506
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights \
507
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights/prompts_keys_till_now.pt \
508
+ --data_dir CL_Benchmark \
509
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
510
+ --gen_data_dir generated_data/lora_gen_long_t5 \
511
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/agnews \
512
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews \
513
+ --per_device_train_batch_size 32 \
514
+ --per_device_eval_batch_size 256 \
515
+ --gradient_accumulation_steps 1 \
516
+ --learning_rate 0.0003 \
517
+ --num_train_epochs 10 \
518
+ --run_name gen_script_long_order3_t5_small_inflora \
519
+ --max_source_length 512 \
520
+ --max_target_length 50 \
521
+ --generation_max_length 50 \
522
+ --add_task_name False \
523
+ --add_dataset_name False \
524
+ --overwrite_output_dir \
525
+ --overwrite_cache \
526
+ --lr_scheduler_type constant \
527
+ --warmup_steps 0 \
528
+ --logging_strategy steps \
529
+ --logging_steps 10 \
530
+ --metric_for_best_model eval_exact_match_for_agnews \
531
+ --evaluation_strategy steps \
532
+ --save_strategy steps \
533
+ --save_total_limit 1 \
534
+ --load_best_model_at_end \
535
+ --lora_r 8 \
536
+ --lora_alpha 32 \
537
+ --lora_dropout 0.0 \
538
+ --data_replay_freq -1 \
539
+ --kl_ratio 0.1 \
540
+ --attn_temperature 1 \
541
+ --model_name inflora \
542
+ --threshold 0.995
543
+
544
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/checkpoint*
545
+
546
+ sleep 5
547
+
548
+
549
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
550
+ --do_train \
551
+ --do_predict \
552
+ --predict_with_generate \
553
+ --model_name_or_path $2 \
554
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights/trans_input.pt \
555
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights \
556
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights/prompts_keys_till_now.pt \
557
+ --data_dir CL_Benchmark \
558
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
559
+ --gen_data_dir generated_data/lora_gen_long_t5 \
560
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/yahoo \
561
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo \
562
+ --per_device_train_batch_size 32 \
563
+ --per_device_eval_batch_size 256 \
564
+ --gradient_accumulation_steps 1 \
565
+ --learning_rate 0.0003 \
566
+ --num_train_epochs 10 \
567
+ --run_name gen_script_long_order3_t5_small_inflora \
568
+ --max_source_length 512 \
569
+ --max_target_length 50 \
570
+ --generation_max_length 50 \
571
+ --add_task_name False \
572
+ --add_dataset_name False \
573
+ --overwrite_output_dir \
574
+ --overwrite_cache \
575
+ --lr_scheduler_type constant \
576
+ --warmup_steps 0 \
577
+ --logging_strategy steps \
578
+ --logging_steps 10 \
579
+ --metric_for_best_model eval_exact_match_for_yahoo \
580
+ --evaluation_strategy steps \
581
+ --save_strategy steps \
582
+ --save_total_limit 1 \
583
+ --load_best_model_at_end \
584
+ --lora_r 8 \
585
+ --lora_alpha 32 \
586
+ --lora_dropout 0.0 \
587
+ --data_replay_freq -1 \
588
+ --kl_ratio 0.1 \
589
+ --attn_temperature 1 \
590
+ --model_name inflora \
591
+ --threshold 0.995
592
+
593
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/checkpoint*
594
+
595
+ sleep 5
596
+
597
+
598
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
599
+ --do_train \
600
+ --do_predict \
601
+ --predict_with_generate \
602
+ --model_name_or_path $2 \
603
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights/trans_input.pt \
604
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights \
605
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights/prompts_keys_till_now.pt \
606
+ --data_dir CL_Benchmark \
607
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
608
+ --gen_data_dir generated_data/lora_gen_long_t5 \
609
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/multirc \
610
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc \
611
+ --per_device_train_batch_size 32 \
612
+ --per_device_eval_batch_size 256 \
613
+ --gradient_accumulation_steps 1 \
614
+ --learning_rate 0.0003 \
615
+ --num_train_epochs 10 \
616
+ --run_name gen_script_long_order3_t5_small_inflora \
617
+ --max_source_length 512 \
618
+ --max_target_length 50 \
619
+ --generation_max_length 50 \
620
+ --add_task_name False \
621
+ --add_dataset_name False \
622
+ --overwrite_output_dir \
623
+ --overwrite_cache \
624
+ --lr_scheduler_type constant \
625
+ --warmup_steps 0 \
626
+ --logging_strategy steps \
627
+ --logging_steps 10 \
628
+ --metric_for_best_model eval_exact_match_for_multirc \
629
+ --evaluation_strategy steps \
630
+ --save_strategy steps \
631
+ --save_total_limit 1 \
632
+ --load_best_model_at_end \
633
+ --lora_r 8 \
634
+ --lora_alpha 32 \
635
+ --lora_dropout 0.0 \
636
+ --data_replay_freq -1 \
637
+ --kl_ratio 0.1 \
638
+ --attn_temperature 1 \
639
+ --model_name inflora \
640
+ --threshold 0.995
641
+
642
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/checkpoint*
643
+
644
+ sleep 5
645
+
646
+
647
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
648
+ --do_train \
649
+ --do_predict \
650
+ --predict_with_generate \
651
+ --model_name_or_path $2 \
652
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/saved_weights/trans_input.pt \
653
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/saved_weights \
654
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/saved_weights/prompts_keys_till_now.pt \
655
+ --data_dir CL_Benchmark \
656
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
657
+ --gen_data_dir generated_data/lora_gen_long_t5 \
658
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/boolq \
659
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq \
660
+ --per_device_train_batch_size 32 \
661
+ --per_device_eval_batch_size 256 \
662
+ --gradient_accumulation_steps 1 \
663
+ --learning_rate 0.0003 \
664
+ --num_train_epochs 10 \
665
+ --run_name gen_script_long_order3_t5_small_inflora \
666
+ --max_source_length 512 \
667
+ --max_target_length 50 \
668
+ --generation_max_length 50 \
669
+ --add_task_name False \
670
+ --add_dataset_name False \
671
+ --overwrite_output_dir \
672
+ --overwrite_cache \
673
+ --lr_scheduler_type constant \
674
+ --warmup_steps 0 \
675
+ --logging_strategy steps \
676
+ --logging_steps 10 \
677
+ --metric_for_best_model eval_exact_match_for_boolq \
678
+ --evaluation_strategy steps \
679
+ --save_strategy steps \
680
+ --save_total_limit 1 \
681
+ --load_best_model_at_end \
682
+ --lora_r 8 \
683
+ --lora_alpha 32 \
684
+ --lora_dropout 0.0 \
685
+ --data_replay_freq -1 \
686
+ --kl_ratio 0.1 \
687
+ --attn_temperature 1 \
688
+ --model_name inflora \
689
+ --threshold 0.995
690
+
691
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq/checkpoint*
692
+
693
+ sleep 5
694
+
695
+
696
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
697
+ --do_train \
698
+ --do_predict \
699
+ --predict_with_generate \
700
+ --model_name_or_path $2 \
701
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq/saved_weights/trans_input.pt \
702
+ --previous_lora_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/1-yelp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/2-amazon/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/3-mnli/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/4-cb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/5-copa/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/6-qqp/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/9-sst2/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/10-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/11-agnews/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/12-yahoo/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/13-multirc/saved_weights,logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq/saved_weights \
703
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/14-boolq/saved_weights/prompts_keys_till_now.pt \
704
+ --data_dir CL_Benchmark \
705
+ --task_order yelp,amazon,mnli,cb,copa,qqp,rte,imdb,sst2,dbpedia,agnews,yahoo,multirc,boolq,wic \
706
+ --gen_data_dir generated_data/lora_gen_long_t5 \
707
+ --task_config_dir configs/gen_script_long_order3_t5_small_configs/wic \
708
+ --output_dir logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/15-wic \
709
+ --per_device_train_batch_size 32 \
710
+ --per_device_eval_batch_size 256 \
711
+ --gradient_accumulation_steps 1 \
712
+ --learning_rate 0.0003 \
713
+ --num_train_epochs 10 \
714
+ --run_name gen_script_long_order3_t5_small_inflora \
715
+ --max_source_length 512 \
716
+ --max_target_length 50 \
717
+ --generation_max_length 50 \
718
+ --add_task_name False \
719
+ --add_dataset_name False \
720
+ --overwrite_output_dir \
721
+ --overwrite_cache \
722
+ --lr_scheduler_type constant \
723
+ --warmup_steps 0 \
724
+ --logging_strategy steps \
725
+ --logging_steps 10 \
726
+ --metric_for_best_model eval_exact_match_for_wic \
727
+ --evaluation_strategy steps \
728
+ --save_strategy steps \
729
+ --save_total_limit 1 \
730
+ --load_best_model_at_end \
731
+ --lora_r 8 \
732
+ --lora_alpha 32 \
733
+ --lora_dropout 0.0 \
734
+ --data_replay_freq -1 \
735
+ --kl_ratio 0.1 \
736
+ --attn_temperature 1 \
737
+ --model_name inflora \
738
+ --threshold 0.995
739
+
740
+ rm -rf logs_and_outputs/gen_script_long_order3_t5_small_inflora/outputs/15-wic/checkpoint*
741
+
742
+ sleep 5
743
+
744
+ CUDA_VISIBLE_DEVICES=$1 python score.py gen_script_long_order3_t5_small_inflora gen_script_long_order3_t5_small_inflora
root_gainlora/T5_small/gen_script_long_order4_t5_small_gainlora_inflora.sh ADDED
@@ -0,0 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
+ # Slurm batch header: job "cl", log file cl-<jobid>.out, partition "compute",
+ # one node, 20 h wall-clock limit, 128G of memory, one a100-sxm4-80gb GPU.
2
+ #SBATCH --job-name=cl
3
+ #SBATCH --output=cl-%j.out
4
+ #SBATCH --partition=compute
5
+ #SBATCH --nodes=1
6
+ #SBATCH --time=20:00:00
7
+ #SBATCH --mem=128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ # Enumerate CUDA devices in PCI bus order.
+ CUDA_DEVICE_ORDER="PCI_BUS_ID"
+ export CUDA_DEVICE_ORDER
11
+
12
+ # One random integer in 25000-30000; no stage visible in this part of the script reads $port.
+ port="$(shuf -n 1 -i 25000-30000)"
13
+
14
+ # Task 1/15 (mnli): first stage of the task order; no earlier weights are passed in.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path. Both are quoted so an empty
+ # or space-containing value stays one argument instead of shifting/splitting the flags.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path "$2" \
19
+ --data_dir CL_Benchmark \
20
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
21
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/mnli \
22
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli \
23
+ --per_device_train_batch_size 32 \
24
+ --per_device_eval_batch_size 256 \
25
+ --gradient_accumulation_steps 1 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 10 \
28
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_exact_match \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --load_best_model_at_end \
45
+ --lora_r 8 \
46
+ --lora_alpha 32 \
47
+ --lora_dropout 0.0 \
48
+ --add_instruction_replay \
49
+ --data_replay_freq -1 \
50
+ --replay_after_n_epoch 0 \
51
+ --mlp_hidden_dim 100 \
52
+ --model_name gainlora_inflora \
53
+ --threshold 0.995 \
54
+ --transthreshold 0.995
55
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
56
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/checkpoint*
57
+
58
+ sleep 5
59
+
60
+
61
+ # Task 2/15 (cb): passes the 1-mnli trans_input.pt, prompt keys and saved LoRA weights to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
62
+ --do_train \
63
+ --do_predict \
64
+ --predict_with_generate \
65
+ --model_name_or_path "$2" \
66
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights/trans_input.pt \
67
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights \
68
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights/prompts_keys_till_now.pt \
69
+ --data_dir CL_Benchmark \
70
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
71
+ --gen_data_dir generated_data/lora_gen_long_t5 \
72
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/cb \
73
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb \
74
+ --per_device_train_batch_size 32 \
75
+ --per_device_eval_batch_size 256 \
76
+ --gradient_accumulation_steps 1 \
77
+ --learning_rate 0.0003 \
78
+ --num_train_epochs 10 \
79
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
80
+ --max_source_length 512 \
81
+ --max_target_length 50 \
82
+ --generation_max_length 50 \
83
+ --add_task_name False \
84
+ --add_dataset_name False \
85
+ --overwrite_output_dir \
86
+ --overwrite_cache \
87
+ --lr_scheduler_type constant \
88
+ --warmup_steps 0 \
89
+ --logging_strategy steps \
90
+ --logging_steps 10 \
91
+ --metric_for_best_model eval_exact_match_for_cb \
92
+ --evaluation_strategy steps \
93
+ --save_strategy steps \
94
+ --save_total_limit 1 \
95
+ --load_best_model_at_end \
96
+ --lora_r 8 \
97
+ --lora_alpha 32 \
98
+ --lora_dropout 0.0 \
99
+ --data_replay_freq -1 \
100
+ --kl_ratio 0.1 \
101
+ --attn_temperature 1 \
102
+ --mlp_hidden_dim 100 \
103
+ --model_name gainlora_inflora \
104
+ --threshold 0.995 \
105
+ --transthreshold 0.995
106
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
107
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/checkpoint*
108
+
109
+ sleep 5
110
+
111
+
112
+ # Task 3/15 (wic): passes the 2-cb trans_input.pt and prompt keys plus the LoRA weights of tasks 1-2 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
113
+ --do_train \
114
+ --do_predict \
115
+ --predict_with_generate \
116
+ --model_name_or_path "$2" \
117
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights/trans_input.pt \
118
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights \
119
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights/prompts_keys_till_now.pt \
120
+ --data_dir CL_Benchmark \
121
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
122
+ --gen_data_dir generated_data/lora_gen_long_t5 \
123
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/wic \
124
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic \
125
+ --per_device_train_batch_size 32 \
126
+ --per_device_eval_batch_size 256 \
127
+ --gradient_accumulation_steps 1 \
128
+ --learning_rate 0.0003 \
129
+ --num_train_epochs 10 \
130
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
131
+ --max_source_length 512 \
132
+ --max_target_length 50 \
133
+ --generation_max_length 50 \
134
+ --add_task_name False \
135
+ --add_dataset_name False \
136
+ --overwrite_output_dir \
137
+ --overwrite_cache \
138
+ --lr_scheduler_type constant \
139
+ --warmup_steps 0 \
140
+ --logging_strategy steps \
141
+ --logging_steps 10 \
142
+ --metric_for_best_model eval_exact_match_for_wic \
143
+ --evaluation_strategy steps \
144
+ --save_strategy steps \
145
+ --save_total_limit 1 \
146
+ --load_best_model_at_end \
147
+ --lora_r 8 \
148
+ --lora_alpha 32 \
149
+ --lora_dropout 0.0 \
150
+ --data_replay_freq -1 \
151
+ --kl_ratio 0.1 \
152
+ --attn_temperature 1 \
153
+ --mlp_hidden_dim 100 \
154
+ --model_name gainlora_inflora \
155
+ --threshold 0.995 \
156
+ --transthreshold 0.995
157
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
158
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/checkpoint*
159
+
160
+ sleep 5
161
+
162
+
163
+ # Task 4/15 (copa): passes the 3-wic trans_input.pt and prompt keys plus the LoRA weights of tasks 1-3 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
164
+ --do_train \
165
+ --do_predict \
166
+ --predict_with_generate \
167
+ --model_name_or_path "$2" \
168
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights/trans_input.pt \
169
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights \
170
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights/prompts_keys_till_now.pt \
171
+ --data_dir CL_Benchmark \
172
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
173
+ --gen_data_dir generated_data/lora_gen_long_t5 \
174
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/copa \
175
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa \
176
+ --per_device_train_batch_size 32 \
177
+ --per_device_eval_batch_size 256 \
178
+ --gradient_accumulation_steps 1 \
179
+ --learning_rate 0.0003 \
180
+ --num_train_epochs 10 \
181
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
182
+ --max_source_length 512 \
183
+ --max_target_length 50 \
184
+ --generation_max_length 50 \
185
+ --add_task_name False \
186
+ --add_dataset_name False \
187
+ --overwrite_output_dir \
188
+ --overwrite_cache \
189
+ --lr_scheduler_type constant \
190
+ --warmup_steps 0 \
191
+ --logging_strategy steps \
192
+ --logging_steps 10 \
193
+ --metric_for_best_model eval_exact_match_for_copa \
194
+ --evaluation_strategy steps \
195
+ --save_strategy steps \
196
+ --save_total_limit 1 \
197
+ --load_best_model_at_end \
198
+ --lora_r 8 \
199
+ --lora_alpha 32 \
200
+ --lora_dropout 0.0 \
201
+ --data_replay_freq -1 \
202
+ --kl_ratio 0.1 \
203
+ --attn_temperature 1 \
204
+ --mlp_hidden_dim 100 \
205
+ --model_name gainlora_inflora \
206
+ --threshold 0.995 \
207
+ --transthreshold 0.995
208
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
209
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/checkpoint*
210
+
211
+ sleep 5
212
+
213
+
214
+ # Task 5/15 (qqp): passes the 4-copa trans_input.pt and prompt keys plus the LoRA weights of tasks 1-4 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
215
+ --do_train \
216
+ --do_predict \
217
+ --predict_with_generate \
218
+ --model_name_or_path "$2" \
219
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights/trans_input.pt \
220
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights \
221
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights/prompts_keys_till_now.pt \
222
+ --data_dir CL_Benchmark \
223
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
224
+ --gen_data_dir generated_data/lora_gen_long_t5 \
225
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/qqp \
226
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp \
227
+ --per_device_train_batch_size 32 \
228
+ --per_device_eval_batch_size 256 \
229
+ --gradient_accumulation_steps 1 \
230
+ --learning_rate 0.0003 \
231
+ --num_train_epochs 10 \
232
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
233
+ --max_source_length 512 \
234
+ --max_target_length 50 \
235
+ --generation_max_length 50 \
236
+ --add_task_name False \
237
+ --add_dataset_name False \
238
+ --overwrite_output_dir \
239
+ --overwrite_cache \
240
+ --lr_scheduler_type constant \
241
+ --warmup_steps 0 \
242
+ --logging_strategy steps \
243
+ --logging_steps 10 \
244
+ --metric_for_best_model eval_exact_match_for_qqp \
245
+ --evaluation_strategy steps \
246
+ --save_strategy steps \
247
+ --save_total_limit 1 \
248
+ --load_best_model_at_end \
249
+ --lora_r 8 \
250
+ --lora_alpha 32 \
251
+ --lora_dropout 0.0 \
252
+ --data_replay_freq -1 \
253
+ --kl_ratio 0.1 \
254
+ --attn_temperature 1 \
255
+ --mlp_hidden_dim 100 \
256
+ --model_name gainlora_inflora \
257
+ --threshold 0.995 \
258
+ --transthreshold 0.995
259
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
260
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/checkpoint*
261
+
262
+ sleep 5
263
+
264
+
265
+ # Task 6/15 (boolq): passes the 5-qqp trans_input.pt and prompt keys plus the LoRA weights of tasks 1-5 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
266
+ --do_train \
267
+ --do_predict \
268
+ --predict_with_generate \
269
+ --model_name_or_path "$2" \
270
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights/trans_input.pt \
271
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights \
272
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights/prompts_keys_till_now.pt \
273
+ --data_dir CL_Benchmark \
274
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
275
+ --gen_data_dir generated_data/lora_gen_long_t5 \
276
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/boolq \
277
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq \
278
+ --per_device_train_batch_size 32 \
279
+ --per_device_eval_batch_size 256 \
280
+ --gradient_accumulation_steps 1 \
281
+ --learning_rate 0.0003 \
282
+ --num_train_epochs 10 \
283
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
284
+ --max_source_length 512 \
285
+ --max_target_length 50 \
286
+ --generation_max_length 50 \
287
+ --add_task_name False \
288
+ --add_dataset_name False \
289
+ --overwrite_output_dir \
290
+ --overwrite_cache \
291
+ --lr_scheduler_type constant \
292
+ --warmup_steps 0 \
293
+ --logging_strategy steps \
294
+ --logging_steps 10 \
295
+ --metric_for_best_model eval_exact_match_for_boolq \
296
+ --evaluation_strategy steps \
297
+ --save_strategy steps \
298
+ --save_total_limit 1 \
299
+ --load_best_model_at_end \
300
+ --lora_r 8 \
301
+ --lora_alpha 32 \
302
+ --lora_dropout 0.0 \
303
+ --data_replay_freq -1 \
304
+ --kl_ratio 0.1 \
305
+ --attn_temperature 1 \
306
+ --mlp_hidden_dim 100 \
307
+ --model_name gainlora_inflora \
308
+ --threshold 0.995 \
309
+ --transthreshold 0.995
310
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
311
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/checkpoint*
312
+
313
+ sleep 5
314
+
315
+
316
+ # Task 7/15 (rte): passes the 6-boolq trans_input.pt and prompt keys plus the LoRA weights of tasks 1-6 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
317
+ --do_train \
318
+ --do_predict \
319
+ --predict_with_generate \
320
+ --model_name_or_path "$2" \
321
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights/trans_input.pt \
322
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights \
323
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights/prompts_keys_till_now.pt \
324
+ --data_dir CL_Benchmark \
325
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
326
+ --gen_data_dir generated_data/lora_gen_long_t5 \
327
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/rte \
328
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte \
329
+ --per_device_train_batch_size 32 \
330
+ --per_device_eval_batch_size 256 \
331
+ --gradient_accumulation_steps 1 \
332
+ --learning_rate 0.0003 \
333
+ --num_train_epochs 10 \
334
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
335
+ --max_source_length 512 \
336
+ --max_target_length 50 \
337
+ --generation_max_length 50 \
338
+ --add_task_name False \
339
+ --add_dataset_name False \
340
+ --overwrite_output_dir \
341
+ --overwrite_cache \
342
+ --lr_scheduler_type constant \
343
+ --warmup_steps 0 \
344
+ --logging_strategy steps \
345
+ --logging_steps 10 \
346
+ --metric_for_best_model eval_exact_match_for_rte \
347
+ --evaluation_strategy steps \
348
+ --save_strategy steps \
349
+ --save_total_limit 1 \
350
+ --load_best_model_at_end \
351
+ --lora_r 8 \
352
+ --lora_alpha 32 \
353
+ --lora_dropout 0.0 \
354
+ --data_replay_freq -1 \
355
+ --kl_ratio 0.1 \
356
+ --attn_temperature 1 \
357
+ --mlp_hidden_dim 100 \
358
+ --model_name gainlora_inflora \
359
+ --threshold 0.995 \
360
+ --transthreshold 0.995
361
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
362
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/checkpoint*
363
+
364
+ sleep 5
365
+
366
+
367
+ # Task 8/15 (imdb): passes the 7-rte trans_input.pt and prompt keys plus the LoRA weights of tasks 1-7 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
368
+ --do_train \
369
+ --do_predict \
370
+ --predict_with_generate \
371
+ --model_name_or_path "$2" \
372
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights/trans_input.pt \
373
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights \
374
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights/prompts_keys_till_now.pt \
375
+ --data_dir CL_Benchmark \
376
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
377
+ --gen_data_dir generated_data/lora_gen_long_t5 \
378
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/imdb \
379
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb \
380
+ --per_device_train_batch_size 32 \
381
+ --per_device_eval_batch_size 256 \
382
+ --gradient_accumulation_steps 1 \
383
+ --learning_rate 0.0003 \
384
+ --num_train_epochs 10 \
385
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
386
+ --max_source_length 512 \
387
+ --max_target_length 50 \
388
+ --generation_max_length 50 \
389
+ --add_task_name False \
390
+ --add_dataset_name False \
391
+ --overwrite_output_dir \
392
+ --overwrite_cache \
393
+ --lr_scheduler_type constant \
394
+ --warmup_steps 0 \
395
+ --logging_strategy steps \
396
+ --logging_steps 10 \
397
+ --metric_for_best_model eval_exact_match_for_imdb \
398
+ --evaluation_strategy steps \
399
+ --save_strategy steps \
400
+ --save_total_limit 1 \
401
+ --load_best_model_at_end \
402
+ --lora_r 8 \
403
+ --lora_alpha 32 \
404
+ --lora_dropout 0.0 \
405
+ --data_replay_freq -1 \
406
+ --kl_ratio 0.1 \
407
+ --attn_temperature 1 \
408
+ --mlp_hidden_dim 100 \
409
+ --model_name gainlora_inflora \
410
+ --threshold 0.995 \
411
+ --transthreshold 0.995
412
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
413
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/checkpoint*
414
+
415
+ sleep 5
416
+
417
+
418
+ # Task 9/15 (yelp): passes the 8-imdb trans_input.pt and prompt keys plus the LoRA weights of tasks 1-8 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
419
+ --do_train \
420
+ --do_predict \
421
+ --predict_with_generate \
422
+ --model_name_or_path "$2" \
423
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights/trans_input.pt \
424
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights \
425
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights/prompts_keys_till_now.pt \
426
+ --data_dir CL_Benchmark \
427
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
428
+ --gen_data_dir generated_data/lora_gen_long_t5 \
429
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/yelp \
430
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp \
431
+ --per_device_train_batch_size 32 \
432
+ --per_device_eval_batch_size 256 \
433
+ --gradient_accumulation_steps 1 \
434
+ --learning_rate 0.0003 \
435
+ --num_train_epochs 10 \
436
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
437
+ --max_source_length 512 \
438
+ --max_target_length 50 \
439
+ --generation_max_length 50 \
440
+ --add_task_name False \
441
+ --add_dataset_name False \
442
+ --overwrite_output_dir \
443
+ --overwrite_cache \
444
+ --lr_scheduler_type constant \
445
+ --warmup_steps 0 \
446
+ --logging_strategy steps \
447
+ --logging_steps 10 \
448
+ --metric_for_best_model eval_exact_match_for_yelp \
449
+ --evaluation_strategy steps \
450
+ --save_strategy steps \
451
+ --save_total_limit 1 \
452
+ --load_best_model_at_end \
453
+ --lora_r 8 \
454
+ --lora_alpha 32 \
455
+ --lora_dropout 0.0 \
456
+ --data_replay_freq -1 \
457
+ --kl_ratio 0.1 \
458
+ --attn_temperature 1 \
459
+ --mlp_hidden_dim 100 \
460
+ --model_name gainlora_inflora \
461
+ --threshold 0.995 \
462
+ --transthreshold 0.995
463
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
464
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/checkpoint*
465
+
466
+ sleep 5
467
+
468
+
469
+ # Task 10/15 (amazon): passes the 9-yelp trans_input.pt and prompt keys plus the LoRA weights of tasks 1-9 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
470
+ --do_train \
471
+ --do_predict \
472
+ --predict_with_generate \
473
+ --model_name_or_path "$2" \
474
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights/trans_input.pt \
475
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights \
476
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights/prompts_keys_till_now.pt \
477
+ --data_dir CL_Benchmark \
478
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
479
+ --gen_data_dir generated_data/lora_gen_long_t5 \
480
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/amazon \
481
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon \
482
+ --per_device_train_batch_size 32 \
483
+ --per_device_eval_batch_size 256 \
484
+ --gradient_accumulation_steps 1 \
485
+ --learning_rate 0.0003 \
486
+ --num_train_epochs 10 \
487
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
488
+ --max_source_length 512 \
489
+ --max_target_length 50 \
490
+ --generation_max_length 50 \
491
+ --add_task_name False \
492
+ --add_dataset_name False \
493
+ --overwrite_output_dir \
494
+ --overwrite_cache \
495
+ --lr_scheduler_type constant \
496
+ --warmup_steps 0 \
497
+ --logging_strategy steps \
498
+ --logging_steps 10 \
499
+ --metric_for_best_model eval_exact_match_for_amazon \
500
+ --evaluation_strategy steps \
501
+ --save_strategy steps \
502
+ --save_total_limit 1 \
503
+ --load_best_model_at_end \
504
+ --lora_r 8 \
505
+ --lora_alpha 32 \
506
+ --lora_dropout 0.0 \
507
+ --data_replay_freq -1 \
508
+ --kl_ratio 0.1 \
509
+ --attn_temperature 1 \
510
+ --mlp_hidden_dim 100 \
511
+ --model_name gainlora_inflora \
512
+ --threshold 0.995 \
513
+ --transthreshold 0.995
514
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
515
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/checkpoint*
516
+
517
+ sleep 5
518
+
519
+
520
+ # Task 11/15 (sst2): passes the 10-amazon trans_input.pt and prompt keys plus the LoRA weights of tasks 1-10 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
521
+ --do_train \
522
+ --do_predict \
523
+ --predict_with_generate \
524
+ --model_name_or_path "$2" \
525
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights/trans_input.pt \
526
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights \
527
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights/prompts_keys_till_now.pt \
528
+ --data_dir CL_Benchmark \
529
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
530
+ --gen_data_dir generated_data/lora_gen_long_t5 \
531
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/sst2 \
532
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2 \
533
+ --per_device_train_batch_size 32 \
534
+ --per_device_eval_batch_size 256 \
535
+ --gradient_accumulation_steps 1 \
536
+ --learning_rate 0.0003 \
537
+ --num_train_epochs 10 \
538
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
539
+ --max_source_length 512 \
540
+ --max_target_length 50 \
541
+ --generation_max_length 50 \
542
+ --add_task_name False \
543
+ --add_dataset_name False \
544
+ --overwrite_output_dir \
545
+ --overwrite_cache \
546
+ --lr_scheduler_type constant \
547
+ --warmup_steps 0 \
548
+ --logging_strategy steps \
549
+ --logging_steps 10 \
550
+ --metric_for_best_model eval_exact_match_for_sst2 \
551
+ --evaluation_strategy steps \
552
+ --save_strategy steps \
553
+ --save_total_limit 1 \
554
+ --load_best_model_at_end \
555
+ --lora_r 8 \
556
+ --lora_alpha 32 \
557
+ --lora_dropout 0.0 \
558
+ --data_replay_freq -1 \
559
+ --kl_ratio 0.1 \
560
+ --attn_temperature 1 \
561
+ --mlp_hidden_dim 100 \
562
+ --model_name gainlora_inflora \
563
+ --threshold 0.995 \
564
+ --transthreshold 0.995
565
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
566
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/checkpoint*
567
+
568
+ sleep 5
569
+
570
+
571
+ # Task 12/15 (dbpedia): passes the 11-sst2 trans_input.pt and prompt keys plus the LoRA weights of tasks 1-11 to run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path (quoted so the value stays one argument).
+ # "--num_train_epochs 10 \" has a space before the backslash so 10 cannot fuse with the next flag.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
572
+ --do_train \
573
+ --do_predict \
574
+ --predict_with_generate \
575
+ --model_name_or_path "$2" \
576
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights/trans_input.pt \
577
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights \
578
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/11-sst2/saved_weights/prompts_keys_till_now.pt \
579
+ --data_dir CL_Benchmark \
580
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
581
+ --gen_data_dir generated_data/lora_gen_long_t5 \
582
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/dbpedia \
583
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia \
584
+ --per_device_train_batch_size 32 \
585
+ --per_device_eval_batch_size 256 \
586
+ --gradient_accumulation_steps 1 \
587
+ --learning_rate 0.0003 \
588
+ --num_train_epochs 10 \
589
+ --run_name gen_script_long_order4_t5_small_gainlora_inflora \
590
+ --max_source_length 512 \
591
+ --max_target_length 50 \
592
+ --generation_max_length 50 \
593
+ --add_task_name False \
594
+ --add_dataset_name False \
595
+ --overwrite_output_dir \
596
+ --overwrite_cache \
597
+ --lr_scheduler_type constant \
598
+ --warmup_steps 0 \
599
+ --logging_strategy steps \
600
+ --logging_steps 10 \
601
+ --metric_for_best_model eval_exact_match_for_dbpedia \
602
+ --evaluation_strategy steps \
603
+ --save_strategy steps \
604
+ --save_total_limit 1 \
605
+ --load_best_model_at_end \
606
+ --lora_r 8 \
607
+ --lora_alpha 32 \
608
+ --lora_dropout 0.0 \
609
+ --data_replay_freq -1 \
610
+ --kl_ratio 0.1 \
611
+ --attn_temperature 1 \
612
+ --mlp_hidden_dim 100 \
613
+ --model_name gainlora_inflora \
614
+ --threshold 0.995 \
615
+ --transthreshold 0.995
616
+
+ # Remove only the checkpoint* entries; saved_weights stays because the next stage reads it.
617
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_gainlora_inflora/outputs/12-dbpedia/checkpoint*
618
+
619
+ sleep 5
620
+
621
+
622
# Stage 13/15 of the long_order4 gainlora_inflora run: train and predict on
# `agnews`, starting from the files saved by stage 12 (dbpedia).
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
run_name=gen_script_long_order4_t5_small_gainlora_inflora
out_root=logs_and_outputs/$run_name/outputs
task=agnews
stage_dir=$out_root/13-$task
prev_weights=$out_root/12-dbpedia/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic 4-copa 5-qqp 6-boolq 7-rte 8-imdb \
                  9-yelp 10-amazon 11-sst2 12-dbpedia; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path $2 \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --mlp_hidden_dim 100 \
    --model_name gainlora_inflora \
    --threshold 0.995 \
    --transthreshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
671
+
672
+
673
# Stage 14/15 of the long_order4 gainlora_inflora run: train and predict on
# `multirc`, starting from the files saved by stage 13 (agnews).
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
run_name=gen_script_long_order4_t5_small_gainlora_inflora
out_root=logs_and_outputs/$run_name/outputs
task=multirc
stage_dir=$out_root/14-$task
prev_weights=$out_root/13-agnews/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic 4-copa 5-qqp 6-boolq 7-rte 8-imdb \
                  9-yelp 10-amazon 11-sst2 12-dbpedia 13-agnews; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path $2 \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --mlp_hidden_dim 100 \
    --model_name gainlora_inflora \
    --threshold 0.995 \
    --transthreshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
722
+
723
+
724
# Stage 15/15 of the long_order4 gainlora_inflora run: train and predict on
# `yahoo`, starting from the files saved by stage 14 (multirc).
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
run_name=gen_script_long_order4_t5_small_gainlora_inflora
out_root=logs_and_outputs/$run_name/outputs
task=yahoo
stage_dir=$out_root/15-$task
prev_weights=$out_root/14-multirc/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic 4-copa 5-qqp 6-boolq 7-rte 8-imdb \
                  9-yelp 10-amazon 11-sst2 12-dbpedia 13-agnews 14-multirc; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path $2 \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --mlp_hidden_dim 100 \
    --model_name gainlora_inflora \
    --threshold 0.995 \
    --transthreshold 0.995

# Remove the checkpoint* entries written under this stage's output directory.
rm -rf "$stage_dir"/checkpoint*

sleep 5
773
+
774
# Run score.py for this experiment; the run name is passed as both positional
# arguments. $1 is the GPU id(s) placed in CUDA_VISIBLE_DEVICES.
run_name=gen_script_long_order4_t5_small_gainlora_inflora
CUDA_VISIBLE_DEVICES=$1 python score.py "$run_name" "$run_name"
root_gainlora/T5_small/gen_script_long_order4_t5_small_inflora.sh ADDED
@@ -0,0 +1,744 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
# Ask CUDA to enumerate devices in PCI bus order for every process started below.
export CUDA_DEVICE_ORDER=PCI_BUS_ID

# Random port in [25000, 30000]; no command in the visible part of this
# script references it.
port=$(shuf -i 25000-30000 -n 1)
13
+
14
# Stage 1/15 of the long_order4 `inflora` run: train and predict on `mnli`.
# This is the first task, so no previous weights are passed.
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=mnli
stage_dir=$out_root/1-$task

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model eval_exact_match \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --add_instruction_replay \
    --data_replay_freq -1 \
    --replay_after_n_epoch 0 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
57
+
58
+
59
# Stage 2/15 of the long_order4 `inflora` run: train and predict on `cb`,
# starting from the files saved by stage 1 (mnli).
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=cb
stage_dir=$out_root/2-$task
prev_weights=$out_root/1-mnli/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
106
+
107
+
108
# Stage 3/15 of the long_order4 `inflora` run: train and predict on `wic`,
# starting from the files saved by stage 2 (cb).
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=wic
stage_dir=$out_root/3-$task
prev_weights=$out_root/2-cb/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
155
+
156
+
157
# Stage 4/15 of the long_order4 `inflora` run: train and predict on `copa`,
# starting from the files saved by stage 3 (wic).
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=copa
stage_dir=$out_root/4-$task
prev_weights=$out_root/3-wic/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
204
+
205
+
206
# Stage 5/15 of the long_order4 `inflora` run: train and predict on `qqp`,
# starting from the files saved by stage 4 (copa).
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=qqp
stage_dir=$out_root/5-$task
prev_weights=$out_root/4-copa/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic 4-copa; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
253
+
254
+
255
# Stage 6/15 of the long_order4 `inflora` run: train and predict on `boolq`,
# starting from the files saved by stage 5 (qqp).
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=boolq
stage_dir=$out_root/6-$task
prev_weights=$out_root/5-qqp/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic 4-copa 5-qqp; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
302
+
303
+
304
# Stage 7/15 of the long_order4 `inflora` run: train and predict on `rte`,
# starting from the files saved by stage 6 (boolq).
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=rte
stage_dir=$out_root/7-$task
prev_weights=$out_root/6-boolq/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic 4-copa 5-qqp 6-boolq; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
351
+
352
+
353
# Stage 8/15 of the long_order4 `inflora` run: train and predict on `imdb`,
# starting from the files saved by stage 7 (rte).
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=imdb
stage_dir=$out_root/8-$task
prev_weights=$out_root/7-rte/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic 4-copa 5-qqp 6-boolq 7-rte; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
400
+
401
+
402
# Stage 9/15 of the long_order4 `inflora` run: train and predict on `yelp`,
# starting from the files saved by stage 8 (imdb).
#
# Positional arguments:
#   $1  GPU id(s) placed in CUDA_VISIBLE_DEVICES
#   $2  model name or path forwarded to --model_name_or_path
#
# The GPU comes from $1 rather than a fixed index: this script's SBATCH header
# requests a single GPU, and such an allocation typically exposes only device 0.
run_name=gen_script_long_order4_t5_small_inflora
out_root=logs_and_outputs/$run_name/outputs
task=yelp
stage_dir=$out_root/9-$task
prev_weights=$out_root/8-imdb/saved_weights

# Comma-separated saved_weights directories of every stage trained so far.
prev_lora=
for done_stage in 1-mnli 2-cb 3-wic 4-copa 5-qqp 6-boolq 7-rte 8-imdb; do
    prev_lora="${prev_lora:+$prev_lora,}$out_root/$done_stage/saved_weights"
done

CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
    --do_train \
    --do_predict \
    --predict_with_generate \
    --model_name_or_path "$2" \
    --load_checkpoint_from "$prev_weights/trans_input.pt" \
    --previous_lora_path "$prev_lora" \
    --previous_prompt_key_path "$prev_weights/prompts_keys_till_now.pt" \
    --data_dir CL_Benchmark \
    --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
    --gen_data_dir generated_data/lora_gen_long_t5 \
    --task_config_dir "configs/gen_script_long_order4_t5_small_configs/$task" \
    --output_dir "$stage_dir" \
    --per_device_train_batch_size 32 \
    --per_device_eval_batch_size 256 \
    --gradient_accumulation_steps 1 \
    --learning_rate 0.0003 \
    --num_train_epochs 10 \
    --run_name "$run_name" \
    --max_source_length 512 \
    --max_target_length 50 \
    --generation_max_length 50 \
    --add_task_name False \
    --add_dataset_name False \
    --overwrite_output_dir \
    --overwrite_cache \
    --lr_scheduler_type constant \
    --warmup_steps 0 \
    --logging_strategy steps \
    --logging_steps 10 \
    --metric_for_best_model "eval_exact_match_for_$task" \
    --evaluation_strategy steps \
    --save_strategy steps \
    --save_total_limit 1 \
    --load_best_model_at_end \
    --lora_r 8 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --data_replay_freq -1 \
    --kl_ratio 0.1 \
    --attn_temperature 1 \
    --model_name inflora \
    --threshold 0.995

# Later stages only reference saved_weights/, so the checkpoint* entries of
# this stage are removed.
rm -rf "$stage_dir"/checkpoint*

sleep 5
449
+
450
+ # Step 10/15 (amazon): run_t5.py with --model_name inflora, reusing saved_weights from steps 1-9. $1 = GPU id (falls back to 1), $2 = model name or path.
451
+ CUDA_VISIBLE_DEVICES="${1:-1}" python src/run_t5.py \
452
+ --do_train \
453
+ --do_predict \
454
+ --predict_with_generate \
455
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
456
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights/trans_input.pt \
457
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights \
458
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights/prompts_keys_till_now.pt \
459
+ --data_dir CL_Benchmark \
460
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
461
+ --gen_data_dir generated_data/lora_gen_long_t5 \
462
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/amazon \
463
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon \
464
+ --per_device_train_batch_size 32 \
465
+ --per_device_eval_batch_size 256 \
466
+ --gradient_accumulation_steps 1 \
467
+ --learning_rate 0.0003 \
468
+ --num_train_epochs 10 \
469
+ --run_name gen_script_long_order4_t5_small_inflora \
470
+ --max_source_length 512 \
471
+ --max_target_length 50 \
472
+ --generation_max_length 50 \
473
+ --add_task_name False \
474
+ --add_dataset_name False \
475
+ --overwrite_output_dir \
476
+ --overwrite_cache \
477
+ --lr_scheduler_type constant \
478
+ --warmup_steps 0 \
479
+ --logging_strategy steps \
480
+ --logging_steps 10 \
481
+ --metric_for_best_model eval_exact_match_for_amazon \
482
+ --evaluation_strategy steps \
483
+ --save_strategy steps \
484
+ --save_total_limit 1 \
485
+ --load_best_model_at_end \
486
+ --lora_r 8 \
487
+ --lora_alpha 32 \
488
+ --lora_dropout 0.0 \
489
+ --data_replay_freq -1 \
490
+ --kl_ratio 0.1 \
491
+ --attn_temperature 1 \
492
+ --model_name inflora \
493
+ --threshold 0.995
494
+
495
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/checkpoint*
496
+
497
+ sleep 5
498
+
499
+ # Step 11/15 (sst2): run_t5.py with --model_name inflora, reusing saved_weights from steps 1-10. $1 = GPU id (falls back to 1), $2 = model name or path.
500
+ CUDA_VISIBLE_DEVICES="${1:-1}" python src/run_t5.py \
501
+ --do_train \
502
+ --do_predict \
503
+ --predict_with_generate \
504
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
505
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights/trans_input.pt \
506
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights \
507
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights/prompts_keys_till_now.pt \
508
+ --data_dir CL_Benchmark \
509
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
510
+ --gen_data_dir generated_data/lora_gen_long_t5 \
511
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/sst2 \
512
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2 \
513
+ --per_device_train_batch_size 32 \
514
+ --per_device_eval_batch_size 256 \
515
+ --gradient_accumulation_steps 1 \
516
+ --learning_rate 0.0003 \
517
+ --num_train_epochs 10 \
518
+ --run_name gen_script_long_order4_t5_small_inflora \
519
+ --max_source_length 512 \
520
+ --max_target_length 50 \
521
+ --generation_max_length 50 \
522
+ --add_task_name False \
523
+ --add_dataset_name False \
524
+ --overwrite_output_dir \
525
+ --overwrite_cache \
526
+ --lr_scheduler_type constant \
527
+ --warmup_steps 0 \
528
+ --logging_strategy steps \
529
+ --logging_steps 10 \
530
+ --metric_for_best_model eval_exact_match_for_sst2 \
531
+ --evaluation_strategy steps \
532
+ --save_strategy steps \
533
+ --save_total_limit 1 \
534
+ --load_best_model_at_end \
535
+ --lora_r 8 \
536
+ --lora_alpha 32 \
537
+ --lora_dropout 0.0 \
538
+ --data_replay_freq -1 \
539
+ --kl_ratio 0.1 \
540
+ --attn_temperature 1 \
541
+ --model_name inflora \
542
+ --threshold 0.995
543
+
544
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/checkpoint*
545
+
546
+ sleep 5
547
+
548
+ # Step 12/15 (dbpedia): run_t5.py with --model_name inflora, reusing saved_weights from steps 1-11. $1 = GPU id (falls back to 1), $2 = model name or path.
549
+ CUDA_VISIBLE_DEVICES="${1:-1}" python src/run_t5.py \
550
+ --do_train \
551
+ --do_predict \
552
+ --predict_with_generate \
553
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
554
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights/trans_input.pt \
555
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights \
556
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights/prompts_keys_till_now.pt \
557
+ --data_dir CL_Benchmark \
558
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
559
+ --gen_data_dir generated_data/lora_gen_long_t5 \
560
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/dbpedia \
561
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia \
562
+ --per_device_train_batch_size 32 \
563
+ --per_device_eval_batch_size 256 \
564
+ --gradient_accumulation_steps 1 \
565
+ --learning_rate 0.0003 \
566
+ --num_train_epochs 10 \
567
+ --run_name gen_script_long_order4_t5_small_inflora \
568
+ --max_source_length 512 \
569
+ --max_target_length 50 \
570
+ --generation_max_length 50 \
571
+ --add_task_name False \
572
+ --add_dataset_name False \
573
+ --overwrite_output_dir \
574
+ --overwrite_cache \
575
+ --lr_scheduler_type constant \
576
+ --warmup_steps 0 \
577
+ --logging_strategy steps \
578
+ --logging_steps 10 \
579
+ --metric_for_best_model eval_exact_match_for_dbpedia \
580
+ --evaluation_strategy steps \
581
+ --save_strategy steps \
582
+ --save_total_limit 1 \
583
+ --load_best_model_at_end \
584
+ --lora_r 8 \
585
+ --lora_alpha 32 \
586
+ --lora_dropout 0.0 \
587
+ --data_replay_freq -1 \
588
+ --kl_ratio 0.1 \
589
+ --attn_temperature 1 \
590
+ --model_name inflora \
591
+ --threshold 0.995
592
+
593
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/checkpoint*
594
+
595
+ sleep 5
596
+
597
+ # Step 13/15 (agnews): run_t5.py with --model_name inflora, reusing saved_weights from steps 1-12. $1 = GPU id (falls back to 1), $2 = model name or path.
598
+ CUDA_VISIBLE_DEVICES="${1:-1}" python src/run_t5.py \
599
+ --do_train \
600
+ --do_predict \
601
+ --predict_with_generate \
602
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
603
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights/trans_input.pt \
604
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights \
605
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights/prompts_keys_till_now.pt \
606
+ --data_dir CL_Benchmark \
607
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
608
+ --gen_data_dir generated_data/lora_gen_long_t5 \
609
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/agnews \
610
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews \
611
+ --per_device_train_batch_size 32 \
612
+ --per_device_eval_batch_size 256 \
613
+ --gradient_accumulation_steps 1 \
614
+ --learning_rate 0.0003 \
615
+ --num_train_epochs 10 \
616
+ --run_name gen_script_long_order4_t5_small_inflora \
617
+ --max_source_length 512 \
618
+ --max_target_length 50 \
619
+ --generation_max_length 50 \
620
+ --add_task_name False \
621
+ --add_dataset_name False \
622
+ --overwrite_output_dir \
623
+ --overwrite_cache \
624
+ --lr_scheduler_type constant \
625
+ --warmup_steps 0 \
626
+ --logging_strategy steps \
627
+ --logging_steps 10 \
628
+ --metric_for_best_model eval_exact_match_for_agnews \
629
+ --evaluation_strategy steps \
630
+ --save_strategy steps \
631
+ --save_total_limit 1 \
632
+ --load_best_model_at_end \
633
+ --lora_r 8 \
634
+ --lora_alpha 32 \
635
+ --lora_dropout 0.0 \
636
+ --data_replay_freq -1 \
637
+ --kl_ratio 0.1 \
638
+ --attn_temperature 1 \
639
+ --model_name inflora \
640
+ --threshold 0.995
641
+
642
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/checkpoint*
643
+
644
+ sleep 5
645
+
646
+ # Step 14/15 (multirc): run_t5.py with --model_name inflora, reusing saved_weights from steps 1-13. $1 = GPU id (falls back to 1), $2 = model name or path.
647
+ CUDA_VISIBLE_DEVICES="${1:-1}" python src/run_t5.py \
648
+ --do_train \
649
+ --do_predict \
650
+ --predict_with_generate \
651
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
652
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/saved_weights/trans_input.pt \
653
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/saved_weights \
654
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/saved_weights/prompts_keys_till_now.pt \
655
+ --data_dir CL_Benchmark \
656
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
657
+ --gen_data_dir generated_data/lora_gen_long_t5 \
658
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/multirc \
659
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc \
660
+ --per_device_train_batch_size 32 \
661
+ --per_device_eval_batch_size 256 \
662
+ --gradient_accumulation_steps 1 \
663
+ --learning_rate 0.0003 \
664
+ --num_train_epochs 10 \
665
+ --run_name gen_script_long_order4_t5_small_inflora \
666
+ --max_source_length 512 \
667
+ --max_target_length 50 \
668
+ --generation_max_length 50 \
669
+ --add_task_name False \
670
+ --add_dataset_name False \
671
+ --overwrite_output_dir \
672
+ --overwrite_cache \
673
+ --lr_scheduler_type constant \
674
+ --warmup_steps 0 \
675
+ --logging_strategy steps \
676
+ --logging_steps 10 \
677
+ --metric_for_best_model eval_exact_match_for_multirc \
678
+ --evaluation_strategy steps \
679
+ --save_strategy steps \
680
+ --save_total_limit 1 \
681
+ --load_best_model_at_end \
682
+ --lora_r 8 \
683
+ --lora_alpha 32 \
684
+ --lora_dropout 0.0 \
685
+ --data_replay_freq -1 \
686
+ --kl_ratio 0.1 \
687
+ --attn_temperature 1 \
688
+ --model_name inflora \
689
+ --threshold 0.995
690
+
691
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc/checkpoint*
692
+
693
+ sleep 5
694
+
695
+ # Step 15/15 (yahoo): run_t5.py with --model_name inflora, reusing saved_weights from steps 1-14. $1 = GPU id (falls back to 1), $2 = model name or path.
696
+ CUDA_VISIBLE_DEVICES="${1:-1}" python src/run_t5.py \
697
+ --do_train \
698
+ --do_predict \
699
+ --predict_with_generate \
700
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
701
+ --load_checkpoint_from logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc/saved_weights/trans_input.pt \
702
+ --previous_lora_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/1-mnli/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/2-cb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/3-wic/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/4-copa/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/5-qqp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/6-boolq/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/7-rte/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/8-imdb/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/9-yelp/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/10-amazon/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/11-sst2/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/12-dbpedia/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/13-agnews/saved_weights,logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc/saved_weights \
703
+ --previous_prompt_key_path logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/14-multirc/saved_weights/prompts_keys_till_now.pt \
704
+ --data_dir CL_Benchmark \
705
+ --task_order mnli,cb,wic,copa,qqp,boolq,rte,imdb,yelp,amazon,sst2,dbpedia,agnews,multirc,yahoo \
706
+ --gen_data_dir generated_data/lora_gen_long_t5 \
707
+ --task_config_dir configs/gen_script_long_order4_t5_small_configs/yahoo \
708
+ --output_dir logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/15-yahoo \
709
+ --per_device_train_batch_size 32 \
710
+ --per_device_eval_batch_size 256 \
711
+ --gradient_accumulation_steps 1 \
712
+ --learning_rate 0.0003 \
713
+ --num_train_epochs 10 \
714
+ --run_name gen_script_long_order4_t5_small_inflora \
715
+ --max_source_length 512 \
716
+ --max_target_length 50 \
717
+ --generation_max_length 50 \
718
+ --add_task_name False \
719
+ --add_dataset_name False \
720
+ --overwrite_output_dir \
721
+ --overwrite_cache \
722
+ --lr_scheduler_type constant \
723
+ --warmup_steps 0 \
724
+ --logging_strategy steps \
725
+ --logging_steps 10 \
726
+ --metric_for_best_model eval_exact_match_for_yahoo \
727
+ --evaluation_strategy steps \
728
+ --save_strategy steps \
729
+ --save_total_limit 1 \
730
+ --load_best_model_at_end \
731
+ --lora_r 8 \
732
+ --lora_alpha 32 \
733
+ --lora_dropout 0.0 \
734
+ --data_replay_freq -1 \
735
+ --kl_ratio 0.1 \
736
+ --attn_temperature 1 \
737
+ --model_name inflora \
738
+ --threshold 0.995
739
+
740
+ rm -rf logs_and_outputs/gen_script_long_order4_t5_small_inflora/outputs/15-yahoo/checkpoint*
741
+
742
+ sleep 5
743
+ # Final step: invoke score.py with the run name passed twice (argument meaning is defined in score.py, not shown here). GPU id comes from $1, falling back to 1.
744
+ CUDA_VISIBLE_DEVICES="${1:-1}" python score.py gen_script_long_order4_t5_small_inflora gen_script_long_order4_t5_small_inflora
root_gainlora/T5_small/gen_script_superni_order1_t5_small_gainlora_inflora.sh ADDED
@@ -0,0 +1,744 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+ # Enumerate GPUs in PCI bus order so the index given via CUDA_VISIBLE_DEVICES below is stable. NOTE(review): the directive above requests an A100-80GB; confirm it matches the intended hardware.
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+ # Random port in 25000-30000; it is not referenced in the visible part of this script (TODO confirm it is used further down or drop it).
12
+ port=$(shuf -i25000-30000 -n1)
13
+ # Step 1/15 (task1572_samsum_summary): first gainlora_inflora run, so no checkpoint or previous-LoRA paths are passed. $1 = GPU id, $2 = model name or path (both required). NOTE(review): later steps select on eval_rougeL_for_<task>; confirm plain eval_rougeL is intended here.
14
+ CUDA_VISIBLE_DEVICES="${1:?missing GPU id (arg 1)}" python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
19
+ --data_dir CL_Benchmark \
20
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
21
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1572_samsum_summary \
22
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary \
23
+ --per_device_train_batch_size 16 \
24
+ --per_device_eval_batch_size 8 \
25
+ --gradient_accumulation_steps 2 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 100 \
28
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_rougeL \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --lora_r 4 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --load_best_model_at_end \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --kl_ratio 0.5 \
51
+ --attn_temperature 1 \
52
+ --mlp_hidden_dim 100 \
53
+ --model_name gainlora_inflora \
54
+ --threshold 0.995 \
55
+ --transthreshold 0.995
56
+
57
+
58
+ # Step 2/15 (task363_sst2_polarity_classification): gainlora_inflora run that loads trans_input.pt, saved_weights and prompt keys from step 1. $1 = GPU id, $2 = model name or path (both required).
59
+ CUDA_VISIBLE_DEVICES="${1:?missing GPU id (arg 1)}" python src/run_t5.py \
60
+ --do_train \
61
+ --do_predict \
62
+ --predict_with_generate \
63
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
64
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights/trans_input.pt \
65
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights \
66
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights/prompts_keys_till_now.pt \
67
+ --data_dir CL_Benchmark \
68
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
69
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
70
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task363_sst2_polarity_classification \
71
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification \
72
+ --per_device_train_batch_size 16 \
73
+ --per_device_eval_batch_size 8 \
74
+ --gradient_accumulation_steps 2 \
75
+ --learning_rate 0.0003 \
76
+ --num_train_epochs 100 \
77
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
78
+ --max_source_length 512 \
79
+ --max_target_length 50 \
80
+ --generation_max_length 50 \
81
+ --add_task_name False \
82
+ --add_dataset_name False \
83
+ --overwrite_output_dir \
84
+ --overwrite_cache \
85
+ --lr_scheduler_type constant \
86
+ --warmup_steps 0 \
87
+ --logging_strategy steps \
88
+ --logging_steps 10 \
89
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
90
+ --evaluation_strategy steps \
91
+ --save_strategy steps \
92
+ --save_total_limit 1 \
93
+ --load_best_model_at_end \
94
+ --lora_r 4 \
95
+ --lora_alpha 32 \
96
+ --lora_dropout 0.0 \
97
+ --add_instruction_replay \
98
+ --data_replay_freq -1 \
99
+ --replay_after_n_epoch 0 \
100
+ --kl_ratio 0.5 \
101
+ --attn_temperature 1 \
102
+ --mlp_hidden_dim 100 \
103
+ --model_name gainlora_inflora \
104
+ --threshold 0.995 \
105
+ --transthreshold 0.995
106
+
107
+ # Step 3/15 (task1290_xsum_summarization): gainlora_inflora run that loads trans_input.pt and prompt keys from step 2 and saved_weights from steps 1-2. $1 = GPU id, $2 = model name or path (both required).
108
+ CUDA_VISIBLE_DEVICES="${1:?missing GPU id (arg 1)}" python src/run_t5.py \
109
+ --do_train \
110
+ --do_predict \
111
+ --predict_with_generate \
112
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
113
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights/trans_input.pt \
114
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights \
115
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights/prompts_keys_till_now.pt \
116
+ --data_dir CL_Benchmark \
117
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
118
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
119
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1290_xsum_summarization \
120
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization \
121
+ --per_device_train_batch_size 16 \
122
+ --per_device_eval_batch_size 8 \
123
+ --gradient_accumulation_steps 2 \
124
+ --learning_rate 0.0003 \
125
+ --num_train_epochs 100 \
126
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
127
+ --max_source_length 512 \
128
+ --max_target_length 50 \
129
+ --generation_max_length 50 \
130
+ --add_task_name False \
131
+ --add_dataset_name False \
132
+ --overwrite_output_dir \
133
+ --overwrite_cache \
134
+ --lr_scheduler_type constant \
135
+ --warmup_steps 0 \
136
+ --logging_strategy steps \
137
+ --logging_steps 10 \
138
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
139
+ --evaluation_strategy steps \
140
+ --save_strategy steps \
141
+ --save_total_limit 1 \
142
+ --load_best_model_at_end \
143
+ --lora_r 4 \
144
+ --lora_alpha 32 \
145
+ --lora_dropout 0.0 \
146
+ --add_instruction_replay \
147
+ --data_replay_freq -1 \
148
+ --replay_after_n_epoch 0 \
149
+ --kl_ratio 0.5 \
150
+ --attn_temperature 1 \
151
+ --mlp_hidden_dim 100 \
152
+ --model_name gainlora_inflora \
153
+ --threshold 0.995 \
154
+ --transthreshold 0.995
155
+
156
+ # Step 4/15 (task181_outcome_extraction): gainlora_inflora run that loads trans_input.pt and prompt keys from step 3 and saved_weights from steps 1-3. $1 = GPU id, $2 = model name or path (both required).
157
+ CUDA_VISIBLE_DEVICES="${1:?missing GPU id (arg 1)}" python src/run_t5.py \
158
+ --do_train \
159
+ --do_predict \
160
+ --predict_with_generate \
161
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
162
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights/trans_input.pt \
163
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights \
164
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights/prompts_keys_till_now.pt \
165
+ --data_dir CL_Benchmark \
166
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
167
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
168
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task181_outcome_extraction \
169
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction \
170
+ --per_device_train_batch_size 16 \
171
+ --per_device_eval_batch_size 8 \
172
+ --gradient_accumulation_steps 2 \
173
+ --learning_rate 0.0003 \
174
+ --num_train_epochs 100 \
175
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
176
+ --max_source_length 512 \
177
+ --max_target_length 50 \
178
+ --generation_max_length 50 \
179
+ --add_task_name False \
180
+ --add_dataset_name False \
181
+ --overwrite_output_dir \
182
+ --overwrite_cache \
183
+ --lr_scheduler_type constant \
184
+ --warmup_steps 0 \
185
+ --logging_strategy steps \
186
+ --logging_steps 10 \
187
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
188
+ --evaluation_strategy steps \
189
+ --save_strategy steps \
190
+ --save_total_limit 1 \
191
+ --load_best_model_at_end \
192
+ --lora_r 4 \
193
+ --lora_alpha 32 \
194
+ --lora_dropout 0.0 \
195
+ --add_instruction_replay \
196
+ --data_replay_freq -1 \
197
+ --replay_after_n_epoch 0 \
198
+ --kl_ratio 0.5 \
199
+ --attn_temperature 1 \
200
+ --mlp_hidden_dim 100 \
201
+ --model_name gainlora_inflora \
202
+ --threshold 0.995 \
203
+ --transthreshold 0.995
204
+
205
+ # Step 5/15 (task002_quoref_answer_generation): gainlora_inflora run that loads trans_input.pt and prompt keys from step 4 and saved_weights from steps 1-4. $1 = GPU id, $2 = model name or path (both required).
206
+ CUDA_VISIBLE_DEVICES="${1:?missing GPU id (arg 1)}" python src/run_t5.py \
207
+ --do_train \
208
+ --do_predict \
209
+ --predict_with_generate \
210
+ --model_name_or_path "${2:?missing model name or path (arg 2)}" \
211
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights/trans_input.pt \
212
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights \
213
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights/prompts_keys_till_now.pt \
214
+ --data_dir CL_Benchmark \
215
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
216
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
217
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task002_quoref_answer_generation \
218
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation \
219
+ --per_device_train_batch_size 16 \
220
+ --per_device_eval_batch_size 8 \
221
+ --gradient_accumulation_steps 2 \
222
+ --learning_rate 0.0003 \
223
+ --num_train_epochs 100 \
224
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
225
+ --max_source_length 512 \
226
+ --max_target_length 50 \
227
+ --generation_max_length 50 \
228
+ --add_task_name False \
229
+ --add_dataset_name False \
230
+ --overwrite_output_dir \
231
+ --overwrite_cache \
232
+ --lr_scheduler_type constant \
233
+ --warmup_steps 0 \
234
+ --logging_strategy steps \
235
+ --logging_steps 10 \
236
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
237
+ --evaluation_strategy steps \
238
+ --save_strategy steps \
239
+ --save_total_limit 1 \
240
+ --load_best_model_at_end \
241
+ --lora_r 4 \
242
+ --lora_alpha 32 \
243
+ --lora_dropout 0.0 \
244
+ --add_instruction_replay \
245
+ --data_replay_freq -1 \
246
+ --replay_after_n_epoch 0 \
247
+ --kl_ratio 0.5 \
248
+ --attn_temperature 1 \
249
+ --mlp_hidden_dim 100 \
250
+ --model_name gainlora_inflora \
251
+ --threshold 0.995 \
252
+ --transthreshold 0.995
253
+
254
+
255
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
256
+ --do_train \
257
+ --do_predict \
258
+ --predict_with_generate \
259
+ --model_name_or_path $2 \
260
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights/trans_input.pt \
261
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights \
262
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights/prompts_keys_till_now.pt \
263
+ --data_dir CL_Benchmark \
264
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
265
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
266
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1510_evalution_relation_extraction \
267
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction \
268
+ --per_device_train_batch_size 16 \
269
+ --per_device_eval_batch_size 8 \
270
+ --gradient_accumulation_steps 2 \
271
+ --learning_rate 0.0003 \
272
+ --num_train_epochs 100 \
273
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
274
+ --max_source_length 512 \
275
+ --max_target_length 50 \
276
+ --generation_max_length 50 \
277
+ --add_task_name False \
278
+ --add_dataset_name False \
279
+ --overwrite_output_dir \
280
+ --overwrite_cache \
281
+ --lr_scheduler_type constant \
282
+ --warmup_steps 0 \
283
+ --logging_strategy steps \
284
+ --logging_steps 10 \
285
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
286
+ --evaluation_strategy steps \
287
+ --save_strategy steps \
288
+ --save_total_limit 1 \
289
+ --load_best_model_at_end \
290
+ --lora_r 4 \
291
+ --lora_alpha 32 \
292
+ --lora_dropout 0.0 \
293
+ --add_instruction_replay \
294
+ --data_replay_freq -1 \
295
+ --replay_after_n_epoch 0 \
296
+ --kl_ratio 0.5 \
297
+ --attn_temperature 1 \
298
+ --mlp_hidden_dim 100 \
299
+ --model_name gainlora_inflora \
300
+ --threshold 0.995 \
301
+ --transthreshold 0.995
302
+
303
+
304
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
305
+ --do_train \
306
+ --do_predict \
307
+ --predict_with_generate \
308
+ --model_name_or_path $2 \
309
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights/trans_input.pt \
310
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights \
311
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights/prompts_keys_till_now.pt \
312
+ --data_dir CL_Benchmark \
313
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
314
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
315
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task639_multi_woz_user_utterance_generation \
316
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation \
317
+ --per_device_train_batch_size 16 \
318
+ --per_device_eval_batch_size 8 \
319
+ --gradient_accumulation_steps 2 \
320
+ --learning_rate 0.0003 \
321
+ --num_train_epochs 100 \
322
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
323
+ --max_source_length 512 \
324
+ --max_target_length 50 \
325
+ --generation_max_length 50 \
326
+ --add_task_name False \
327
+ --add_dataset_name False \
328
+ --overwrite_output_dir \
329
+ --overwrite_cache \
330
+ --lr_scheduler_type constant \
331
+ --warmup_steps 0 \
332
+ --logging_strategy steps \
333
+ --logging_steps 10 \
334
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
335
+ --evaluation_strategy steps \
336
+ --save_strategy steps \
337
+ --save_total_limit 1 \
338
+ --load_best_model_at_end \
339
+ --lora_r 4 \
340
+ --lora_alpha 32 \
341
+ --lora_dropout 0.0 \
342
+ --add_instruction_replay \
343
+ --data_replay_freq -1 \
344
+ --replay_after_n_epoch 0 \
345
+ --kl_ratio 0.5 \
346
+ --attn_temperature 1 \
347
+ --mlp_hidden_dim 100 \
348
+ --model_name gainlora_inflora \
349
+ --threshold 0.995 \
350
+ --transthreshold 0.995
351
+
352
+
353
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
354
+ --do_train \
355
+ --do_predict \
356
+ --predict_with_generate \
357
+ --model_name_or_path $2 \
358
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights/trans_input.pt \
359
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights \
360
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights/prompts_keys_till_now.pt \
361
+ --data_dir CL_Benchmark \
362
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
363
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
364
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1729_personachat_generate_next \
365
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next \
366
+ --per_device_train_batch_size 16 \
367
+ --per_device_eval_batch_size 8 \
368
+ --gradient_accumulation_steps 2 \
369
+ --learning_rate 0.0003 \
370
+ --num_train_epochs 100 \
371
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
372
+ --max_source_length 512 \
373
+ --max_target_length 50 \
374
+ --generation_max_length 50 \
375
+ --add_task_name False \
376
+ --add_dataset_name False \
377
+ --overwrite_output_dir \
378
+ --overwrite_cache \
379
+ --lr_scheduler_type constant \
380
+ --warmup_steps 0 \
381
+ --logging_strategy steps \
382
+ --logging_steps 10 \
383
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
384
+ --evaluation_strategy steps \
385
+ --save_strategy steps \
386
+ --save_total_limit 1 \
387
+ --load_best_model_at_end \
388
+ --lora_r 4 \
389
+ --lora_alpha 32 \
390
+ --lora_dropout 0.0 \
391
+ --add_instruction_replay \
392
+ --data_replay_freq -1 \
393
+ --replay_after_n_epoch 0 \
394
+ --kl_ratio 0.5 \
395
+ --attn_temperature 1 \
396
+ --mlp_hidden_dim 100 \
397
+ --model_name gainlora_inflora \
398
+ --threshold 0.995 \
399
+ --transthreshold 0.995
400
+
401
+
402
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
403
+ --do_train \
404
+ --do_predict \
405
+ --predict_with_generate \
406
+ --model_name_or_path $2 \
407
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights/trans_input.pt \
408
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights \
409
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights/prompts_keys_till_now.pt \
410
+ --data_dir CL_Benchmark \
411
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
412
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
413
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task073_commonsenseqa_answer_generation \
414
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation \
415
+ --per_device_train_batch_size 16 \
416
+ --per_device_eval_batch_size 8 \
417
+ --gradient_accumulation_steps 2 \
418
+ --learning_rate 0.0003 \
419
+ --num_train_epochs 100 \
420
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
421
+ --max_source_length 512 \
422
+ --max_target_length 50 \
423
+ --generation_max_length 50 \
424
+ --add_task_name False \
425
+ --add_dataset_name False \
426
+ --overwrite_output_dir \
427
+ --overwrite_cache \
428
+ --lr_scheduler_type constant \
429
+ --warmup_steps 0 \
430
+ --logging_strategy steps \
431
+ --logging_steps 10 \
432
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
433
+ --evaluation_strategy steps \
434
+ --save_strategy steps \
435
+ --save_total_limit 1 \
436
+ --load_best_model_at_end \
437
+ --lora_r 4 \
438
+ --lora_alpha 32 \
439
+ --lora_dropout 0.0 \
440
+ --add_instruction_replay \
441
+ --data_replay_freq -1 \
442
+ --replay_after_n_epoch 0 \
443
+ --kl_ratio 0.5 \
444
+ --attn_temperature 1 \
445
+ --mlp_hidden_dim 100 \
446
+ --model_name gainlora_inflora \
447
+ --threshold 0.995 \
448
+ --transthreshold 0.995
449
+
450
+
451
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
452
+ --do_train \
453
+ --do_predict \
454
+ --predict_with_generate \
455
+ --model_name_or_path $2 \
456
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights/trans_input.pt \
457
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights \
458
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights/prompts_keys_till_now.pt \
459
+ --data_dir CL_Benchmark \
460
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
461
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
462
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1590_diplomacy_text_generation \
463
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation \
464
+ --per_device_train_batch_size 16 \
465
+ --per_device_eval_batch_size 8 \
466
+ --gradient_accumulation_steps 2 \
467
+ --learning_rate 0.0003 \
468
+ --num_train_epochs 100 \
469
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
470
+ --max_source_length 512 \
471
+ --max_target_length 50 \
472
+ --generation_max_length 50 \
473
+ --add_task_name False \
474
+ --add_dataset_name False \
475
+ --overwrite_output_dir \
476
+ --overwrite_cache \
477
+ --lr_scheduler_type constant \
478
+ --warmup_steps 0 \
479
+ --logging_strategy steps \
480
+ --logging_steps 10 \
481
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
482
+ --evaluation_strategy steps \
483
+ --save_strategy steps \
484
+ --save_total_limit 1 \
485
+ --load_best_model_at_end \
486
+ --lora_r 4 \
487
+ --lora_alpha 32 \
488
+ --lora_dropout 0.0 \
489
+ --add_instruction_replay \
490
+ --data_replay_freq -1 \
491
+ --replay_after_n_epoch 0 \
492
+ --kl_ratio 0.5 \
493
+ --attn_temperature 1 \
494
+ --mlp_hidden_dim 100 \
495
+ --model_name gainlora_inflora \
496
+ --threshold 0.995 \
497
+ --transthreshold 0.995
498
+
499
+
500
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
501
+ --do_train \
502
+ --do_predict \
503
+ --predict_with_generate \
504
+ --model_name_or_path $2 \
505
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights/trans_input.pt \
506
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights \
507
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights/prompts_keys_till_now.pt \
508
+ --data_dir CL_Benchmark \
509
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
510
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
511
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task748_glucose_reverse_cause_event_detection \
512
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection \
513
+ --per_device_train_batch_size 16 \
514
+ --per_device_eval_batch_size 8 \
515
+ --gradient_accumulation_steps 2 \
516
+ --learning_rate 0.0003 \
517
+ --num_train_epochs 100 \
518
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
519
+ --max_source_length 512 \
520
+ --max_target_length 50 \
521
+ --generation_max_length 50 \
522
+ --add_task_name False \
523
+ --add_dataset_name False \
524
+ --overwrite_output_dir \
525
+ --overwrite_cache \
526
+ --lr_scheduler_type constant \
527
+ --warmup_steps 0 \
528
+ --logging_strategy steps \
529
+ --logging_steps 10 \
530
+ --metric_for_best_model eval_rougeL_for_task748_glucose_reverse_cause_event_detection \
531
+ --evaluation_strategy steps \
532
+ --save_strategy steps \
533
+ --save_total_limit 1 \
534
+ --load_best_model_at_end \
535
+ --lora_r 4 \
536
+ --lora_alpha 32 \
537
+ --lora_dropout 0.0 \
538
+ --add_instruction_replay \
539
+ --data_replay_freq -1 \
540
+ --replay_after_n_epoch 0 \
541
+ --kl_ratio 0.5 \
542
+ --attn_temperature 1 \
543
+ --mlp_hidden_dim 100 \
544
+ --model_name gainlora_inflora \
545
+ --threshold 0.995 \
546
+ --transthreshold 0.995
547
+
548
+
549
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
550
+ --do_train \
551
+ --do_predict \
552
+ --predict_with_generate \
553
+ --model_name_or_path $2 \
554
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights/trans_input.pt \
555
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights \
556
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights/prompts_keys_till_now.pt \
557
+ --data_dir CL_Benchmark \
558
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
559
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
560
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task511_reddit_tifu_long_text_summarization \
561
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization \
562
+ --per_device_train_batch_size 16 \
563
+ --per_device_eval_batch_size 8 \
564
+ --gradient_accumulation_steps 2 \
565
+ --learning_rate 0.0003 \
566
+ --num_train_epochs 100 \
567
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
568
+ --max_source_length 512 \
569
+ --max_target_length 50 \
570
+ --generation_max_length 50 \
571
+ --add_task_name False \
572
+ --add_dataset_name False \
573
+ --overwrite_output_dir \
574
+ --overwrite_cache \
575
+ --lr_scheduler_type constant \
576
+ --warmup_steps 0 \
577
+ --logging_strategy steps \
578
+ --logging_steps 10 \
579
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
580
+ --evaluation_strategy steps \
581
+ --save_strategy steps \
582
+ --save_total_limit 1 \
583
+ --load_best_model_at_end \
584
+ --lora_r 4 \
585
+ --lora_alpha 32 \
586
+ --lora_dropout 0.0 \
587
+ --add_instruction_replay \
588
+ --data_replay_freq -1 \
589
+ --replay_after_n_epoch 0 \
590
+ --kl_ratio 0.5 \
591
+ --attn_temperature 1 \
592
+ --mlp_hidden_dim 100 \
593
+ --model_name gainlora_inflora \
594
+ --threshold 0.995 \
595
+ --transthreshold 0.995
596
+
597
+
598
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
599
+ --do_train \
600
+ --do_predict \
601
+ --predict_with_generate \
602
+ --model_name_or_path $2 \
603
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/trans_input.pt \
604
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
605
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/prompts_keys_till_now.pt \
606
+ --data_dir CL_Benchmark \
607
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
608
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
609
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task591_sciq_answer_generation \
610
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation \
611
+ --per_device_train_batch_size 16 \
612
+ --per_device_eval_batch_size 8 \
613
+ --gradient_accumulation_steps 2 \
614
+ --learning_rate 0.0003 \
615
+ --num_train_epochs 100 \
616
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
617
+ --max_source_length 512 \
618
+ --max_target_length 50 \
619
+ --generation_max_length 50 \
620
+ --add_task_name False \
621
+ --add_dataset_name False \
622
+ --overwrite_output_dir \
623
+ --overwrite_cache \
624
+ --lr_scheduler_type constant \
625
+ --warmup_steps 0 \
626
+ --logging_strategy steps \
627
+ --logging_steps 10 \
628
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
629
+ --evaluation_strategy steps \
630
+ --save_strategy steps \
631
+ --save_total_limit 1 \
632
+ --load_best_model_at_end \
633
+ --lora_r 4 \
634
+ --lora_alpha 32 \
635
+ --lora_dropout 0.0 \
636
+ --add_instruction_replay \
637
+ --data_replay_freq -1 \
638
+ --replay_after_n_epoch 0 \
639
+ --kl_ratio 0.5 \
640
+ --attn_temperature 1 \
641
+ --mlp_hidden_dim 100 \
642
+ --model_name gainlora_inflora \
643
+ --threshold 0.995 \
644
+ --transthreshold 0.995
645
+
646
+
647
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
648
+ --do_train \
649
+ --do_predict \
650
+ --predict_with_generate \
651
+ --model_name_or_path $2 \
652
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation/saved_weights/trans_input.pt \
653
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation/saved_weights \
654
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation/saved_weights/prompts_keys_till_now.pt \
655
+ --data_dir CL_Benchmark \
656
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
657
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
658
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1687_sentiment140_classification \
659
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/14-task1687_sentiment140_classification \
660
+ --per_device_train_batch_size 16 \
661
+ --per_device_eval_batch_size 8 \
662
+ --gradient_accumulation_steps 2 \
663
+ --learning_rate 0.0003 \
664
+ --num_train_epochs 100 \
665
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
666
+ --max_source_length 512 \
667
+ --max_target_length 50 \
668
+ --generation_max_length 50 \
669
+ --add_task_name False \
670
+ --add_dataset_name False \
671
+ --overwrite_output_dir \
672
+ --overwrite_cache \
673
+ --lr_scheduler_type constant \
674
+ --warmup_steps 0 \
675
+ --logging_strategy steps \
676
+ --logging_steps 10 \
677
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
678
+ --evaluation_strategy steps \
679
+ --save_strategy steps \
680
+ --save_total_limit 1 \
681
+ --load_best_model_at_end \
682
+ --lora_r 4 \
683
+ --lora_alpha 32 \
684
+ --lora_dropout 0.0 \
685
+ --add_instruction_replay \
686
+ --data_replay_freq -1 \
687
+ --replay_after_n_epoch 0 \
688
+ --kl_ratio 0.5 \
689
+ --attn_temperature 1 \
690
+ --mlp_hidden_dim 100 \
691
+ --model_name gainlora_inflora \
692
+ --threshold 0.995 \
693
+ --transthreshold 0.995
694
+
695
+
696
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
697
+ --do_train \
698
+ --do_predict \
699
+ --predict_with_generate \
700
+ --model_name_or_path $2 \
701
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/14-task1687_sentiment140_classification/saved_weights/trans_input.pt \
702
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/13-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/14-task1687_sentiment140_classification/saved_weights \
703
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/14-task1687_sentiment140_classification/saved_weights/prompts_keys_till_now.pt \
704
+ --data_dir CL_Benchmark \
705
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
706
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
707
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task875_emotion_classification \
708
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_gainlora_inflora/outputs/15-task875_emotion_classification \
709
+ --per_device_train_batch_size 16 \
710
+ --per_device_eval_batch_size 8 \
711
+ --gradient_accumulation_steps 2 \
712
+ --learning_rate 0.0003 \
713
+ --num_train_epochs 100 \
714
+ --run_name gen_script_superni_order1_t5_small_gainlora_inflora \
715
+ --max_source_length 512 \
716
+ --max_target_length 50 \
717
+ --generation_max_length 50 \
718
+ --add_task_name False \
719
+ --add_dataset_name False \
720
+ --overwrite_output_dir \
721
+ --overwrite_cache \
722
+ --lr_scheduler_type constant \
723
+ --warmup_steps 0 \
724
+ --logging_strategy steps \
725
+ --logging_steps 10 \
726
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
727
+ --evaluation_strategy steps \
728
+ --save_strategy steps \
729
+ --save_total_limit 1 \
730
+ --load_best_model_at_end \
731
+ --lora_r 4 \
732
+ --lora_alpha 32 \
733
+ --lora_dropout 0.0 \
734
+ --add_instruction_replay \
735
+ --data_replay_freq -1 \
736
+ --replay_after_n_epoch 0 \
737
+ --kl_ratio 0.5 \
738
+ --attn_temperature 1 \
739
+ --mlp_hidden_dim 100 \
740
+ --model_name gainlora_inflora \
741
+ --threshold 0.995 \
742
+ --transthreshold 0.995
743
+
744
+ python score.py gen_script_superni_order1_t5_small_gainlora_inflora gen_script_superni_order1_t5_small_gainlora_inflora
root_gainlora/T5_small/gen_script_superni_order1_t5_small_inflora.sh ADDED
@@ -0,0 +1,713 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
21
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1572_samsum_summary \
22
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary \
23
+ --per_device_train_batch_size 16 \
24
+ --per_device_eval_batch_size 8 \
25
+ --gradient_accumulation_steps 2 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 100 \
28
+ --run_name gen_script_superni_order1_t5_small_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_rougeL \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --lora_r 4 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --load_best_model_at_end \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --kl_ratio 0.5 \
51
+ --attn_temperature 1 \
52
+ --model_name inflora \
53
+ --threshold 0.995
54
+
55
+
56
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
57
+ --do_train \
58
+ --do_predict \
59
+ --predict_with_generate \
60
+ --model_name_or_path $2 \
61
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights/trans_input.pt \
62
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights \
63
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights/prompts_keys_till_now.pt \
64
+ --data_dir CL_Benchmark \
65
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
66
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
67
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task363_sst2_polarity_classification \
68
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification \
69
+ --per_device_train_batch_size 16 \
70
+ --per_device_eval_batch_size 8 \
71
+ --gradient_accumulation_steps 2 \
72
+ --learning_rate 0.0003 \
73
+ --num_train_epochs 100 \
74
+ --run_name gen_script_superni_order1_t5_small_inflora \
75
+ --max_source_length 512 \
76
+ --max_target_length 50 \
77
+ --generation_max_length 50 \
78
+ --add_task_name False \
79
+ --add_dataset_name False \
80
+ --overwrite_output_dir \
81
+ --overwrite_cache \
82
+ --lr_scheduler_type constant \
83
+ --warmup_steps 0 \
84
+ --logging_strategy steps \
85
+ --logging_steps 10 \
86
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
87
+ --evaluation_strategy steps \
88
+ --save_strategy steps \
89
+ --save_total_limit 1 \
90
+ --load_best_model_at_end \
91
+ --lora_r 4 \
92
+ --lora_alpha 32 \
93
+ --lora_dropout 0.0 \
94
+ --add_instruction_replay \
95
+ --data_replay_freq -1 \
96
+ --replay_after_n_epoch 0 \
97
+ --kl_ratio 0.5 \
98
+ --attn_temperature 1 \
99
+ --model_name inflora \
100
+ --threshold 0.995
101
+
102
+
103
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
104
+ --do_train \
105
+ --do_predict \
106
+ --predict_with_generate \
107
+ --model_name_or_path $2 \
108
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights/trans_input.pt \
109
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights \
110
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights/prompts_keys_till_now.pt \
111
+ --data_dir CL_Benchmark \
112
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
113
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
114
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1290_xsum_summarization \
115
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization \
116
+ --per_device_train_batch_size 16 \
117
+ --per_device_eval_batch_size 8 \
118
+ --gradient_accumulation_steps 2 \
119
+ --learning_rate 0.0003 \
120
+ --num_train_epochs 100 \
121
+ --run_name gen_script_superni_order1_t5_small_inflora \
122
+ --max_source_length 512 \
123
+ --max_target_length 50 \
124
+ --generation_max_length 50 \
125
+ --add_task_name False \
126
+ --add_dataset_name False \
127
+ --overwrite_output_dir \
128
+ --overwrite_cache \
129
+ --lr_scheduler_type constant \
130
+ --warmup_steps 0 \
131
+ --logging_strategy steps \
132
+ --logging_steps 10 \
133
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
134
+ --evaluation_strategy steps \
135
+ --save_strategy steps \
136
+ --save_total_limit 1 \
137
+ --load_best_model_at_end \
138
+ --lora_r 4 \
139
+ --lora_alpha 32 \
140
+ --lora_dropout 0.0 \
141
+ --add_instruction_replay \
142
+ --data_replay_freq -1 \
143
+ --replay_after_n_epoch 0 \
144
+ --kl_ratio 0.5 \
145
+ --attn_temperature 1 \
146
+ --model_name inflora \
147
+ --threshold 0.995
148
+
149
+
150
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
151
+ --do_train \
152
+ --do_predict \
153
+ --predict_with_generate \
154
+ --model_name_or_path $2 \
155
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights/trans_input.pt \
156
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights \
157
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights/prompts_keys_till_now.pt \
158
+ --data_dir CL_Benchmark \
159
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
160
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
161
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task181_outcome_extraction \
162
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction \
163
+ --per_device_train_batch_size 16 \
164
+ --per_device_eval_batch_size 8 \
165
+ --gradient_accumulation_steps 2 \
166
+ --learning_rate 0.0003 \
167
+ --num_train_epochs 100 \
168
+ --run_name gen_script_superni_order1_t5_small_inflora \
169
+ --max_source_length 512 \
170
+ --max_target_length 50 \
171
+ --generation_max_length 50 \
172
+ --add_task_name False \
173
+ --add_dataset_name False \
174
+ --overwrite_output_dir \
175
+ --overwrite_cache \
176
+ --lr_scheduler_type constant \
177
+ --warmup_steps 0 \
178
+ --logging_strategy steps \
179
+ --logging_steps 10 \
180
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
181
+ --evaluation_strategy steps \
182
+ --save_strategy steps \
183
+ --save_total_limit 1 \
184
+ --load_best_model_at_end \
185
+ --lora_r 4 \
186
+ --lora_alpha 32 \
187
+ --lora_dropout 0.0 \
188
+ --add_instruction_replay \
189
+ --data_replay_freq -1 \
190
+ --replay_after_n_epoch 0 \
191
+ --kl_ratio 0.5 \
192
+ --attn_temperature 1 \
193
+ --model_name inflora \
194
+ --threshold 0.995
195
+
196
+
197
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
198
+ --do_train \
199
+ --do_predict \
200
+ --predict_with_generate \
201
+ --model_name_or_path $2 \
202
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights/trans_input.pt \
203
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights \
204
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights/prompts_keys_till_now.pt \
205
+ --data_dir CL_Benchmark \
206
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
207
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
208
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task002_quoref_answer_generation \
209
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation \
210
+ --per_device_train_batch_size 16 \
211
+ --per_device_eval_batch_size 8 \
212
+ --gradient_accumulation_steps 2 \
213
+ --learning_rate 0.0003 \
214
+ --num_train_epochs 100 \
215
+ --run_name gen_script_superni_order1_t5_small_inflora \
216
+ --max_source_length 512 \
217
+ --max_target_length 50 \
218
+ --generation_max_length 50 \
219
+ --add_task_name False \
220
+ --add_dataset_name False \
221
+ --overwrite_output_dir \
222
+ --overwrite_cache \
223
+ --lr_scheduler_type constant \
224
+ --warmup_steps 0 \
225
+ --logging_strategy steps \
226
+ --logging_steps 10 \
227
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
228
+ --evaluation_strategy steps \
229
+ --save_strategy steps \
230
+ --save_total_limit 1 \
231
+ --load_best_model_at_end \
232
+ --lora_r 4 \
233
+ --lora_alpha 32 \
234
+ --lora_dropout 0.0 \
235
+ --add_instruction_replay \
236
+ --data_replay_freq -1 \
237
+ --replay_after_n_epoch 0 \
238
+ --kl_ratio 0.5 \
239
+ --attn_temperature 1 \
240
+ --model_name inflora \
241
+ --threshold 0.995
242
+
243
+
244
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
245
+ --do_train \
246
+ --do_predict \
247
+ --predict_with_generate \
248
+ --model_name_or_path $2 \
249
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights/trans_input.pt \
250
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights \
251
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights/prompts_keys_till_now.pt \
252
+ --data_dir CL_Benchmark \
253
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
254
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
255
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1510_evalution_relation_extraction \
256
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction \
257
+ --per_device_train_batch_size 16 \
258
+ --per_device_eval_batch_size 8 \
259
+ --gradient_accumulation_steps 2 \
260
+ --learning_rate 0.0003 \
261
+ --num_train_epochs 100 \
262
+ --run_name gen_script_superni_order1_t5_small_inflora \
263
+ --max_source_length 512 \
264
+ --max_target_length 50 \
265
+ --generation_max_length 50 \
266
+ --add_task_name False \
267
+ --add_dataset_name False \
268
+ --overwrite_output_dir \
269
+ --overwrite_cache \
270
+ --lr_scheduler_type constant \
271
+ --warmup_steps 0 \
272
+ --logging_strategy steps \
273
+ --logging_steps 10 \
274
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
275
+ --evaluation_strategy steps \
276
+ --save_strategy steps \
277
+ --save_total_limit 1 \
278
+ --load_best_model_at_end \
279
+ --lora_r 4 \
280
+ --lora_alpha 32 \
281
+ --lora_dropout 0.0 \
282
+ --add_instruction_replay \
283
+ --data_replay_freq -1 \
284
+ --replay_after_n_epoch 0 \
285
+ --kl_ratio 0.5 \
286
+ --attn_temperature 1 \
287
+ --model_name inflora \
288
+ --threshold 0.995
289
+
290
+
291
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
292
+ --do_train \
293
+ --do_predict \
294
+ --predict_with_generate \
295
+ --model_name_or_path $2 \
296
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights/trans_input.pt \
297
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights \
298
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights/prompts_keys_till_now.pt \
299
+ --data_dir CL_Benchmark \
300
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
301
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
302
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task639_multi_woz_user_utterance_generation \
303
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation \
304
+ --per_device_train_batch_size 16 \
305
+ --per_device_eval_batch_size 8 \
306
+ --gradient_accumulation_steps 2 \
307
+ --learning_rate 0.0003 \
308
+ --num_train_epochs 100 \
309
+ --run_name gen_script_superni_order1_t5_small_inflora \
310
+ --max_source_length 512 \
311
+ --max_target_length 50 \
312
+ --generation_max_length 50 \
313
+ --add_task_name False \
314
+ --add_dataset_name False \
315
+ --overwrite_output_dir \
316
+ --overwrite_cache \
317
+ --lr_scheduler_type constant \
318
+ --warmup_steps 0 \
319
+ --logging_strategy steps \
320
+ --logging_steps 10 \
321
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
322
+ --evaluation_strategy steps \
323
+ --save_strategy steps \
324
+ --save_total_limit 1 \
325
+ --load_best_model_at_end \
326
+ --lora_r 4 \
327
+ --lora_alpha 32 \
328
+ --lora_dropout 0.0 \
329
+ --add_instruction_replay \
330
+ --data_replay_freq -1 \
331
+ --replay_after_n_epoch 0 \
332
+ --kl_ratio 0.5 \
333
+ --attn_temperature 1 \
334
+ --model_name inflora \
335
+ --threshold 0.995
336
+
337
+
338
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
339
+ --do_train \
340
+ --do_predict \
341
+ --predict_with_generate \
342
+ --model_name_or_path $2 \
343
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights/trans_input.pt \
344
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights \
345
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights/prompts_keys_till_now.pt \
346
+ --data_dir CL_Benchmark \
347
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
348
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
349
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1729_personachat_generate_next \
350
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next \
351
+ --per_device_train_batch_size 16 \
352
+ --per_device_eval_batch_size 8 \
353
+ --gradient_accumulation_steps 2 \
354
+ --learning_rate 0.0003 \
355
+ --num_train_epochs 100 \
356
+ --run_name gen_script_superni_order1_t5_small_inflora \
357
+ --max_source_length 512 \
358
+ --max_target_length 50 \
359
+ --generation_max_length 50 \
360
+ --add_task_name False \
361
+ --add_dataset_name False \
362
+ --overwrite_output_dir \
363
+ --overwrite_cache \
364
+ --lr_scheduler_type constant \
365
+ --warmup_steps 0 \
366
+ --logging_strategy steps \
367
+ --logging_steps 10 \
368
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
369
+ --evaluation_strategy steps \
370
+ --save_strategy steps \
371
+ --save_total_limit 1 \
372
+ --load_best_model_at_end \
373
+ --lora_r 4 \
374
+ --lora_alpha 32 \
375
+ --lora_dropout 0.0 \
376
+ --add_instruction_replay \
377
+ --data_replay_freq -1 \
378
+ --replay_after_n_epoch 0 \
379
+ --kl_ratio 0.5 \
380
+ --attn_temperature 1 \
381
+ --model_name inflora \
382
+ --threshold 0.995
383
+
384
+
385
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
386
+ --do_train \
387
+ --do_predict \
388
+ --predict_with_generate \
389
+ --model_name_or_path $2 \
390
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights/trans_input.pt \
391
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights \
392
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights/prompts_keys_till_now.pt \
393
+ --data_dir CL_Benchmark \
394
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
395
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
396
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task073_commonsenseqa_answer_generation \
397
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation \
398
+ --per_device_train_batch_size 16 \
399
+ --per_device_eval_batch_size 8 \
400
+ --gradient_accumulation_steps 2 \
401
+ --learning_rate 0.0003 \
402
+ --num_train_epochs 100 \
403
+ --run_name gen_script_superni_order1_t5_small_inflora \
404
+ --max_source_length 512 \
405
+ --max_target_length 50 \
406
+ --generation_max_length 50 \
407
+ --add_task_name False \
408
+ --add_dataset_name False \
409
+ --overwrite_output_dir \
410
+ --overwrite_cache \
411
+ --lr_scheduler_type constant \
412
+ --warmup_steps 0 \
413
+ --logging_strategy steps \
414
+ --logging_steps 10 \
415
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
416
+ --evaluation_strategy steps \
417
+ --save_strategy steps \
418
+ --save_total_limit 1 \
419
+ --load_best_model_at_end \
420
+ --lora_r 4 \
421
+ --lora_alpha 32 \
422
+ --lora_dropout 0.0 \
423
+ --add_instruction_replay \
424
+ --data_replay_freq -1 \
425
+ --replay_after_n_epoch 0 \
426
+ --kl_ratio 0.5 \
427
+ --attn_temperature 1 \
428
+ --model_name inflora \
429
+ --threshold 0.995
430
+
431
+
432
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
433
+ --do_train \
434
+ --do_predict \
435
+ --predict_with_generate \
436
+ --model_name_or_path $2 \
437
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights/trans_input.pt \
438
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights \
439
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights/prompts_keys_till_now.pt \
440
+ --data_dir CL_Benchmark \
441
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
442
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
443
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1590_diplomacy_text_generation \
444
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation \
445
+ --per_device_train_batch_size 16 \
446
+ --per_device_eval_batch_size 8 \
447
+ --gradient_accumulation_steps 2 \
448
+ --learning_rate 0.0003 \
449
+ --num_train_epochs 100 \
450
+ --run_name gen_script_superni_order1_t5_small_inflora \
451
+ --max_source_length 512 \
452
+ --max_target_length 50 \
453
+ --generation_max_length 50 \
454
+ --add_task_name False \
455
+ --add_dataset_name False \
456
+ --overwrite_output_dir \
457
+ --overwrite_cache \
458
+ --lr_scheduler_type constant \
459
+ --warmup_steps 0 \
460
+ --logging_strategy steps \
461
+ --logging_steps 10 \
462
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
463
+ --evaluation_strategy steps \
464
+ --save_strategy steps \
465
+ --save_total_limit 1 \
466
+ --load_best_model_at_end \
467
+ --lora_r 4 \
468
+ --lora_alpha 32 \
469
+ --lora_dropout 0.0 \
470
+ --add_instruction_replay \
471
+ --data_replay_freq -1 \
472
+ --replay_after_n_epoch 0 \
473
+ --kl_ratio 0.5 \
474
+ --attn_temperature 1 \
475
+ --model_name inflora \
476
+ --threshold 0.995
477
+
478
+
479
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
480
+ --do_train \
481
+ --do_predict \
482
+ --predict_with_generate \
483
+ --model_name_or_path $2 \
484
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights/trans_input.pt \
485
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights \
486
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights/prompts_keys_till_now.pt \
487
+ --data_dir CL_Benchmark \
488
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
489
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
490
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task748_glucose_reverse_cause_event_detection \
491
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection \
492
+ --per_device_train_batch_size 16 \
493
+ --per_device_eval_batch_size 8 \
494
+ --gradient_accumulation_steps 2 \
495
+ --learning_rate 0.0003 \
496
+ --num_train_epochs 100 \
497
+ --run_name gen_script_superni_order1_t5_small_inflora \
498
+ --max_source_length 512 \
499
+ --max_target_length 50 \
500
+ --generation_max_length 50 \
501
+ --add_task_name False \
502
+ --add_dataset_name False \
503
+ --overwrite_output_dir \
504
+ --overwrite_cache \
505
+ --lr_scheduler_type constant \
506
+ --warmup_steps 0 \
507
+ --logging_strategy steps \
508
+ --logging_steps 10 \
509
+ --metric_for_best_model eval_rougeL_for_task748_glucose_reverse_cause_event_detection \
510
+ --evaluation_strategy steps \
511
+ --save_strategy steps \
512
+ --save_total_limit 1 \
513
+ --load_best_model_at_end \
514
+ --lora_r 4 \
515
+ --lora_alpha 32 \
516
+ --lora_dropout 0.0 \
517
+ --add_instruction_replay \
518
+ --data_replay_freq -1 \
519
+ --replay_after_n_epoch 0 \
520
+ --kl_ratio 0.5 \
521
+ --attn_temperature 1 \
522
+ --model_name inflora \
523
+ --threshold 0.995
524
+
525
+
526
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
527
+ --do_train \
528
+ --do_predict \
529
+ --predict_with_generate \
530
+ --model_name_or_path $2 \
531
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights/trans_input.pt \
532
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights \
533
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights/prompts_keys_till_now.pt \
534
+ --data_dir CL_Benchmark \
535
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
536
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
537
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task511_reddit_tifu_long_text_summarization \
538
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization \
539
+ --per_device_train_batch_size 16 \
540
+ --per_device_eval_batch_size 8 \
541
+ --gradient_accumulation_steps 2 \
542
+ --learning_rate 0.0003 \
543
+ --num_train_epochs 100 \
544
+ --run_name gen_script_superni_order1_t5_small_inflora \
545
+ --max_source_length 512 \
546
+ --max_target_length 50 \
547
+ --generation_max_length 50 \
548
+ --add_task_name False \
549
+ --add_dataset_name False \
550
+ --overwrite_output_dir \
551
+ --overwrite_cache \
552
+ --lr_scheduler_type constant \
553
+ --warmup_steps 0 \
554
+ --logging_strategy steps \
555
+ --logging_steps 10 \
556
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
557
+ --evaluation_strategy steps \
558
+ --save_strategy steps \
559
+ --save_total_limit 1 \
560
+ --load_best_model_at_end \
561
+ --lora_r 4 \
562
+ --lora_alpha 32 \
563
+ --lora_dropout 0.0 \
564
+ --add_instruction_replay \
565
+ --data_replay_freq -1 \
566
+ --replay_after_n_epoch 0 \
567
+ --kl_ratio 0.5 \
568
+ --attn_temperature 1 \
569
+ --model_name inflora \
570
+ --threshold 0.995
571
+
572
+
573
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
574
+ --do_train \
575
+ --do_predict \
576
+ --predict_with_generate \
577
+ --model_name_or_path $2 \
578
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/trans_input.pt \
579
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
580
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/prompts_keys_till_now.pt \
581
+ --data_dir CL_Benchmark \
582
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
583
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
584
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task591_sciq_answer_generation \
585
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation \
586
+ --per_device_train_batch_size 16 \
587
+ --per_device_eval_batch_size 8 \
588
+ --gradient_accumulation_steps 2 \
589
+ --learning_rate 0.0003 \
590
+ --num_train_epochs 100 \
591
+ --run_name gen_script_superni_order1_t5_small_inflora \
592
+ --max_source_length 512 \
593
+ --max_target_length 50 \
594
+ --generation_max_length 50 \
595
+ --add_task_name False \
596
+ --add_dataset_name False \
597
+ --overwrite_output_dir \
598
+ --overwrite_cache \
599
+ --lr_scheduler_type constant \
600
+ --warmup_steps 0 \
601
+ --logging_strategy steps \
602
+ --logging_steps 10 \
603
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
604
+ --evaluation_strategy steps \
605
+ --save_strategy steps \
606
+ --save_total_limit 1 \
607
+ --load_best_model_at_end \
608
+ --lora_r 4 \
609
+ --lora_alpha 32 \
610
+ --lora_dropout 0.0 \
611
+ --add_instruction_replay \
612
+ --data_replay_freq -1 \
613
+ --replay_after_n_epoch 0 \
614
+ --kl_ratio 0.5 \
615
+ --attn_temperature 1 \
616
+ --model_name inflora \
617
+ --threshold 0.995
618
+
619
+
620
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
621
+ --do_train \
622
+ --do_predict \
623
+ --predict_with_generate \
624
+ --model_name_or_path $2 \
625
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation/saved_weights/trans_input.pt \
626
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation/saved_weights \
627
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation/saved_weights/prompts_keys_till_now.pt \
628
+ --data_dir CL_Benchmark \
629
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
630
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
631
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task1687_sentiment140_classification \
632
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/14-task1687_sentiment140_classification \
633
+ --per_device_train_batch_size 16 \
634
+ --per_device_eval_batch_size 8 \
635
+ --gradient_accumulation_steps 2 \
636
+ --learning_rate 0.0003 \
637
+ --num_train_epochs 100 \
638
+ --run_name gen_script_superni_order1_t5_small_inflora \
639
+ --max_source_length 512 \
640
+ --max_target_length 50 \
641
+ --generation_max_length 50 \
642
+ --add_task_name False \
643
+ --add_dataset_name False \
644
+ --overwrite_output_dir \
645
+ --overwrite_cache \
646
+ --lr_scheduler_type constant \
647
+ --warmup_steps 0 \
648
+ --logging_strategy steps \
649
+ --logging_steps 10 \
650
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
651
+ --evaluation_strategy steps \
652
+ --save_strategy steps \
653
+ --save_total_limit 1 \
654
+ --load_best_model_at_end \
655
+ --lora_r 4 \
656
+ --lora_alpha 32 \
657
+ --lora_dropout 0.0 \
658
+ --add_instruction_replay \
659
+ --data_replay_freq -1 \
660
+ --replay_after_n_epoch 0 \
661
+ --kl_ratio 0.5 \
662
+ --attn_temperature 1 \
663
+ --model_name inflora \
664
+ --threshold 0.995
665
+
666
+
667
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
668
+ --do_train \
669
+ --do_predict \
670
+ --predict_with_generate \
671
+ --model_name_or_path $2 \
672
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/14-task1687_sentiment140_classification/saved_weights/trans_input.pt \
673
+ --previous_lora_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/1-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/2-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/3-task1290_xsum_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/4-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/5-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/6-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/7-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/8-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/9-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/10-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/11-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/13-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/14-task1687_sentiment140_classification/saved_weights \
674
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/14-task1687_sentiment140_classification/saved_weights/prompts_keys_till_now.pt \
675
+ --data_dir CL_Benchmark \
676
+ --task_order task1572_samsum_summary,task363_sst2_polarity_classification,task1290_xsum_summarization,task181_outcome_extraction,task002_quoref_answer_generation,task1510_evalution_relation_extraction,task639_multi_woz_user_utterance_generation,task1729_personachat_generate_next,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task748_glucose_reverse_cause_event_detection,task511_reddit_tifu_long_text_summarization,task591_sciq_answer_generation,task1687_sentiment140_classification,task875_emotion_classification \
677
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
678
+ --task_config_dir configs/gen_script_superni_order1_t5_small_configs/task875_emotion_classification \
679
+ --output_dir logs_and_outputs/gen_script_superni_order1_t5_small_inflora/outputs/15-task875_emotion_classification \
680
+ --per_device_train_batch_size 16 \
681
+ --per_device_eval_batch_size 8 \
682
+ --gradient_accumulation_steps 2 \
683
+ --learning_rate 0.0003 \
684
+ --num_train_epochs 100 \
685
+ --run_name gen_script_superni_order1_t5_small_inflora \
686
+ --max_source_length 512 \
687
+ --max_target_length 50 \
688
+ --generation_max_length 50 \
689
+ --add_task_name False \
690
+ --add_dataset_name False \
691
+ --overwrite_output_dir \
692
+ --overwrite_cache \
693
+ --lr_scheduler_type constant \
694
+ --warmup_steps 0 \
695
+ --logging_strategy steps \
696
+ --logging_steps 10 \
697
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
698
+ --evaluation_strategy steps \
699
+ --save_strategy steps \
700
+ --save_total_limit 1 \
701
+ --load_best_model_at_end \
702
+ --lora_r 4 \
703
+ --lora_alpha 32 \
704
+ --lora_dropout 0.0 \
705
+ --add_instruction_replay \
706
+ --data_replay_freq -1 \
707
+ --replay_after_n_epoch 0 \
708
+ --kl_ratio 0.5 \
709
+ --attn_temperature 1 \
710
+ --model_name inflora \
711
+ --threshold 0.995
712
+
713
+ python score.py gen_script_superni_order1_t5_small_inflora gen_script_superni_order1_t5_small_inflora
root_gainlora/T5_small/gen_script_superni_order2_t5_small_gainlora_inflora.sh ADDED
@@ -0,0 +1,743 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
21
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task748_glucose_reverse_cause_event_detection \
22
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection \
23
+ --per_device_train_batch_size 16 \
24
+ --per_device_eval_batch_size 8 \
25
+ --gradient_accumulation_steps 2 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 100 \
28
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_rougeL \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --lora_r 4 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --load_best_model_at_end \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --model_name gainlora_inflora \
51
+ --mlp_hidden_dim 100 \
52
+ --threshold 0.995 \
53
+ --transthreshold 0.995 \
54
+ --kl_ratio 0.5 \
55
+ --attn_temperature 1
56
+
57
+
58
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
59
+ --do_train \
60
+ --do_predict \
61
+ --predict_with_generate \
62
+ --model_name_or_path $2 \
63
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights/trans_input.pt \
64
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights \
65
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights/prompts_keys_till_now.pt \
66
+ --data_dir CL_Benchmark \
67
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
68
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
69
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task073_commonsenseqa_answer_generation \
70
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation \
71
+ --per_device_train_batch_size 16 \
72
+ --per_device_eval_batch_size 8 \
73
+ --gradient_accumulation_steps 2 \
74
+ --learning_rate 0.0003 \
75
+ --num_train_epochs 100 \
76
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
77
+ --max_source_length 512 \
78
+ --max_target_length 50 \
79
+ --generation_max_length 50 \
80
+ --add_task_name False \
81
+ --add_dataset_name False \
82
+ --overwrite_output_dir \
83
+ --overwrite_cache \
84
+ --lr_scheduler_type constant \
85
+ --warmup_steps 0 \
86
+ --logging_strategy steps \
87
+ --logging_steps 10 \
88
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
89
+ --evaluation_strategy steps \
90
+ --save_strategy steps \
91
+ --save_total_limit 1 \
92
+ --load_best_model_at_end \
93
+ --lora_r 4 \
94
+ --lora_alpha 32 \
95
+ --lora_dropout 0.0 \
96
+ --add_instruction_replay \
97
+ --data_replay_freq -1 \
98
+ --replay_after_n_epoch 0 \
99
+ --model_name gainlora_inflora \
100
+ --mlp_hidden_dim 100 \
101
+ --threshold 0.995 \
102
+ --transthreshold 0.995 \
103
+ --kl_ratio 0.5 \
104
+ --attn_temperature 1
105
+
106
+
107
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
108
+ --do_train \
109
+ --do_predict \
110
+ --predict_with_generate \
111
+ --model_name_or_path $2 \
112
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights/trans_input.pt \
113
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights \
114
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights/prompts_keys_till_now.pt \
115
+ --data_dir CL_Benchmark \
116
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
117
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
118
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1590_diplomacy_text_generation \
119
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation \
120
+ --per_device_train_batch_size 16 \
121
+ --per_device_eval_batch_size 8 \
122
+ --gradient_accumulation_steps 2 \
123
+ --learning_rate 0.0003 \
124
+ --num_train_epochs 100 \
125
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
126
+ --max_source_length 512 \
127
+ --max_target_length 50 \
128
+ --generation_max_length 50 \
129
+ --add_task_name False \
130
+ --add_dataset_name False \
131
+ --overwrite_output_dir \
132
+ --overwrite_cache \
133
+ --lr_scheduler_type constant \
134
+ --warmup_steps 0 \
135
+ --logging_strategy steps \
136
+ --logging_steps 10 \
137
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
138
+ --evaluation_strategy steps \
139
+ --save_strategy steps \
140
+ --save_total_limit 1 \
141
+ --load_best_model_at_end \
142
+ --lora_r 4 \
143
+ --lora_alpha 32 \
144
+ --lora_dropout 0.0 \
145
+ --add_instruction_replay \
146
+ --data_replay_freq -1 \
147
+ --replay_after_n_epoch 0 \
148
+ --model_name gainlora_inflora \
149
+ --mlp_hidden_dim 100 \
150
+ --threshold 0.995 \
151
+ --transthreshold 0.995 \
152
+ --kl_ratio 0.5 \
153
+ --attn_temperature 1
154
+
155
+
156
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
157
+ --do_train \
158
+ --do_predict \
159
+ --predict_with_generate \
160
+ --model_name_or_path $2 \
161
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights/trans_input.pt \
162
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights \
163
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights/prompts_keys_till_now.pt \
164
+ --data_dir CL_Benchmark \
165
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
166
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
167
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task639_multi_woz_user_utterance_generation \
168
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation \
169
+ --per_device_train_batch_size 16 \
170
+ --per_device_eval_batch_size 8 \
171
+ --gradient_accumulation_steps 2 \
172
+ --learning_rate 0.0003 \
173
+ --num_train_epochs 100 \
174
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
175
+ --max_source_length 512 \
176
+ --max_target_length 50 \
177
+ --generation_max_length 50 \
178
+ --add_task_name False \
179
+ --add_dataset_name False \
180
+ --overwrite_output_dir \
181
+ --overwrite_cache \
182
+ --lr_scheduler_type constant \
183
+ --warmup_steps 0 \
184
+ --logging_strategy steps \
185
+ --logging_steps 10 \
186
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
187
+ --evaluation_strategy steps \
188
+ --save_strategy steps \
189
+ --save_total_limit 1 \
190
+ --load_best_model_at_end \
191
+ --lora_r 4 \
192
+ --lora_alpha 32 \
193
+ --lora_dropout 0.0 \
194
+ --add_instruction_replay \
195
+ --data_replay_freq -1 \
196
+ --replay_after_n_epoch 0 \
197
+ --model_name gainlora_inflora \
198
+ --mlp_hidden_dim 100 \
199
+ --threshold 0.995 \
200
+ --transthreshold 0.995 \
201
+ --kl_ratio 0.5 \
202
+ --attn_temperature 1
203
+
204
+
205
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
206
+ --do_train \
207
+ --do_predict \
208
+ --predict_with_generate \
209
+ --model_name_or_path $2 \
210
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights/trans_input.pt \
211
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights \
212
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights/prompts_keys_till_now.pt \
213
+ --data_dir CL_Benchmark \
214
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
215
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
216
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1572_samsum_summary \
217
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary \
218
+ --per_device_train_batch_size 16 \
219
+ --per_device_eval_batch_size 8 \
220
+ --gradient_accumulation_steps 2 \
221
+ --learning_rate 0.0003 \
222
+ --num_train_epochs 100 \
223
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
224
+ --max_source_length 512 \
225
+ --max_target_length 50 \
226
+ --generation_max_length 50 \
227
+ --add_task_name False \
228
+ --add_dataset_name False \
229
+ --overwrite_output_dir \
230
+ --overwrite_cache \
231
+ --lr_scheduler_type constant \
232
+ --warmup_steps 0 \
233
+ --logging_strategy steps \
234
+ --logging_steps 10 \
235
+ --metric_for_best_model eval_rougeL_for_task1572_samsum_summary \
236
+ --evaluation_strategy steps \
237
+ --save_strategy steps \
238
+ --save_total_limit 1 \
239
+ --load_best_model_at_end \
240
+ --lora_r 4 \
241
+ --lora_alpha 32 \
242
+ --lora_dropout 0.0 \
243
+ --add_instruction_replay \
244
+ --data_replay_freq -1 \
245
+ --replay_after_n_epoch 0 \
246
+ --model_name gainlora_inflora \
247
+ --mlp_hidden_dim 100 \
248
+ --threshold 0.995 \
249
+ --transthreshold 0.995 \
250
+ --kl_ratio 0.5 \
251
+ --attn_temperature 1
252
+
253
+
254
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
255
+ --do_train \
256
+ --do_predict \
257
+ --predict_with_generate \
258
+ --model_name_or_path $2 \
259
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights/trans_input.pt \
260
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights \
261
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights/prompts_keys_till_now.pt \
262
+ --data_dir CL_Benchmark \
263
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
264
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
265
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1687_sentiment140_classification \
266
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification \
267
+ --per_device_train_batch_size 16 \
268
+ --per_device_eval_batch_size 8 \
269
+ --gradient_accumulation_steps 2 \
270
+ --learning_rate 0.0003 \
271
+ --num_train_epochs 100 \
272
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
273
+ --max_source_length 512 \
274
+ --max_target_length 50 \
275
+ --generation_max_length 50 \
276
+ --add_task_name False \
277
+ --add_dataset_name False \
278
+ --overwrite_output_dir \
279
+ --overwrite_cache \
280
+ --lr_scheduler_type constant \
281
+ --warmup_steps 0 \
282
+ --logging_strategy steps \
283
+ --logging_steps 10 \
284
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
285
+ --evaluation_strategy steps \
286
+ --save_strategy steps \
287
+ --save_total_limit 1 \
288
+ --load_best_model_at_end \
289
+ --lora_r 4 \
290
+ --lora_alpha 32 \
291
+ --lora_dropout 0.0 \
292
+ --add_instruction_replay \
293
+ --data_replay_freq -1 \
294
+ --replay_after_n_epoch 0 \
295
+ --model_name gainlora_inflora \
296
+ --mlp_hidden_dim 100 \
297
+ --threshold 0.995 \
298
+ --transthreshold 0.995 \
299
+ --kl_ratio 0.5 \
300
+ --attn_temperature 1
301
+
302
+
303
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
304
+ --do_train \
305
+ --do_predict \
306
+ --predict_with_generate \
307
+ --model_name_or_path $2 \
308
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights/trans_input.pt \
309
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights \
310
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights/prompts_keys_till_now.pt \
311
+ --data_dir CL_Benchmark \
312
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
313
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
314
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task591_sciq_answer_generation \
315
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation \
316
+ --per_device_train_batch_size 16 \
317
+ --per_device_eval_batch_size 8 \
318
+ --gradient_accumulation_steps 2 \
319
+ --learning_rate 0.0003 \
320
+ --num_train_epochs 100 \
321
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
322
+ --max_source_length 512 \
323
+ --max_target_length 50 \
324
+ --generation_max_length 50 \
325
+ --add_task_name False \
326
+ --add_dataset_name False \
327
+ --overwrite_output_dir \
328
+ --overwrite_cache \
329
+ --lr_scheduler_type constant \
330
+ --warmup_steps 0 \
331
+ --logging_strategy steps \
332
+ --logging_steps 10 \
333
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
334
+ --evaluation_strategy steps \
335
+ --save_strategy steps \
336
+ --save_total_limit 1 \
337
+ --load_best_model_at_end \
338
+ --lora_r 4 \
339
+ --lora_alpha 32 \
340
+ --lora_dropout 0.0 \
341
+ --add_instruction_replay \
342
+ --data_replay_freq -1 \
343
+ --replay_after_n_epoch 0 \
344
+ --model_name gainlora_inflora \
345
+ --mlp_hidden_dim 100 \
346
+ --threshold 0.995 \
347
+ --transthreshold 0.995 \
348
+ --kl_ratio 0.5 \
349
+ --attn_temperature 1
350
+
351
+
352
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
353
+ --do_train \
354
+ --do_predict \
355
+ --predict_with_generate \
356
+ --model_name_or_path $2 \
357
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights/trans_input.pt \
358
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights \
359
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights/prompts_keys_till_now.pt \
360
+ --data_dir CL_Benchmark \
361
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
362
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
363
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task363_sst2_polarity_classification \
364
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification \
365
+ --per_device_train_batch_size 16 \
366
+ --per_device_eval_batch_size 8 \
367
+ --gradient_accumulation_steps 2 \
368
+ --learning_rate 0.0003 \
369
+ --num_train_epochs 100 \
370
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
371
+ --max_source_length 512 \
372
+ --max_target_length 50 \
373
+ --generation_max_length 50 \
374
+ --add_task_name False \
375
+ --add_dataset_name False \
376
+ --overwrite_output_dir \
377
+ --overwrite_cache \
378
+ --lr_scheduler_type constant \
379
+ --warmup_steps 0 \
380
+ --logging_strategy steps \
381
+ --logging_steps 10 \
382
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
383
+ --evaluation_strategy steps \
384
+ --save_strategy steps \
385
+ --save_total_limit 1 \
386
+ --load_best_model_at_end \
387
+ --lora_r 4 \
388
+ --lora_alpha 32 \
389
+ --lora_dropout 0.0 \
390
+ --add_instruction_replay \
391
+ --data_replay_freq -1 \
392
+ --replay_after_n_epoch 0 \
393
+ --model_name gainlora_inflora \
394
+ --mlp_hidden_dim 100 \
395
+ --threshold 0.995 \
396
+ --transthreshold 0.995 \
397
+ --kl_ratio 0.5 \
398
+ --attn_temperature 1
399
+
400
+
401
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
402
+ --do_train \
403
+ --do_predict \
404
+ --predict_with_generate \
405
+ --model_name_or_path $2 \
406
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights/trans_input.pt \
407
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights \
408
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights/prompts_keys_till_now.pt \
409
+ --data_dir CL_Benchmark \
410
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
411
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
412
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1510_evalution_relation_extraction \
413
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction \
414
+ --per_device_train_batch_size 16 \
415
+ --per_device_eval_batch_size 8 \
416
+ --gradient_accumulation_steps 2 \
417
+ --learning_rate 0.0003 \
418
+ --num_train_epochs 100 \
419
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
420
+ --max_source_length 512 \
421
+ --max_target_length 50 \
422
+ --generation_max_length 50 \
423
+ --add_task_name False \
424
+ --add_dataset_name False \
425
+ --overwrite_output_dir \
426
+ --overwrite_cache \
427
+ --lr_scheduler_type constant \
428
+ --warmup_steps 0 \
429
+ --logging_strategy steps \
430
+ --logging_steps 10 \
431
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
432
+ --evaluation_strategy steps \
433
+ --save_strategy steps \
434
+ --save_total_limit 1 \
435
+ --load_best_model_at_end \
436
+ --lora_r 4 \
437
+ --lora_alpha 32 \
438
+ --lora_dropout 0.0 \
439
+ --add_instruction_replay \
440
+ --data_replay_freq -1 \
441
+ --replay_after_n_epoch 0 \
442
+ --model_name gainlora_inflora \
443
+ --mlp_hidden_dim 100 \
444
+ --threshold 0.995 \
445
+ --transthreshold 0.995 \
446
+ --kl_ratio 0.5 \
447
+ --attn_temperature 1
448
+
449
+
450
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
451
+ --do_train \
452
+ --do_predict \
453
+ --predict_with_generate \
454
+ --model_name_or_path $2 \
455
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights/trans_input.pt \
456
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights \
457
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights/prompts_keys_till_now.pt \
458
+ --data_dir CL_Benchmark \
459
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
460
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
461
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1729_personachat_generate_next \
462
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next \
463
+ --per_device_train_batch_size 16 \
464
+ --per_device_eval_batch_size 8 \
465
+ --gradient_accumulation_steps 2 \
466
+ --learning_rate 0.0003 \
467
+ --num_train_epochs 100 \
468
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
469
+ --max_source_length 512 \
470
+ --max_target_length 50 \
471
+ --generation_max_length 50 \
472
+ --add_task_name False \
473
+ --add_dataset_name False \
474
+ --overwrite_output_dir \
475
+ --overwrite_cache \
476
+ --lr_scheduler_type constant \
477
+ --warmup_steps 0 \
478
+ --logging_strategy steps \
479
+ --logging_steps 10 \
480
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
481
+ --evaluation_strategy steps \
482
+ --save_strategy steps \
483
+ --save_total_limit 1 \
484
+ --load_best_model_at_end \
485
+ --lora_r 4 \
486
+ --lora_alpha 32 \
487
+ --lora_dropout 0.0 \
488
+ --add_instruction_replay \
489
+ --data_replay_freq -1 \
490
+ --replay_after_n_epoch 0 \
491
+ --model_name gainlora_inflora \
492
+ --mlp_hidden_dim 100 \
493
+ --threshold 0.995 \
494
+ --transthreshold 0.995 \
495
+ --kl_ratio 0.5 \
496
+ --attn_temperature 1
497
+
498
+
499
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
500
+ --do_train \
501
+ --do_predict \
502
+ --predict_with_generate \
503
+ --model_name_or_path $2 \
504
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights/trans_input.pt \
505
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights \
506
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights/prompts_keys_till_now.pt \
507
+ --data_dir CL_Benchmark \
508
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
509
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
510
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task181_outcome_extraction \
511
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction \
512
+ --per_device_train_batch_size 16 \
513
+ --per_device_eval_batch_size 8 \
514
+ --gradient_accumulation_steps 2 \
515
+ --learning_rate 0.0003 \
516
+ --num_train_epochs 100 \
517
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
518
+ --max_source_length 512 \
519
+ --max_target_length 50 \
520
+ --generation_max_length 50 \
521
+ --add_task_name False \
522
+ --add_dataset_name False \
523
+ --overwrite_output_dir \
524
+ --overwrite_cache \
525
+ --lr_scheduler_type constant \
526
+ --warmup_steps 0 \
527
+ --logging_strategy steps \
528
+ --logging_steps 10 \
529
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
530
+ --evaluation_strategy steps \
531
+ --save_strategy steps \
532
+ --save_total_limit 1 \
533
+ --load_best_model_at_end \
534
+ --lora_r 4 \
535
+ --lora_alpha 32 \
536
+ --lora_dropout 0.0 \
537
+ --add_instruction_replay \
538
+ --data_replay_freq -1 \
539
+ --replay_after_n_epoch 0 \
540
+ --model_name gainlora_inflora \
541
+ --mlp_hidden_dim 100 \
542
+ --threshold 0.995 \
543
+ --transthreshold 0.995 \
544
+ --kl_ratio 0.5 \
545
+ --attn_temperature 1
546
+
547
+
548
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
549
+ --do_train \
550
+ --do_predict \
551
+ --predict_with_generate \
552
+ --model_name_or_path $2 \
553
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights/trans_input.pt \
554
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights \
555
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights/prompts_keys_till_now.pt \
556
+ --data_dir CL_Benchmark \
557
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
558
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
559
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task511_reddit_tifu_long_text_summarization \
560
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization \
561
+ --per_device_train_batch_size 16 \
562
+ --per_device_eval_batch_size 8 \
563
+ --gradient_accumulation_steps 2 \
564
+ --learning_rate 0.0003 \
565
+ --num_train_epochs 100 \
566
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
567
+ --max_source_length 512 \
568
+ --max_target_length 50 \
569
+ --generation_max_length 50 \
570
+ --add_task_name False \
571
+ --add_dataset_name False \
572
+ --overwrite_output_dir \
573
+ --overwrite_cache \
574
+ --lr_scheduler_type constant \
575
+ --warmup_steps 0 \
576
+ --logging_strategy steps \
577
+ --logging_steps 10 \
578
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
579
+ --evaluation_strategy steps \
580
+ --save_strategy steps \
581
+ --save_total_limit 1 \
582
+ --load_best_model_at_end \
583
+ --lora_r 4 \
584
+ --lora_alpha 32 \
585
+ --lora_dropout 0.0 \
586
+ --add_instruction_replay \
587
+ --data_replay_freq -1 \
588
+ --replay_after_n_epoch 0 \
589
+ --model_name gainlora_inflora \
590
+ --mlp_hidden_dim 100 \
591
+ --threshold 0.995 \
592
+ --transthreshold 0.995 \
593
+ --kl_ratio 0.5 \
594
+ --attn_temperature 1
595
+
596
+
597
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
598
+ --do_train \
599
+ --do_predict \
600
+ --predict_with_generate \
601
+ --model_name_or_path $2 \
602
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/trans_input.pt \
603
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
604
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/prompts_keys_till_now.pt \
605
+ --data_dir CL_Benchmark \
606
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
607
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
608
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task002_quoref_answer_generation \
609
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation \
610
+ --per_device_train_batch_size 16 \
611
+ --per_device_eval_batch_size 8 \
612
+ --gradient_accumulation_steps 2 \
613
+ --learning_rate 0.0003 \
614
+ --num_train_epochs 100 \
615
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
616
+ --max_source_length 512 \
617
+ --max_target_length 50 \
618
+ --generation_max_length 50 \
619
+ --add_task_name False \
620
+ --add_dataset_name False \
621
+ --overwrite_output_dir \
622
+ --overwrite_cache \
623
+ --lr_scheduler_type constant \
624
+ --warmup_steps 0 \
625
+ --logging_strategy steps \
626
+ --logging_steps 10 \
627
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
628
+ --evaluation_strategy steps \
629
+ --save_strategy steps \
630
+ --save_total_limit 1 \
631
+ --load_best_model_at_end \
632
+ --lora_r 4 \
633
+ --lora_alpha 32 \
634
+ --lora_dropout 0.0 \
635
+ --add_instruction_replay \
636
+ --data_replay_freq -1 \
637
+ --replay_after_n_epoch 0 \
638
+ --model_name gainlora_inflora \
639
+ --mlp_hidden_dim 100 \
640
+ --threshold 0.995 \
641
+ --transthreshold 0.995 \
642
+ --kl_ratio 0.5 \
643
+ --attn_temperature 1
644
+
645
+
646
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
647
+ --do_train \
648
+ --do_predict \
649
+ --predict_with_generate \
650
+ --model_name_or_path $2 \
651
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation/saved_weights/trans_input.pt \
652
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation/saved_weights \
653
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation/saved_weights/prompts_keys_till_now.pt \
654
+ --data_dir CL_Benchmark \
655
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
656
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
657
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1290_xsum_summarization \
658
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/14-task1290_xsum_summarization \
659
+ --per_device_train_batch_size 16 \
660
+ --per_device_eval_batch_size 8 \
661
+ --gradient_accumulation_steps 2 \
662
+ --learning_rate 0.0003 \
663
+ --num_train_epochs 100 \
664
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
665
+ --max_source_length 512 \
666
+ --max_target_length 50 \
667
+ --generation_max_length 50 \
668
+ --add_task_name False \
669
+ --add_dataset_name False \
670
+ --overwrite_output_dir \
671
+ --overwrite_cache \
672
+ --lr_scheduler_type constant \
673
+ --warmup_steps 0 \
674
+ --logging_strategy steps \
675
+ --logging_steps 10 \
676
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
677
+ --evaluation_strategy steps \
678
+ --save_strategy steps \
679
+ --save_total_limit 1 \
680
+ --load_best_model_at_end \
681
+ --lora_r 4 \
682
+ --lora_alpha 32 \
683
+ --lora_dropout 0.0 \
684
+ --add_instruction_replay \
685
+ --data_replay_freq -1 \
686
+ --replay_after_n_epoch 0 \
687
+ --model_name gainlora_inflora \
688
+ --mlp_hidden_dim 100 \
689
+ --threshold 0.995 \
690
+ --transthreshold 0.995 \
691
+ --kl_ratio 0.5 \
692
+ --attn_temperature 1
693
+
694
+
695
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
696
+ --do_train \
697
+ --do_predict \
698
+ --predict_with_generate \
699
+ --model_name_or_path $2 \
700
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/14-task1290_xsum_summarization/saved_weights/trans_input.pt \
701
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/13-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/14-task1290_xsum_summarization/saved_weights \
702
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/14-task1290_xsum_summarization/saved_weights/prompts_keys_till_now.pt \
703
+ --data_dir CL_Benchmark \
704
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
705
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
706
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task875_emotion_classification \
707
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_gainlora_inflora/outputs/15-task875_emotion_classification \
708
+ --per_device_train_batch_size 16 \
709
+ --per_device_eval_batch_size 8 \
710
+ --gradient_accumulation_steps 2 \
711
+ --learning_rate 0.0003 \
712
+ --num_train_epochs 100 \
713
+ --run_name gen_script_superni_order2_t5_small_gainlora_inflora \
714
+ --max_source_length 512 \
715
+ --max_target_length 50 \
716
+ --generation_max_length 50 \
717
+ --add_task_name False \
718
+ --add_dataset_name False \
719
+ --overwrite_output_dir \
720
+ --overwrite_cache \
721
+ --lr_scheduler_type constant \
722
+ --warmup_steps 0 \
723
+ --logging_strategy steps \
724
+ --logging_steps 10 \
725
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
726
+ --evaluation_strategy steps \
727
+ --save_strategy steps \
728
+ --save_total_limit 1 \
729
+ --load_best_model_at_end \
730
+ --lora_r 4 \
731
+ --lora_alpha 32 \
732
+ --lora_dropout 0.0 \
733
+ --add_instruction_replay \
734
+ --data_replay_freq -1 \
735
+ --replay_after_n_epoch 0 \
736
+ --model_name gainlora_inflora \
737
+ --mlp_hidden_dim 100 \
738
+ --threshold 0.995 \
739
+ --transthreshold 0.995 \
740
+ --kl_ratio 0.5 \
741
+ --attn_temperature 1
742
+
743
+ python score.py gen_script_superni_order2_t5_small_gainlora_inflora gen_script_superni_order2_t5_small_gainlora_inflora
root_gainlora/T5_small/gen_script_superni_order2_t5_small_inflora.sh ADDED
@@ -0,0 +1,713 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH -J cl
3
+ #SBATCH -o cl-%j.out
4
+ #SBATCH -p compute
5
+ #SBATCH -N 1
6
+ #SBATCH -t 20:00:00
7
+ #SBATCH --mem 128G
8
+ #SBATCH --gres=gpu:a100-sxm4-80gb:1
9
+
10
+ export CUDA_DEVICE_ORDER="PCI_BUS_ID"
11
+
12
+ port=$(shuf -i25000-30000 -n1)
13
+
14
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
15
+ --do_train \
16
+ --do_predict \
17
+ --predict_with_generate \
18
+ --model_name_or_path $2 \
19
+ --data_dir CL_Benchmark \
20
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
21
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task748_glucose_reverse_cause_event_detection \
22
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection \
23
+ --per_device_train_batch_size 16 \
24
+ --per_device_eval_batch_size 8 \
25
+ --gradient_accumulation_steps 2 \
26
+ --learning_rate 0.0003 \
27
+ --num_train_epochs 100 \
28
+ --run_name gen_script_superni_order2_t5_small_inflora \
29
+ --max_source_length 512 \
30
+ --max_target_length 50 \
31
+ --generation_max_length 50 \
32
+ --add_task_name False \
33
+ --add_dataset_name False \
34
+ --overwrite_output_dir \
35
+ --overwrite_cache \
36
+ --lr_scheduler_type constant \
37
+ --warmup_steps 0 \
38
+ --logging_strategy steps \
39
+ --logging_steps 10 \
40
+ --metric_for_best_model eval_rougeL \
41
+ --evaluation_strategy steps \
42
+ --save_strategy steps \
43
+ --save_total_limit 1 \
44
+ --lora_r 4 \
45
+ --lora_alpha 32 \
46
+ --lora_dropout 0.0 \
47
+ --load_best_model_at_end \
48
+ --data_replay_freq -1 \
49
+ --replay_after_n_epoch 0 \
50
+ --model_name inflora \
51
+ --threshold 0.995 \
52
+ --kl_ratio 0.5 \
53
+ --attn_temperature 1
54
+
55
+
56
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
57
+ --do_train \
58
+ --do_predict \
59
+ --predict_with_generate \
60
+ --model_name_or_path $2 \
61
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights/trans_input.pt \
62
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights \
63
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights/prompts_keys_till_now.pt \
64
+ --data_dir CL_Benchmark \
65
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
66
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
67
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task073_commonsenseqa_answer_generation \
68
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation \
69
+ --per_device_train_batch_size 16 \
70
+ --per_device_eval_batch_size 8 \
71
+ --gradient_accumulation_steps 2 \
72
+ --learning_rate 0.0003 \
73
+ --num_train_epochs 100 \
74
+ --run_name gen_script_superni_order2_t5_small_inflora \
75
+ --max_source_length 512 \
76
+ --max_target_length 50 \
77
+ --generation_max_length 50 \
78
+ --add_task_name False \
79
+ --add_dataset_name False \
80
+ --overwrite_output_dir \
81
+ --overwrite_cache \
82
+ --lr_scheduler_type constant \
83
+ --warmup_steps 0 \
84
+ --logging_strategy steps \
85
+ --logging_steps 10 \
86
+ --metric_for_best_model eval_rougeL_for_task073_commonsenseqa_answer_generation \
87
+ --evaluation_strategy steps \
88
+ --save_strategy steps \
89
+ --save_total_limit 1 \
90
+ --load_best_model_at_end \
91
+ --lora_r 4 \
92
+ --lora_alpha 32 \
93
+ --lora_dropout 0.0 \
94
+ --add_instruction_replay \
95
+ --data_replay_freq -1 \
96
+ --replay_after_n_epoch 0 \
97
+ --model_name inflora \
98
+ --threshold 0.995 \
99
+ --kl_ratio 0.5 \
100
+ --attn_temperature 1
101
+
102
+
103
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
104
+ --do_train \
105
+ --do_predict \
106
+ --predict_with_generate \
107
+ --model_name_or_path $2 \
108
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights/trans_input.pt \
109
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights \
110
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights/prompts_keys_till_now.pt \
111
+ --data_dir CL_Benchmark \
112
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
113
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
114
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1590_diplomacy_text_generation \
115
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation \
116
+ --per_device_train_batch_size 16 \
117
+ --per_device_eval_batch_size 8 \
118
+ --gradient_accumulation_steps 2 \
119
+ --learning_rate 0.0003 \
120
+ --num_train_epochs 100 \
121
+ --run_name gen_script_superni_order2_t5_small_inflora \
122
+ --max_source_length 512 \
123
+ --max_target_length 50 \
124
+ --generation_max_length 50 \
125
+ --add_task_name False \
126
+ --add_dataset_name False \
127
+ --overwrite_output_dir \
128
+ --overwrite_cache \
129
+ --lr_scheduler_type constant \
130
+ --warmup_steps 0 \
131
+ --logging_strategy steps \
132
+ --logging_steps 10 \
133
+ --metric_for_best_model eval_rougeL_for_task1590_diplomacy_text_generation \
134
+ --evaluation_strategy steps \
135
+ --save_strategy steps \
136
+ --save_total_limit 1 \
137
+ --load_best_model_at_end \
138
+ --lora_r 4 \
139
+ --lora_alpha 32 \
140
+ --lora_dropout 0.0 \
141
+ --add_instruction_replay \
142
+ --data_replay_freq -1 \
143
+ --replay_after_n_epoch 0 \
144
+ --model_name inflora \
145
+ --threshold 0.995 \
146
+ --kl_ratio 0.5 \
147
+ --attn_temperature 1
148
+
149
+
150
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
151
+ --do_train \
152
+ --do_predict \
153
+ --predict_with_generate \
154
+ --model_name_or_path $2 \
155
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights/trans_input.pt \
156
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights \
157
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights/prompts_keys_till_now.pt \
158
+ --data_dir CL_Benchmark \
159
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
160
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
161
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task639_multi_woz_user_utterance_generation \
162
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation \
163
+ --per_device_train_batch_size 16 \
164
+ --per_device_eval_batch_size 8 \
165
+ --gradient_accumulation_steps 2 \
166
+ --learning_rate 0.0003 \
167
+ --num_train_epochs 100 \
168
+ --run_name gen_script_superni_order2_t5_small_inflora \
169
+ --max_source_length 512 \
170
+ --max_target_length 50 \
171
+ --generation_max_length 50 \
172
+ --add_task_name False \
173
+ --add_dataset_name False \
174
+ --overwrite_output_dir \
175
+ --overwrite_cache \
176
+ --lr_scheduler_type constant \
177
+ --warmup_steps 0 \
178
+ --logging_strategy steps \
179
+ --logging_steps 10 \
180
+ --metric_for_best_model eval_rougeL_for_task639_multi_woz_user_utterance_generation \
181
+ --evaluation_strategy steps \
182
+ --save_strategy steps \
183
+ --save_total_limit 1 \
184
+ --load_best_model_at_end \
185
+ --lora_r 4 \
186
+ --lora_alpha 32 \
187
+ --lora_dropout 0.0 \
188
+ --add_instruction_replay \
189
+ --data_replay_freq -1 \
190
+ --replay_after_n_epoch 0 \
191
+ --model_name inflora \
192
+ --threshold 0.995 \
193
+ --kl_ratio 0.5 \
194
+ --attn_temperature 1
195
+
196
+
197
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
198
+ --do_train \
199
+ --do_predict \
200
+ --predict_with_generate \
201
+ --model_name_or_path $2 \
202
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights/trans_input.pt \
203
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights \
204
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights/prompts_keys_till_now.pt \
205
+ --data_dir CL_Benchmark \
206
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
207
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
208
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1572_samsum_summary \
209
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary \
210
+ --per_device_train_batch_size 16 \
211
+ --per_device_eval_batch_size 8 \
212
+ --gradient_accumulation_steps 2 \
213
+ --learning_rate 0.0003 \
214
+ --num_train_epochs 100 \
215
+ --run_name gen_script_superni_order2_t5_small_inflora \
216
+ --max_source_length 512 \
217
+ --max_target_length 50 \
218
+ --generation_max_length 50 \
219
+ --add_task_name False \
220
+ --add_dataset_name False \
221
+ --overwrite_output_dir \
222
+ --overwrite_cache \
223
+ --lr_scheduler_type constant \
224
+ --warmup_steps 0 \
225
+ --logging_strategy steps \
226
+ --logging_steps 10 \
227
+ --metric_for_best_model eval_rougeL_for_task1572_samsum_summary \
228
+ --evaluation_strategy steps \
229
+ --save_strategy steps \
230
+ --save_total_limit 1 \
231
+ --load_best_model_at_end \
232
+ --lora_r 4 \
233
+ --lora_alpha 32 \
234
+ --lora_dropout 0.0 \
235
+ --add_instruction_replay \
236
+ --data_replay_freq -1 \
237
+ --replay_after_n_epoch 0 \
238
+ --model_name inflora \
239
+ --threshold 0.995 \
240
+ --kl_ratio 0.5 \
241
+ --attn_temperature 1
242
+
243
+
244
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
245
+ --do_train \
246
+ --do_predict \
247
+ --predict_with_generate \
248
+ --model_name_or_path $2 \
249
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights/trans_input.pt \
250
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights \
251
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights/prompts_keys_till_now.pt \
252
+ --data_dir CL_Benchmark \
253
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
254
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
255
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1687_sentiment140_classification \
256
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification \
257
+ --per_device_train_batch_size 16 \
258
+ --per_device_eval_batch_size 8 \
259
+ --gradient_accumulation_steps 2 \
260
+ --learning_rate 0.0003 \
261
+ --num_train_epochs 100 \
262
+ --run_name gen_script_superni_order2_t5_small_inflora \
263
+ --max_source_length 512 \
264
+ --max_target_length 50 \
265
+ --generation_max_length 50 \
266
+ --add_task_name False \
267
+ --add_dataset_name False \
268
+ --overwrite_output_dir \
269
+ --overwrite_cache \
270
+ --lr_scheduler_type constant \
271
+ --warmup_steps 0 \
272
+ --logging_strategy steps \
273
+ --logging_steps 10 \
274
+ --metric_for_best_model eval_rougeL_for_task1687_sentiment140_classification \
275
+ --evaluation_strategy steps \
276
+ --save_strategy steps \
277
+ --save_total_limit 1 \
278
+ --load_best_model_at_end \
279
+ --lora_r 4 \
280
+ --lora_alpha 32 \
281
+ --lora_dropout 0.0 \
282
+ --add_instruction_replay \
283
+ --data_replay_freq -1 \
284
+ --replay_after_n_epoch 0 \
285
+ --model_name inflora \
286
+ --threshold 0.995 \
287
+ --kl_ratio 0.5 \
288
+ --attn_temperature 1
289
+
290
+
291
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
292
+ --do_train \
293
+ --do_predict \
294
+ --predict_with_generate \
295
+ --model_name_or_path $2 \
296
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights/trans_input.pt \
297
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights \
298
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights/prompts_keys_till_now.pt \
299
+ --data_dir CL_Benchmark \
300
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
301
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
302
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task591_sciq_answer_generation \
303
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation \
304
+ --per_device_train_batch_size 16 \
305
+ --per_device_eval_batch_size 8 \
306
+ --gradient_accumulation_steps 2 \
307
+ --learning_rate 0.0003 \
308
+ --num_train_epochs 100 \
309
+ --run_name gen_script_superni_order2_t5_small_inflora \
310
+ --max_source_length 512 \
311
+ --max_target_length 50 \
312
+ --generation_max_length 50 \
313
+ --add_task_name False \
314
+ --add_dataset_name False \
315
+ --overwrite_output_dir \
316
+ --overwrite_cache \
317
+ --lr_scheduler_type constant \
318
+ --warmup_steps 0 \
319
+ --logging_strategy steps \
320
+ --logging_steps 10 \
321
+ --metric_for_best_model eval_rougeL_for_task591_sciq_answer_generation \
322
+ --evaluation_strategy steps \
323
+ --save_strategy steps \
324
+ --save_total_limit 1 \
325
+ --load_best_model_at_end \
326
+ --lora_r 4 \
327
+ --lora_alpha 32 \
328
+ --lora_dropout 0.0 \
329
+ --add_instruction_replay \
330
+ --data_replay_freq -1 \
331
+ --replay_after_n_epoch 0 \
332
+ --model_name inflora \
333
+ --threshold 0.995 \
334
+ --kl_ratio 0.5 \
335
+ --attn_temperature 1
336
+
337
+
338
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
339
+ --do_train \
340
+ --do_predict \
341
+ --predict_with_generate \
342
+ --model_name_or_path $2 \
343
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights/trans_input.pt \
344
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights \
345
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights/prompts_keys_till_now.pt \
346
+ --data_dir CL_Benchmark \
347
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
348
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
349
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task363_sst2_polarity_classification \
350
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification \
351
+ --per_device_train_batch_size 16 \
352
+ --per_device_eval_batch_size 8 \
353
+ --gradient_accumulation_steps 2 \
354
+ --learning_rate 0.0003 \
355
+ --num_train_epochs 100 \
356
+ --run_name gen_script_superni_order2_t5_small_inflora \
357
+ --max_source_length 512 \
358
+ --max_target_length 50 \
359
+ --generation_max_length 50 \
360
+ --add_task_name False \
361
+ --add_dataset_name False \
362
+ --overwrite_output_dir \
363
+ --overwrite_cache \
364
+ --lr_scheduler_type constant \
365
+ --warmup_steps 0 \
366
+ --logging_strategy steps \
367
+ --logging_steps 10 \
368
+ --metric_for_best_model eval_rougeL_for_task363_sst2_polarity_classification \
369
+ --evaluation_strategy steps \
370
+ --save_strategy steps \
371
+ --save_total_limit 1 \
372
+ --load_best_model_at_end \
373
+ --lora_r 4 \
374
+ --lora_alpha 32 \
375
+ --lora_dropout 0.0 \
376
+ --add_instruction_replay \
377
+ --data_replay_freq -1 \
378
+ --replay_after_n_epoch 0 \
379
+ --model_name inflora \
380
+ --threshold 0.995 \
381
+ --kl_ratio 0.5 \
382
+ --attn_temperature 1
383
+
384
+
385
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
386
+ --do_train \
387
+ --do_predict \
388
+ --predict_with_generate \
389
+ --model_name_or_path $2 \
390
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights/trans_input.pt \
391
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights \
392
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights/prompts_keys_till_now.pt \
393
+ --data_dir CL_Benchmark \
394
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
395
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
396
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1510_evalution_relation_extraction \
397
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction \
398
+ --per_device_train_batch_size 16 \
399
+ --per_device_eval_batch_size 8 \
400
+ --gradient_accumulation_steps 2 \
401
+ --learning_rate 0.0003 \
402
+ --num_train_epochs 100 \
403
+ --run_name gen_script_superni_order2_t5_small_inflora \
404
+ --max_source_length 512 \
405
+ --max_target_length 50 \
406
+ --generation_max_length 50 \
407
+ --add_task_name False \
408
+ --add_dataset_name False \
409
+ --overwrite_output_dir \
410
+ --overwrite_cache \
411
+ --lr_scheduler_type constant \
412
+ --warmup_steps 0 \
413
+ --logging_strategy steps \
414
+ --logging_steps 10 \
415
+ --metric_for_best_model eval_rougeL_for_task1510_evalution_relation_extraction \
416
+ --evaluation_strategy steps \
417
+ --save_strategy steps \
418
+ --save_total_limit 1 \
419
+ --load_best_model_at_end \
420
+ --lora_r 4 \
421
+ --lora_alpha 32 \
422
+ --lora_dropout 0.0 \
423
+ --add_instruction_replay \
424
+ --data_replay_freq -1 \
425
+ --replay_after_n_epoch 0 \
426
+ --model_name inflora \
427
+ --threshold 0.995 \
428
+ --kl_ratio 0.5 \
429
+ --attn_temperature 1
430
+
431
+
432
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
433
+ --do_train \
434
+ --do_predict \
435
+ --predict_with_generate \
436
+ --model_name_or_path $2 \
437
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights/trans_input.pt \
438
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights \
439
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights/prompts_keys_till_now.pt \
440
+ --data_dir CL_Benchmark \
441
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
442
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
443
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1729_personachat_generate_next \
444
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next \
445
+ --per_device_train_batch_size 16 \
446
+ --per_device_eval_batch_size 8 \
447
+ --gradient_accumulation_steps 2 \
448
+ --learning_rate 0.0003 \
449
+ --num_train_epochs 100 \
450
+ --run_name gen_script_superni_order2_t5_small_inflora \
451
+ --max_source_length 512 \
452
+ --max_target_length 50 \
453
+ --generation_max_length 50 \
454
+ --add_task_name False \
455
+ --add_dataset_name False \
456
+ --overwrite_output_dir \
457
+ --overwrite_cache \
458
+ --lr_scheduler_type constant \
459
+ --warmup_steps 0 \
460
+ --logging_strategy steps \
461
+ --logging_steps 10 \
462
+ --metric_for_best_model eval_rougeL_for_task1729_personachat_generate_next \
463
+ --evaluation_strategy steps \
464
+ --save_strategy steps \
465
+ --save_total_limit 1 \
466
+ --load_best_model_at_end \
467
+ --lora_r 4 \
468
+ --lora_alpha 32 \
469
+ --lora_dropout 0.0 \
470
+ --add_instruction_replay \
471
+ --data_replay_freq -1 \
472
+ --replay_after_n_epoch 0 \
473
+ --model_name inflora \
474
+ --threshold 0.995 \
475
+ --kl_ratio 0.5 \
476
+ --attn_temperature 1
477
+
478
+
479
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
480
+ --do_train \
481
+ --do_predict \
482
+ --predict_with_generate \
483
+ --model_name_or_path $2 \
484
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights/trans_input.pt \
485
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights \
486
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights/prompts_keys_till_now.pt \
487
+ --data_dir CL_Benchmark \
488
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
489
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
490
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task181_outcome_extraction \
491
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction \
492
+ --per_device_train_batch_size 16 \
493
+ --per_device_eval_batch_size 8 \
494
+ --gradient_accumulation_steps 2 \
495
+ --learning_rate 0.0003 \
496
+ --num_train_epochs 100 \
497
+ --run_name gen_script_superni_order2_t5_small_inflora \
498
+ --max_source_length 512 \
499
+ --max_target_length 50 \
500
+ --generation_max_length 50 \
501
+ --add_task_name False \
502
+ --add_dataset_name False \
503
+ --overwrite_output_dir \
504
+ --overwrite_cache \
505
+ --lr_scheduler_type constant \
506
+ --warmup_steps 0 \
507
+ --logging_strategy steps \
508
+ --logging_steps 10 \
509
+ --metric_for_best_model eval_rougeL_for_task181_outcome_extraction \
510
+ --evaluation_strategy steps \
511
+ --save_strategy steps \
512
+ --save_total_limit 1 \
513
+ --load_best_model_at_end \
514
+ --lora_r 4 \
515
+ --lora_alpha 32 \
516
+ --lora_dropout 0.0 \
517
+ --add_instruction_replay \
518
+ --data_replay_freq -1 \
519
+ --replay_after_n_epoch 0 \
520
+ --model_name inflora \
521
+ --threshold 0.995 \
522
+ --kl_ratio 0.5 \
523
+ --attn_temperature 1
524
+
525
+
526
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
527
+ --do_train \
528
+ --do_predict \
529
+ --predict_with_generate \
530
+ --model_name_or_path $2 \
531
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights/trans_input.pt \
532
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights \
533
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights/prompts_keys_till_now.pt \
534
+ --data_dir CL_Benchmark \
535
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
536
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
537
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task511_reddit_tifu_long_text_summarization \
538
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization \
539
+ --per_device_train_batch_size 16 \
540
+ --per_device_eval_batch_size 8 \
541
+ --gradient_accumulation_steps 2 \
542
+ --learning_rate 0.0003 \
543
+ --num_train_epochs 100 \
544
+ --run_name gen_script_superni_order2_t5_small_inflora \
545
+ --max_source_length 512 \
546
+ --max_target_length 50 \
547
+ --generation_max_length 50 \
548
+ --add_task_name False \
549
+ --add_dataset_name False \
550
+ --overwrite_output_dir \
551
+ --overwrite_cache \
552
+ --lr_scheduler_type constant \
553
+ --warmup_steps 0 \
554
+ --logging_strategy steps \
555
+ --logging_steps 10 \
556
+ --metric_for_best_model eval_rougeL_for_task511_reddit_tifu_long_text_summarization \
557
+ --evaluation_strategy steps \
558
+ --save_strategy steps \
559
+ --save_total_limit 1 \
560
+ --load_best_model_at_end \
561
+ --lora_r 4 \
562
+ --lora_alpha 32 \
563
+ --lora_dropout 0.0 \
564
+ --add_instruction_replay \
565
+ --data_replay_freq -1 \
566
+ --replay_after_n_epoch 0 \
567
+ --model_name inflora \
568
+ --threshold 0.995 \
569
+ --kl_ratio 0.5 \
570
+ --attn_temperature 1
571
+
572
+
573
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
574
+ --do_train \
575
+ --do_predict \
576
+ --predict_with_generate \
577
+ --model_name_or_path $2 \
578
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/trans_input.pt \
579
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights \
580
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights/prompts_keys_till_now.pt \
581
+ --data_dir CL_Benchmark \
582
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
583
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
584
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task002_quoref_answer_generation \
585
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation \
586
+ --per_device_train_batch_size 16 \
587
+ --per_device_eval_batch_size 8 \
588
+ --gradient_accumulation_steps 2 \
589
+ --learning_rate 0.0003 \
590
+ --num_train_epochs 100 \
591
+ --run_name gen_script_superni_order2_t5_small_inflora \
592
+ --max_source_length 512 \
593
+ --max_target_length 50 \
594
+ --generation_max_length 50 \
595
+ --add_task_name False \
596
+ --add_dataset_name False \
597
+ --overwrite_output_dir \
598
+ --overwrite_cache \
599
+ --lr_scheduler_type constant \
600
+ --warmup_steps 0 \
601
+ --logging_strategy steps \
602
+ --logging_steps 10 \
603
+ --metric_for_best_model eval_rougeL_for_task002_quoref_answer_generation \
604
+ --evaluation_strategy steps \
605
+ --save_strategy steps \
606
+ --save_total_limit 1 \
607
+ --load_best_model_at_end \
608
+ --lora_r 4 \
609
+ --lora_alpha 32 \
610
+ --lora_dropout 0.0 \
611
+ --add_instruction_replay \
612
+ --data_replay_freq -1 \
613
+ --replay_after_n_epoch 0 \
614
+ --model_name inflora \
615
+ --threshold 0.995 \
616
+ --kl_ratio 0.5 \
617
+ --attn_temperature 1
618
+
619
+
620
+ CUDA_VISIBLE_DEVICES=$1 python src/run_t5.py \
621
+ --do_train \
622
+ --do_predict \
623
+ --predict_with_generate \
624
+ --model_name_or_path $2 \
625
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation/saved_weights/trans_input.pt \
626
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation/saved_weights \
627
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation/saved_weights/prompts_keys_till_now.pt \
628
+ --data_dir CL_Benchmark \
629
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
630
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
631
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task1290_xsum_summarization \
632
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/14-task1290_xsum_summarization \
633
+ --per_device_train_batch_size 16 \
634
+ --per_device_eval_batch_size 8 \
635
+ --gradient_accumulation_steps 2 \
636
+ --learning_rate 0.0003 \
637
+ --num_train_epochs 100 \
638
+ --run_name gen_script_superni_order2_t5_small_inflora \
639
+ --max_source_length 512 \
640
+ --max_target_length 50 \
641
+ --generation_max_length 50 \
642
+ --add_task_name False \
643
+ --add_dataset_name False \
644
+ --overwrite_output_dir \
645
+ --overwrite_cache \
646
+ --lr_scheduler_type constant \
647
+ --warmup_steps 0 \
648
+ --logging_strategy steps \
649
+ --logging_steps 10 \
650
+ --metric_for_best_model eval_rougeL_for_task1290_xsum_summarization \
651
+ --evaluation_strategy steps \
652
+ --save_strategy steps \
653
+ --save_total_limit 1 \
654
+ --load_best_model_at_end \
655
+ --lora_r 4 \
656
+ --lora_alpha 32 \
657
+ --lora_dropout 0.0 \
658
+ --add_instruction_replay \
659
+ --data_replay_freq -1 \
660
+ --replay_after_n_epoch 0 \
661
+ --model_name inflora \
662
+ --threshold 0.995 \
663
+ --kl_ratio 0.5 \
664
+ --attn_temperature 1
665
+
666
+
667
+ # Stage 15 of 15 (task875_emotion_classification): train and predict with src/run_t5.py.
+ # $1 -> CUDA_VISIBLE_DEVICES, $2 -> --model_name_or_path; both are quoted so a value containing
+ # whitespace or glob characters reaches the program as one argument.
+ # Consumes stage-14 artifacts (trans_input.pt, prompts_keys_till_now.pt) and the saved_weights
+ # directories of stages 1-14 passed through --previous_lora_path.
+ CUDA_VISIBLE_DEVICES="$1" python src/run_t5.py \
668
+ --do_train \
669
+ --do_predict \
670
+ --predict_with_generate \
671
+ --model_name_or_path "$2" \
672
+ --load_checkpoint_from logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/14-task1290_xsum_summarization/saved_weights/trans_input.pt \
673
+ --previous_lora_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/1-task748_glucose_reverse_cause_event_detection/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/2-task073_commonsenseqa_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/3-task1590_diplomacy_text_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/4-task639_multi_woz_user_utterance_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/5-task1572_samsum_summary/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/6-task1687_sentiment140_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/7-task591_sciq_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/8-task363_sst2_polarity_classification/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/9-task1510_evalution_relation_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/10-task1729_personachat_generate_next/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/11-task181_outcome_extraction/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/12-task511_reddit_tifu_long_text_summarization/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/13-task002_quoref_answer_generation/saved_weights,logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/14-task1290_xsum_summarization/saved_weights \
674
+ --previous_prompt_key_path logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/14-task1290_xsum_summarization/saved_weights/prompts_keys_till_now.pt \
675
+ --data_dir CL_Benchmark \
676
+ --task_order task748_glucose_reverse_cause_event_detection,task073_commonsenseqa_answer_generation,task1590_diplomacy_text_generation,task639_multi_woz_user_utterance_generation,task1572_samsum_summary,task1687_sentiment140_classification,task591_sciq_answer_generation,task363_sst2_polarity_classification,task1510_evalution_relation_extraction,task1729_personachat_generate_next,task181_outcome_extraction,task511_reddit_tifu_long_text_summarization,task002_quoref_answer_generation,task1290_xsum_summarization,task875_emotion_classification \
677
+ --gen_data_dir generated_data/lora_gen_superni_t5 \
678
+ --task_config_dir configs/gen_script_superni_order2_t5_small_configs/task875_emotion_classification \
679
+ --output_dir logs_and_outputs/gen_script_superni_order2_t5_small_inflora/outputs/15-task875_emotion_classification \
680
+ --per_device_train_batch_size 16 \
681
+ --per_device_eval_batch_size 8 \
682
+ --gradient_accumulation_steps 2 \
683
+ --learning_rate 0.0003 \
684
+ --num_train_epochs 100 \
685
+ --run_name gen_script_superni_order2_t5_small_inflora \
686
+ --max_source_length 512 \
687
+ --max_target_length 50 \
688
+ --generation_max_length 50 \
689
+ --add_task_name False \
690
+ --add_dataset_name False \
691
+ --overwrite_output_dir \
692
+ --overwrite_cache \
693
+ --lr_scheduler_type constant \
694
+ --warmup_steps 0 \
695
+ --logging_strategy steps \
696
+ --logging_steps 10 \
697
+ --metric_for_best_model eval_rougeL_for_task875_emotion_classification \
698
+ --evaluation_strategy steps \
699
+ --save_strategy steps \
700
+ --save_total_limit 1 \
701
+ --load_best_model_at_end \
702
+ --lora_r 4 \
703
+ --lora_alpha 32 \
704
+ --lora_dropout 0.0 \
705
+ --add_instruction_replay \
706
+ --data_replay_freq -1 \
707
+ --replay_after_n_epoch 0 \
708
+ --model_name inflora \
709
+ --threshold 0.995 \
710
+ --kl_ratio 0.5 \
711
+ --attn_temperature 1
712
+ # Post-run step: call score.py with the run name as both positional arguments (their meaning is defined in score.py, not shown here). NOTE(review): this runs even if the training command above failed unless `set -e` is enabled earlier in the script - confirm.
713
+ python score.py gen_script_superni_order2_t5_small_inflora gen_script_superni_order2_t5_small_inflora