natmin322 committed on
Commit
915a112
·
1 Parent(s): bb4c9d9

fix: pass attention_mask directly to model.generate(), not via GenerationConfig

Browse files
fix_attention_mask_gen.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fix: attention_mask is incorrectly added to gen_kwargs before GenerationConfig,
3
+ but GenerationConfig does not accept attention_mask. It must be extracted and
4
+ passed directly to model.generate(), just like the synced_gpus fix.
5
+ """
6
+ import os, re
7
+
8
+ REPOS = ["root_gainlora/src", "improve_gainlora/src"]
9
+
10
+ # Pattern to find and fix
11
+ OLD_BLOCK = (
12
+ ' if "attention_mask" in inputs:\n'
13
+ ' gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)\n'
14
+ '\n'
15
+ ' generation_config = GenerationConfig(**gen_kwargs)\n'
16
+ )
17
+ NEW_BLOCK = (
18
+ ' attention_mask = inputs.get("attention_mask", None)\n'
19
+ '\n'
20
+ ' generation_config = GenerationConfig(**gen_kwargs)\n'
21
+ )
22
+
23
+ ALREADY = 'attention_mask = inputs.get("attention_mask", None)'
24
+
25
+ # All three model.generate() patterns that need attention_mask added
26
+ # Pattern A: encoder-decoder branch (first if); note the trailing whitespace after "input_ids=generation_inputs,"
27
+ OLD_GEN_A = (
28
+ ' generated_tokens = self.model.generate(\n'
29
+ ' input_ids=generation_inputs, \n'
30
+ ' generation_config=generation_config,\n'
31
+ ' synced_gpus=synced_gpus,\n'
32
+ ' )\n'
33
+ )
34
+ NEW_GEN_A = (
35
+ ' generated_tokens = self.model.generate(\n'
36
+ ' input_ids=generation_inputs, \n'
37
+ ' generation_config=generation_config,\n'
38
+ ' attention_mask=attention_mask,\n'
39
+ ' synced_gpus=synced_gpus,\n'
40
+ ' )\n'
41
+ )
42
+
43
+ # Pattern B: LLaMA branch with input_ids_wo_label
44
+ OLD_GEN_B = (
45
+ ' generated_tokens = self.model.generate(\n'
46
+ ' input_ids=generation_inputs,\n'
47
+ ' input_ids_wo_label=inputs["input_ids_wo_label"],\n'
48
+ ' generation_config=generation_config,\n'
49
+ ' synced_gpus=synced_gpus,\n'
50
+ ' )\n'
51
+ )
52
+ NEW_GEN_B = (
53
+ ' generated_tokens = self.model.generate(\n'
54
+ ' input_ids=generation_inputs,\n'
55
+ ' input_ids_wo_label=inputs["input_ids_wo_label"],\n'
56
+ ' generation_config=generation_config,\n'
57
+ ' attention_mask=attention_mask,\n'
58
+ ' synced_gpus=synced_gpus,\n'
59
+ ' )\n'
60
+ )
61
+
62
+ # Pattern C: T5 (else branch, no input_ids_wo_label)
63
+ OLD_GEN_C = (
64
+ ' generated_tokens = self.model.generate(\n'
65
+ ' input_ids=generation_inputs,\n'
66
+ ' generation_config=generation_config,\n'
67
+ ' synced_gpus=synced_gpus,\n'
68
+ ' )\n'
69
+ )
70
+ NEW_GEN_C = (
71
+ ' generated_tokens = self.model.generate(\n'
72
+ ' input_ids=generation_inputs,\n'
73
+ ' generation_config=generation_config,\n'
74
+ ' attention_mask=attention_mask,\n'
75
+ ' synced_gpus=synced_gpus,\n'
76
+ ' )\n'
77
+ )
78
+
79
+ for repo in REPOS:
80
+ for fname in sorted(os.listdir(repo)):
81
+ if not fname.startswith("cl_trainer_") or not fname.endswith(".py"):
82
+ continue
83
+ fpath = os.path.join(repo, fname)
84
+ with open(fpath) as f:
85
+ src = f.read()
86
+ if ALREADY in src:
87
+ print(f"SKIP (already fixed): {fpath}")
88
+ continue
89
+ if OLD_BLOCK not in src:
90
+ print(f"SKIP (no old block): {fpath}")
91
+ continue
92
+ new_src = src.replace(OLD_BLOCK, NEW_BLOCK, 1)
93
+ new_src = new_src.replace(OLD_GEN_A, NEW_GEN_A)
94
+ new_src = new_src.replace(OLD_GEN_B, NEW_GEN_B)
95
+ new_src = new_src.replace(OLD_GEN_C, NEW_GEN_C)
96
+ with open(fpath, "w") as f:
97
+ f.write(new_src)
98
+ print(f"FIXED: {fpath}")
99
+
100
+ print("Done.")
fix_attention_mask_gen2.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fix v2: For all files where attention_mask is still in gen_kwargs before GenerationConfig.
3
+ Uses regex to handle all ordering variants.
4
+ """
5
+ import os, re
6
+
7
REPOS = ["root_gainlora/src", "improve_gainlora/src"]
# Substring marking an unpatched file: the mask is still routed through gen_kwargs.
OLD_FLAG = 'gen_kwargs["attention_mask"]'
# Substring marking an already-patched file: the mask is read straight from inputs.
ALREADY = 'attention_mask = inputs.get("attention_mask", None)'

# The two-line block that copies the mask into gen_kwargs, at any indentation.
# Group 1 is the indentation of the `if`, reused for the replacement line.
# A blank line that may follow is left alone, so both the "blank line after"
# and "no blank line after" variants are handled by this single pattern.
_MASK_IN_GEN_KWARGS = re.compile(
    r'^([ \t]*)if "attention_mask" in inputs:\n'
    r'[ \t]+gen_kwargs\["attention_mask"\] = inputs\.get\("attention_mask", None\)\n',
    re.MULTILINE,
)

# A multi-line `self.model.generate(` call that ends with
# `synced_gpus=synced_gpus,` followed by the closing parenthesis.
#   group 1: the call opener plus every argument line before synced_gpus
#   group 2: indentation of the synced_gpus line (reused for the new line)
#   group 3: the synced_gpus line and the closing parenthesis
# The lookaheads skip the indentation first, so an argument line that already
# passes attention_mask prevents a match (no duplicate keyword), and a line
# starting with ")" stops the scan from running into a later call.
_GENERATE_CALL = re.compile(
    r'(self\.model\.generate\(\n'
    r'(?:(?![ \t]*(?:attention_mask|synced_gpus)\b)(?![ \t]*\))[^\n]*\n)*?)'
    r'([ \t]*)(synced_gpus=synced_gpus,\n[ \t]*\))'
)


def fix_file(fpath):
    """Patch one trainer file so attention_mask is passed to generate().

    The ``if "attention_mask" in inputs: gen_kwargs["attention_mask"] = ...``
    block is replaced by a plain ``attention_mask = inputs.get(...)``
    assignment, and ``attention_mask=attention_mask,`` is inserted before
    ``synced_gpus=synced_gpus,`` in every matching ``self.model.generate(``
    call that does not already pass ``attention_mask``.

    The file is rewritten in place; one status line is printed per file.

    Args:
        fpath: Path of the Python source file to patch.

    Returns:
        None.
    """
    with open(fpath, encoding="utf-8") as f:
        src = f.read()

    if ALREADY in src:
        print(f"SKIP (already): {fpath}")
        return
    if OLD_FLAG not in src:
        print(f"SKIP (no flag): {fpath}")
        return

    # Step 1: replace the gen_kwargs["attention_mask"] block with a local variable.
    step1, extracted = _MASK_IN_GEN_KWARGS.subn(
        lambda m: f'{m.group(1)}attention_mask = inputs.get("attention_mask", None)\n',
        src,
    )
    if not extracted:
        # Without the extraction there is no `attention_mask` variable, so
        # touching the generate() calls would leave an undefined name behind.
        print(f"WARNING: no change for {fpath}")
        return

    # Step 2: add attention_mask= to every generate() call that lacks it,
    # aligned with the synced_gpus argument it is inserted before.
    result, patched_calls = _GENERATE_CALL.subn(
        lambda m: (
            f"{m.group(1)}{m.group(2)}attention_mask=attention_mask,\n"
            f"{m.group(2)}{m.group(3)}"
        ),
        step1,
    )

    with open(fpath, "w", encoding="utf-8") as f:
        f.write(result)

    if patched_calls:
        print(f"FIXED: {fpath}")
    else:
        # The mask is no longer in gen_kwargs but nothing passes it on;
        # the generate() calls in this file need a manual look.
        print(
            "WARNING: attention_mask extracted but no model.generate() call "
            f"was updated in {fpath}"
        )
50
+
51
# Patch every cl_trainer_*.py file found directly inside each repo directory,
# in sorted filename order.
for repo in REPOS:
    trainer_files = [
        name
        for name in sorted(os.listdir(repo))
        if name.startswith("cl_trainer_") and name.endswith(".py")
    ]
    for name in trainer_files:
        fix_file(os.path.join(repo, name))

print("Done.")
improve_gainlora/src/cl_trainer_gainlora_inflora.py CHANGED
@@ -943,8 +943,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
943
 
944
  gen_kwargs["synced_gpus"] = False
945
 
946
- if "attention_mask" in inputs:
947
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
948
 
949
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
950
  generation_config = GenerationConfig(**gen_kwargs)
@@ -958,6 +957,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
958
  generated_tokens = self.model.generate(
959
  input_ids=generation_inputs,
960
  generation_config=generation_config,
 
961
  synced_gpus=synced_gpus,
962
  )
963
  else:
@@ -968,6 +968,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
968
  input_ids=generation_inputs,
969
  input_ids_wo_label=inputs["input_ids_wo_label"],
970
  generation_config=generation_config,
 
971
  synced_gpus=synced_gpus,
972
  )
973
 
@@ -975,6 +976,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
975
  generated_tokens = self.model.generate(
976
  input_ids=generation_inputs,
977
  generation_config=generation_config,
 
978
  synced_gpus=synced_gpus,
979
  )
980
 
 
943
 
944
  gen_kwargs["synced_gpus"] = False
945
 
946
+ attention_mask = inputs.get("attention_mask", None)
 
947
 
948
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
949
  generation_config = GenerationConfig(**gen_kwargs)
 
957
  generated_tokens = self.model.generate(
958
  input_ids=generation_inputs,
959
  generation_config=generation_config,
960
+ attention_mask=attention_mask,
961
  synced_gpus=synced_gpus,
962
  )
963
  else:
 
968
  input_ids=generation_inputs,
969
  input_ids_wo_label=inputs["input_ids_wo_label"],
970
  generation_config=generation_config,
971
+ attention_mask=attention_mask,
972
  synced_gpus=synced_gpus,
973
  )
974
 
 
976
  generated_tokens = self.model.generate(
977
  input_ids=generation_inputs,
978
  generation_config=generation_config,
979
+ attention_mask=attention_mask,
980
  synced_gpus=synced_gpus,
981
  )
982
 
improve_gainlora/src/cl_trainer_gainlora_inflora_llama.py CHANGED
@@ -974,8 +974,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
974
 
975
  gen_kwargs["synced_gpus"] = False
976
 
977
- if "attention_mask" in inputs:
978
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
979
 
980
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
981
  generation_config = GenerationConfig(**gen_kwargs)
@@ -989,6 +988,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
989
  generated_tokens = self.model.generate(
990
  input_ids=generation_inputs,
991
  generation_config=generation_config,
 
992
  synced_gpus=synced_gpus,
993
  )
994
  else:
@@ -999,6 +999,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
999
  input_ids=generation_inputs,
1000
  input_ids_wo_label=inputs["input_ids_wo_label"],
1001
  generation_config=generation_config,
 
1002
  synced_gpus=synced_gpus,
1003
  )
1004
 
@@ -1006,6 +1007,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
1006
  generated_tokens = self.model.generate(
1007
  input_ids=generation_inputs,
1008
  generation_config=generation_config,
 
1009
  synced_gpus=synced_gpus,
1010
  )
1011
 
 
974
 
975
  gen_kwargs["synced_gpus"] = False
976
 
977
+ attention_mask = inputs.get("attention_mask", None)
 
978
 
979
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
980
  generation_config = GenerationConfig(**gen_kwargs)
 
988
  generated_tokens = self.model.generate(
989
  input_ids=generation_inputs,
990
  generation_config=generation_config,
991
+ attention_mask=attention_mask,
992
  synced_gpus=synced_gpus,
993
  )
994
  else:
 
999
  input_ids=generation_inputs,
1000
  input_ids_wo_label=inputs["input_ids_wo_label"],
1001
  generation_config=generation_config,
1002
+ attention_mask=attention_mask,
1003
  synced_gpus=synced_gpus,
1004
  )
1005
 
 
1007
  generated_tokens = self.model.generate(
1008
  input_ids=generation_inputs,
1009
  generation_config=generation_config,
1010
+ attention_mask=attention_mask,
1011
  synced_gpus=synced_gpus,
1012
  )
1013
 
improve_gainlora/src/cl_trainer_gainlora_olora.py CHANGED
@@ -982,8 +982,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
982
 
983
  gen_kwargs["synced_gpus"] = False
984
 
985
- if "attention_mask" in inputs:
986
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
987
 
988
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
989
  generation_config = GenerationConfig(**gen_kwargs)
@@ -997,6 +996,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
997
  generated_tokens = self.model.generate(
998
  input_ids=generation_inputs,
999
  generation_config=generation_config,
 
1000
  synced_gpus=synced_gpus,
1001
  )
1002
  else:
@@ -1007,6 +1007,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
1007
  input_ids=generation_inputs,
1008
  input_ids_wo_label=inputs["input_ids_wo_label"],
1009
  generation_config=generation_config,
 
1010
  synced_gpus=synced_gpus,
1011
  )
1012
 
@@ -1014,6 +1015,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
1014
  generated_tokens = self.model.generate(
1015
  input_ids=generation_inputs,
1016
  generation_config=generation_config,
 
1017
  synced_gpus=synced_gpus,
1018
  )
1019
 
 
982
 
983
  gen_kwargs["synced_gpus"] = False
984
 
985
+ attention_mask = inputs.get("attention_mask", None)
 
986
 
987
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
988
  generation_config = GenerationConfig(**gen_kwargs)
 
996
  generated_tokens = self.model.generate(
997
  input_ids=generation_inputs,
998
  generation_config=generation_config,
999
+ attention_mask=attention_mask,
1000
  synced_gpus=synced_gpus,
1001
  )
1002
  else:
 
1007
  input_ids=generation_inputs,
1008
  input_ids_wo_label=inputs["input_ids_wo_label"],
1009
  generation_config=generation_config,
1010
+ attention_mask=attention_mask,
1011
  synced_gpus=synced_gpus,
1012
  )
1013
 
 
1015
  generated_tokens = self.model.generate(
1016
  input_ids=generation_inputs,
1017
  generation_config=generation_config,
1018
+ attention_mask=attention_mask,
1019
  synced_gpus=synced_gpus,
1020
  )
1021
 
improve_gainlora/src/cl_trainer_gainlora_olora_llama.py CHANGED
@@ -965,8 +965,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
965
 
966
  gen_kwargs["synced_gpus"] = False
967
 
968
- if "attention_mask" in inputs:
969
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
970
 
971
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
972
  generation_config = GenerationConfig(**gen_kwargs)
@@ -980,6 +979,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
980
  generated_tokens = self.model.generate(
981
  input_ids=generation_inputs,
982
  generation_config=generation_config,
 
983
  synced_gpus=synced_gpus,
984
  )
985
  else:
@@ -990,6 +990,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
990
  input_ids=generation_inputs,
991
  input_ids_wo_label=inputs["input_ids_wo_label"],
992
  generation_config=generation_config,
 
993
  synced_gpus=synced_gpus,
994
  )
995
 
@@ -997,6 +998,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
997
  generated_tokens = self.model.generate(
998
  input_ids=generation_inputs,
999
  generation_config=generation_config,
 
1000
  synced_gpus=synced_gpus,
1001
  )
1002
 
 
965
 
966
  gen_kwargs["synced_gpus"] = False
967
 
968
+ attention_mask = inputs.get("attention_mask", None)
 
969
 
970
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
971
  generation_config = GenerationConfig(**gen_kwargs)
 
979
  generated_tokens = self.model.generate(
980
  input_ids=generation_inputs,
981
  generation_config=generation_config,
982
+ attention_mask=attention_mask,
983
  synced_gpus=synced_gpus,
984
  )
985
  else:
 
990
  input_ids=generation_inputs,
991
  input_ids_wo_label=inputs["input_ids_wo_label"],
992
  generation_config=generation_config,
993
+ attention_mask=attention_mask,
994
  synced_gpus=synced_gpus,
995
  )
996
 
 
998
  generated_tokens = self.model.generate(
999
  input_ids=generation_inputs,
1000
  generation_config=generation_config,
1001
+ attention_mask=attention_mask,
1002
  synced_gpus=synced_gpus,
1003
  )
1004
 
improve_gainlora/src/cl_trainer_inflora.py CHANGED
@@ -738,8 +738,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
738
 
739
  gen_kwargs["synced_gpus"] = False
740
 
741
- if "attention_mask" in inputs:
742
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
743
 
744
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
745
  generation_config = GenerationConfig(**gen_kwargs)
@@ -753,6 +752,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
753
  generated_tokens = self.model.generate(
754
  input_ids=generation_inputs,
755
  generation_config=generation_config,
 
756
  synced_gpus=synced_gpus,
757
  )
758
  else:
@@ -763,6 +763,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
763
  input_ids=generation_inputs,
764
  input_ids_wo_label=inputs["input_ids_wo_label"],
765
  generation_config=generation_config,
 
766
  synced_gpus=synced_gpus,
767
  )
768
 
@@ -770,6 +771,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
770
  generated_tokens = self.model.generate(
771
  input_ids=generation_inputs,
772
  generation_config=generation_config,
 
773
  synced_gpus=synced_gpus,
774
  )
775
 
 
738
 
739
  gen_kwargs["synced_gpus"] = False
740
 
741
+ attention_mask = inputs.get("attention_mask", None)
 
742
 
743
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
744
  generation_config = GenerationConfig(**gen_kwargs)
 
752
  generated_tokens = self.model.generate(
753
  input_ids=generation_inputs,
754
  generation_config=generation_config,
755
+ attention_mask=attention_mask,
756
  synced_gpus=synced_gpus,
757
  )
758
  else:
 
763
  input_ids=generation_inputs,
764
  input_ids_wo_label=inputs["input_ids_wo_label"],
765
  generation_config=generation_config,
766
+ attention_mask=attention_mask,
767
  synced_gpus=synced_gpus,
768
  )
769
 
 
771
  generated_tokens = self.model.generate(
772
  input_ids=generation_inputs,
773
  generation_config=generation_config,
774
+ attention_mask=attention_mask,
775
  synced_gpus=synced_gpus,
776
  )
777
 
improve_gainlora/src/cl_trainer_inflora_llama.py CHANGED
@@ -765,8 +765,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
765
 
766
  gen_kwargs["synced_gpus"] = False
767
 
768
- if "attention_mask" in inputs:
769
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
770
 
771
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
772
  generation_config = GenerationConfig(**gen_kwargs)
@@ -780,6 +779,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
780
  generated_tokens = self.model.generate(
781
  input_ids=generation_inputs,
782
  generation_config=generation_config,
 
783
  synced_gpus=synced_gpus,
784
  )
785
  else:
@@ -790,6 +790,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
790
  input_ids=generation_inputs,
791
  input_ids_wo_label=inputs["input_ids_wo_label"],
792
  generation_config=generation_config,
 
793
  synced_gpus=synced_gpus,
794
  )
795
 
@@ -797,6 +798,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
797
  generated_tokens = self.model.generate(
798
  input_ids=generation_inputs,
799
  generation_config=generation_config,
 
800
  synced_gpus=synced_gpus,
801
  )
802
 
 
765
 
766
  gen_kwargs["synced_gpus"] = False
767
 
768
+ attention_mask = inputs.get("attention_mask", None)
 
769
 
770
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
771
  generation_config = GenerationConfig(**gen_kwargs)
 
779
  generated_tokens = self.model.generate(
780
  input_ids=generation_inputs,
781
  generation_config=generation_config,
782
+ attention_mask=attention_mask,
783
  synced_gpus=synced_gpus,
784
  )
785
  else:
 
790
  input_ids=generation_inputs,
791
  input_ids_wo_label=inputs["input_ids_wo_label"],
792
  generation_config=generation_config,
793
+ attention_mask=attention_mask,
794
  synced_gpus=synced_gpus,
795
  )
796
 
 
798
  generated_tokens = self.model.generate(
799
  input_ids=generation_inputs,
800
  generation_config=generation_config,
801
+ attention_mask=attention_mask,
802
  synced_gpus=synced_gpus,
803
  )
804
 
improve_gainlora/src/cl_trainer_olora.py CHANGED
@@ -580,8 +580,7 @@ class OLoRATrainer(Seq2SeqTrainer):
580
 
581
  gen_kwargs["synced_gpus"] = False
582
 
583
- if "attention_mask" in inputs:
584
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
585
 
586
  generation_config = GenerationConfig(**gen_kwargs)
587
 
 
580
 
581
  gen_kwargs["synced_gpus"] = False
582
 
583
+ attention_mask = inputs.get("attention_mask", None)
 
584
 
585
  generation_config = GenerationConfig(**gen_kwargs)
586
 
improve_gainlora/src/cl_trainer_olora_llama.py CHANGED
@@ -582,8 +582,7 @@ class OLoRATrainer(Seq2SeqTrainer):
582
 
583
  gen_kwargs["synced_gpus"] = False
584
 
585
- if "attention_mask" in inputs:
586
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
587
 
588
  generation_config = GenerationConfig(**gen_kwargs)
589
 
 
582
 
583
  gen_kwargs["synced_gpus"] = False
584
 
585
+ attention_mask = inputs.get("attention_mask", None)
 
586
 
587
  generation_config = GenerationConfig(**gen_kwargs)
588
 
improve_gainlora/src/cl_trainer_specroute.py CHANGED
@@ -585,12 +585,11 @@ class SpecRoute_Trainer(Seq2SeqTrainer):
585
  }
586
  gen_kwargs["synced_gpus"] = False
587
 
588
- if "attention_mask" in inputs:
589
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
590
 
591
  # synced_gpus and attention_mask must be passed to generate(), not GenerationConfig
592
  _synced_gpus = gen_kwargs.pop("synced_gpus", False)
593
- _attention_mask = gen_kwargs.pop("attention_mask", None)
594
 
595
  generation_config = GenerationConfig(**gen_kwargs)
596
 
 
585
  }
586
  gen_kwargs["synced_gpus"] = False
587
 
588
+ attention_mask = inputs.get("attention_mask", None)
 
589
 
590
  # synced_gpus and attention_mask must be passed to generate(), not GenerationConfig
591
  _synced_gpus = gen_kwargs.pop("synced_gpus", False)
592
+ _attention_mask = inputs.get("attention_mask", None) # from inputs, not gen_kwargs
593
 
594
  generation_config = GenerationConfig(**gen_kwargs)
595
 
root_gainlora/src/cl_trainer_gainlora_inflora.py CHANGED
@@ -952,8 +952,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
952
 
953
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
954
 
955
- if "attention_mask" in inputs:
956
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
957
 
958
  generation_config = GenerationConfig(**gen_kwargs)
959
 
@@ -966,6 +965,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
966
  generated_tokens = self.model.generate(
967
  input_ids=generation_inputs,
968
  generation_config=generation_config,
 
969
  synced_gpus=synced_gpus,
970
  )
971
  else:
@@ -976,6 +976,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
976
  input_ids=generation_inputs,
977
  input_ids_wo_label=inputs["input_ids_wo_label"],
978
  generation_config=generation_config,
 
979
  synced_gpus=synced_gpus,
980
  )
981
 
@@ -983,6 +984,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
983
  generated_tokens = self.model.generate(
984
  input_ids=generation_inputs,
985
  generation_config=generation_config,
 
986
  synced_gpus=synced_gpus,
987
  )
988
 
 
952
 
953
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
954
 
955
+ attention_mask = inputs.get("attention_mask", None)
 
956
 
957
  generation_config = GenerationConfig(**gen_kwargs)
958
 
 
965
  generated_tokens = self.model.generate(
966
  input_ids=generation_inputs,
967
  generation_config=generation_config,
968
+ attention_mask=attention_mask,
969
  synced_gpus=synced_gpus,
970
  )
971
  else:
 
976
  input_ids=generation_inputs,
977
  input_ids_wo_label=inputs["input_ids_wo_label"],
978
  generation_config=generation_config,
979
+ attention_mask=attention_mask,
980
  synced_gpus=synced_gpus,
981
  )
982
 
 
984
  generated_tokens = self.model.generate(
985
  input_ids=generation_inputs,
986
  generation_config=generation_config,
987
+ attention_mask=attention_mask,
988
  synced_gpus=synced_gpus,
989
  )
990
 
root_gainlora/src/cl_trainer_gainlora_inflora_llama.py CHANGED
@@ -975,8 +975,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
975
 
976
  gen_kwargs["synced_gpus"] = False
977
 
978
- if "attention_mask" in inputs:
979
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
980
 
981
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
982
  generation_config = GenerationConfig(**gen_kwargs)
@@ -990,6 +989,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
990
  generated_tokens = self.model.generate(
991
  input_ids=generation_inputs,
992
  generation_config=generation_config,
 
993
  synced_gpus=synced_gpus,
994
  )
995
  else:
@@ -1000,6 +1000,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
1000
  input_ids=generation_inputs,
1001
  input_ids_wo_label=inputs["input_ids_wo_label"],
1002
  generation_config=generation_config,
 
1003
  synced_gpus=synced_gpus,
1004
  )
1005
 
@@ -1007,6 +1008,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
1007
  generated_tokens = self.model.generate(
1008
  input_ids=generation_inputs,
1009
  generation_config=generation_config,
 
1010
  synced_gpus=synced_gpus,
1011
  )
1012
 
 
975
 
976
  gen_kwargs["synced_gpus"] = False
977
 
978
+ attention_mask = inputs.get("attention_mask", None)
 
979
 
980
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
981
  generation_config = GenerationConfig(**gen_kwargs)
 
989
  generated_tokens = self.model.generate(
990
  input_ids=generation_inputs,
991
  generation_config=generation_config,
992
+ attention_mask=attention_mask,
993
  synced_gpus=synced_gpus,
994
  )
995
  else:
 
1000
  input_ids=generation_inputs,
1001
  input_ids_wo_label=inputs["input_ids_wo_label"],
1002
  generation_config=generation_config,
1003
+ attention_mask=attention_mask,
1004
  synced_gpus=synced_gpus,
1005
  )
1006
 
 
1008
  generated_tokens = self.model.generate(
1009
  input_ids=generation_inputs,
1010
  generation_config=generation_config,
1011
+ attention_mask=attention_mask,
1012
  synced_gpus=synced_gpus,
1013
  )
1014
 
root_gainlora/src/cl_trainer_gainlora_olora.py CHANGED
@@ -983,8 +983,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
983
 
984
  gen_kwargs["synced_gpus"] = False
985
 
986
- if "attention_mask" in inputs:
987
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
988
 
989
  generation_config = GenerationConfig(**gen_kwargs)
990
 
 
983
 
984
  gen_kwargs["synced_gpus"] = False
985
 
986
+ attention_mask = inputs.get("attention_mask", None)
 
987
 
988
  generation_config = GenerationConfig(**gen_kwargs)
989
 
root_gainlora/src/cl_trainer_gainlora_olora_llama.py CHANGED
@@ -966,8 +966,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
966
 
967
  gen_kwargs["synced_gpus"] = False
968
 
969
- if "attention_mask" in inputs:
970
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
971
 
972
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
973
  generation_config = GenerationConfig(**gen_kwargs)
@@ -981,6 +980,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
981
  generated_tokens = self.model.generate(
982
  input_ids=generation_inputs,
983
  generation_config=generation_config,
 
984
  synced_gpus=synced_gpus,
985
  )
986
  else:
@@ -991,6 +991,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
991
  input_ids=generation_inputs,
992
  input_ids_wo_label=inputs["input_ids_wo_label"],
993
  generation_config=generation_config,
 
994
  synced_gpus=synced_gpus,
995
  )
996
 
@@ -998,6 +999,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
998
  generated_tokens = self.model.generate(
999
  input_ids=generation_inputs,
1000
  generation_config=generation_config,
 
1001
  synced_gpus=synced_gpus,
1002
  )
1003
 
 
966
 
967
  gen_kwargs["synced_gpus"] = False
968
 
969
+ attention_mask = inputs.get("attention_mask", None)
 
970
 
971
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
972
  generation_config = GenerationConfig(**gen_kwargs)
 
980
  generated_tokens = self.model.generate(
981
  input_ids=generation_inputs,
982
  generation_config=generation_config,
983
+ attention_mask=attention_mask,
984
  synced_gpus=synced_gpus,
985
  )
986
  else:
 
991
  input_ids=generation_inputs,
992
  input_ids_wo_label=inputs["input_ids_wo_label"],
993
  generation_config=generation_config,
994
+ attention_mask=attention_mask,
995
  synced_gpus=synced_gpus,
996
  )
997
 
 
999
  generated_tokens = self.model.generate(
1000
  input_ids=generation_inputs,
1001
  generation_config=generation_config,
1002
+ attention_mask=attention_mask,
1003
  synced_gpus=synced_gpus,
1004
  )
1005
 
root_gainlora/src/cl_trainer_inflora.py CHANGED
@@ -739,8 +739,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
739
 
740
  gen_kwargs["synced_gpus"] = False
741
 
742
- if "attention_mask" in inputs:
743
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
744
 
745
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
746
  generation_config = GenerationConfig(**gen_kwargs)
@@ -754,6 +753,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
754
  generated_tokens = self.model.generate(
755
  input_ids=generation_inputs,
756
  generation_config=generation_config,
 
757
  synced_gpus=synced_gpus,
758
  )
759
  else:
@@ -764,6 +764,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
764
  input_ids=generation_inputs,
765
  input_ids_wo_label=inputs["input_ids_wo_label"],
766
  generation_config=generation_config,
 
767
  synced_gpus=synced_gpus,
768
  )
769
 
@@ -771,6 +772,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
771
  generated_tokens = self.model.generate(
772
  input_ids=generation_inputs,
773
  generation_config=generation_config,
 
774
  synced_gpus=synced_gpus,
775
  )
776
 
 
739
 
740
  gen_kwargs["synced_gpus"] = False
741
 
742
+ attention_mask = inputs.get("attention_mask", None)
 
743
 
744
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
745
  generation_config = GenerationConfig(**gen_kwargs)
 
753
  generated_tokens = self.model.generate(
754
  input_ids=generation_inputs,
755
  generation_config=generation_config,
756
+ attention_mask=attention_mask,
757
  synced_gpus=synced_gpus,
758
  )
759
  else:
 
764
  input_ids=generation_inputs,
765
  input_ids_wo_label=inputs["input_ids_wo_label"],
766
  generation_config=generation_config,
767
+ attention_mask=attention_mask,
768
  synced_gpus=synced_gpus,
769
  )
770
 
 
772
  generated_tokens = self.model.generate(
773
  input_ids=generation_inputs,
774
  generation_config=generation_config,
775
+ attention_mask=attention_mask,
776
  synced_gpus=synced_gpus,
777
  )
778
 
root_gainlora/src/cl_trainer_inflora_llama.py CHANGED
@@ -766,8 +766,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
766
 
767
  gen_kwargs["synced_gpus"] = False
768
 
769
- if "attention_mask" in inputs:
770
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
771
 
772
  generation_config = GenerationConfig(**gen_kwargs)
773
 
 
766
 
767
  gen_kwargs["synced_gpus"] = False
768
 
769
+ attention_mask = inputs.get("attention_mask", None)
 
770
 
771
  generation_config = GenerationConfig(**gen_kwargs)
772
 
root_gainlora/src/cl_trainer_olora.py CHANGED
@@ -581,8 +581,7 @@ class OLoRATrainer(Seq2SeqTrainer):
581
 
582
  gen_kwargs["synced_gpus"] = False
583
 
584
- if "attention_mask" in inputs:
585
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
586
 
587
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
588
  generation_config = GenerationConfig(**gen_kwargs)
@@ -596,6 +595,7 @@ class OLoRATrainer(Seq2SeqTrainer):
596
  generated_tokens = self.model.generate(
597
  input_ids=generation_inputs,
598
  generation_config=generation_config,
 
599
  synced_gpus=synced_gpus,
600
  )
601
  else:
@@ -606,6 +606,7 @@ class OLoRATrainer(Seq2SeqTrainer):
606
  input_ids=generation_inputs,
607
  input_ids_wo_label=inputs["input_ids_wo_label"],
608
  generation_config=generation_config,
 
609
  synced_gpus=synced_gpus,
610
  )
611
 
@@ -613,6 +614,7 @@ class OLoRATrainer(Seq2SeqTrainer):
613
  generated_tokens = self.model.generate(
614
  input_ids=generation_inputs,
615
  generation_config=generation_config,
 
616
  synced_gpus=synced_gpus,
617
  )
618
 
 
581
 
582
  gen_kwargs["synced_gpus"] = False
583
 
584
+ attention_mask = inputs.get("attention_mask", None)
 
585
 
586
  synced_gpus = gen_kwargs.pop("synced_gpus", False)
587
  generation_config = GenerationConfig(**gen_kwargs)
 
595
  generated_tokens = self.model.generate(
596
  input_ids=generation_inputs,
597
  generation_config=generation_config,
598
+ attention_mask=attention_mask,
599
  synced_gpus=synced_gpus,
600
  )
601
  else:
 
606
  input_ids=generation_inputs,
607
  input_ids_wo_label=inputs["input_ids_wo_label"],
608
  generation_config=generation_config,
609
+ attention_mask=attention_mask,
610
  synced_gpus=synced_gpus,
611
  )
612
 
 
614
  generated_tokens = self.model.generate(
615
  input_ids=generation_inputs,
616
  generation_config=generation_config,
617
+ attention_mask=attention_mask,
618
  synced_gpus=synced_gpus,
619
  )
620
 
root_gainlora/src/cl_trainer_olora_llama.py CHANGED
@@ -583,8 +583,7 @@ class OLoRATrainer(Seq2SeqTrainer):
583
 
584
  gen_kwargs["synced_gpus"] = False
585
 
586
- if "attention_mask" in inputs:
587
- gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
588
 
589
  generation_config = GenerationConfig(**gen_kwargs)
590
 
 
583
 
584
  gen_kwargs["synced_gpus"] = False
585
 
586
+ attention_mask = inputs.get("attention_mask", None)
 
587
 
588
  generation_config = GenerationConfig(**gen_kwargs)
589