fix: pass attention_mask directly to model.generate(), not via GenerationConfig
Browse files
- fix_attention_mask_gen.py +100 -0
- fix_attention_mask_gen2.py +57 -0
- improve_gainlora/src/cl_trainer_gainlora_inflora.py +4 -2
- improve_gainlora/src/cl_trainer_gainlora_inflora_llama.py +4 -2
- improve_gainlora/src/cl_trainer_gainlora_olora.py +4 -2
- improve_gainlora/src/cl_trainer_gainlora_olora_llama.py +4 -2
- improve_gainlora/src/cl_trainer_inflora.py +4 -2
- improve_gainlora/src/cl_trainer_inflora_llama.py +4 -2
- improve_gainlora/src/cl_trainer_olora.py +1 -2
- improve_gainlora/src/cl_trainer_olora_llama.py +1 -2
- improve_gainlora/src/cl_trainer_specroute.py +2 -3
- root_gainlora/src/cl_trainer_gainlora_inflora.py +4 -2
- root_gainlora/src/cl_trainer_gainlora_inflora_llama.py +4 -2
- root_gainlora/src/cl_trainer_gainlora_olora.py +1 -2
- root_gainlora/src/cl_trainer_gainlora_olora_llama.py +4 -2
- root_gainlora/src/cl_trainer_inflora.py +4 -2
- root_gainlora/src/cl_trainer_inflora_llama.py +1 -2
- root_gainlora/src/cl_trainer_olora.py +4 -2
- root_gainlora/src/cl_trainer_olora_llama.py +1 -2
fix_attention_mask_gen.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Fix: attention_mask is incorrectly added to gen_kwargs before GenerationConfig,
but GenerationConfig does not accept attention_mask. It must be extracted and
passed directly to model.generate(), just like the synced_gpus fix.

Every ``cl_trainer_*.py`` file found under the directories in ``REPOS``
(resolved relative to the current working directory) is rewritten in place.
"""
import os
import re

REPOS = ["root_gainlora/src", "improve_gainlora/src"]

# Statement that replaces the old gen_kwargs assignment.  Its presence in a
# file is used as the "already processed" marker.
ALREADY = 'attention_mask = inputs.get("attention_mask", None)'

# Pattern to find and fix: the guarded gen_kwargs["attention_mask"] assignment,
# one blank line, then the GenerationConfig construction.  Indentation and
# trailing blanks are matched loosely so the pattern does not depend on the
# exact nesting depth of the surrounding method.
OLD_BLOCK_RE = re.compile(
    r'^(?P<indent>[ \t]*)if "attention_mask" in inputs:[ \t]*\n'
    r'[ \t]*gen_kwargs\["attention_mask"\] = '
    r'inputs\.get\("attention_mask", None\)[ \t]*\n'
    r'[ \t]*\n'
    r'(?P<config>[ \t]*generation_config = '
    r'GenerationConfig\(\*\*gen_kwargs\)[ \t]*\n)',
    re.MULTILINE,
)

# One pattern for all three model.generate() call shapes that need
# attention_mask added:
#   A: encoder-decoder branch (trailing blank after the input_ids line)
#   B: LLaMA branch with input_ids_wo_label
#   C: T5 (else branch, no input_ids_wo_label)
# generation_config must be directly followed by synced_gpus, so a call that
# already carries attention_mask between the two never matches again.
GENERATE_CALL_RE = re.compile(
    r'^(?P<head>[ \t]*generated_tokens = self\.model\.generate\([ \t]*\n'
    r'[ \t]*input_ids=generation_inputs,[ \t]*\n'
    r'(?:[ \t]*input_ids_wo_label=inputs\["input_ids_wo_label"\],[ \t]*\n)?'
    r'[ \t]*generation_config=generation_config,[ \t]*\n)'
    r'(?P<tail>(?P<indent>[ \t]*)synced_gpus=synced_gpus,[ \t]*\n'
    r'[ \t]*\)[ \t]*\n)',
    re.MULTILINE,
)


def _replace_old_block(match):
    """Build the replacement for one OLD_BLOCK_RE match, keeping its indent."""
    return f'{match["indent"]}{ALREADY}\n\n{match["config"]}'


def _add_mask_kwarg(match):
    """Insert attention_mask= right before synced_gpus=, at the same indent."""
    return (
        f'{match["head"]}'
        f'{match["indent"]}attention_mask=attention_mask,\n'
        f'{match["tail"]}'
    )


def patch_source(src):
    """Apply the fix to the text of one trainer file.

    Returns ``(new_src, calls_patched)``.  When the old block is absent the
    text is returned untouched with a count of 0: adding
    ``attention_mask=attention_mask`` to generate() without the extraction
    statement would reference an undefined local.
    """
    new_src, blocks = OLD_BLOCK_RE.subn(_replace_old_block, src, count=1)
    if not blocks:
        return src, 0
    return GENERATE_CALL_RE.subn(_add_mask_kwarg, new_src)


def fix_file(fpath):
    """Patch one trainer file in place and print what was done."""
    with open(fpath, encoding="utf-8") as f:
        src = f.read()
    if ALREADY in src:
        print(f"SKIP (already fixed): {fpath}")
        return
    if OLD_BLOCK_RE.search(src) is None:
        print(f"SKIP (no old block): {fpath}")
        return
    new_src, calls_patched = patch_source(src)
    with open(fpath, "w", encoding="utf-8") as f:
        f.write(new_src)
    if calls_patched:
        print(f"FIXED: {fpath}")
    else:
        # The mask is now extracted but nothing passes it on; say so instead
        # of reporting success, because ALREADY will hide the file from reruns.
        print(
            "WARNING: attention_mask extracted but no model.generate() "
            f"call was patched: {fpath}"
        )


def main():
    """Walk REPOS and patch every cl_trainer_*.py file."""
    for repo in REPOS:
        for fname in sorted(os.listdir(repo)):
            if not fname.startswith("cl_trainer_") or not fname.endswith(".py"):
                continue
            fix_file(os.path.join(repo, fname))
    print("Done.")


if __name__ == "__main__":
    main()
|
fix_attention_mask_gen2.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Fix v2: For all files where attention_mask is still in gen_kwargs before GenerationConfig.
Uses regex to handle all ordering variants.

Every ``cl_trainer_*.py`` file found under the directories in ``REPOS``
(resolved relative to the current working directory) is rewritten in place.
Running the script again on an already patched file changes nothing.
"""
import os
import re

REPOS = ["root_gainlora/src", "improve_gainlora/src"]
OLD_FLAG = 'gen_kwargs["attention_mask"]'
ALREADY = 'attention_mask = inputs.get("attention_mask", None)'

# Step 1 pattern: the guarded gen_kwargs["attention_mask"] assignment.  Whatever
# follows it (blank line or not) is left alone, which covers both variants.
_EXTRACT_RE = re.compile(
    r'^(?P<indent>[ \t]*)if "attention_mask" in inputs:[ \t]*\n'
    r'[ \t]*gen_kwargs\["attention_mask"\] = '
    r'inputs\.get\("attention_mask", None\)[ \t]*\n',
    re.MULTILINE,
)

# Step 2 pattern: a multi-line self.model.generate( call whose last argument is
# synced_gpus=synced_gpus.  The lookaheads skip the line's indentation before
# testing, so a call that already passes attention_mask is never matched, and a
# bare ")" line stops the scan from running into a following call.
_GENERATE_RE = re.compile(
    r'(?P<head>self\.model\.generate\([ \t]*\n'
    r'(?:(?![ \t]*attention_mask\b)(?![ \t]*synced_gpus\b)'
    r'(?![ \t]*\)[ \t]*\n)[^\n]*\n)*?)'
    r'(?P<tail>(?P<indent>[ \t]*)synced_gpus=synced_gpus,[ \t]*\n[ \t]*\))'
)


def _extract_mask(match):
    """Replace the guarded gen_kwargs assignment with a plain local, same indent."""
    return f'{match["indent"]}{ALREADY}\n'


def _add_mask_kwarg(match):
    """Insert attention_mask= right before synced_gpus=, at the same indent."""
    return (
        f'{match["head"]}'
        f'{match["indent"]}attention_mask=attention_mask,\n'
        f'{match["tail"]}'
    )


def patch_source(src):
    """Return the patched text of one trainer file (``src`` itself if nothing applies).

    Step 1 turns the gen_kwargs["attention_mask"] block into the ALREADY
    statement.  Step 2 adds ``attention_mask=attention_mask`` to generate()
    calls, but only when that statement exists afterwards; otherwise the
    inserted keyword would reference an undefined local.
    """
    step1 = _EXTRACT_RE.sub(_extract_mask, src)
    if ALREADY not in step1:
        return src
    return _GENERATE_RE.sub(_add_mask_kwarg, step1)


def fix_file(fpath):
    """Patch one trainer file in place and print what was done."""
    with open(fpath, encoding="utf-8") as f:
        src = f.read()

    # A file that already has the extraction is still inspected: its
    # generate() calls may not have been updated by an earlier partial run.
    if ALREADY not in src and OLD_FLAG not in src:
        print(f"SKIP (no flag): {fpath}")
        return

    result = patch_source(src)

    if result == src:
        if ALREADY in src:
            print(f"SKIP (already): {fpath}")
        else:
            print(f"WARNING: no change for {fpath}")
        return

    with open(fpath, "w", encoding="utf-8") as f:
        f.write(result)
    print(f"FIXED: {fpath}")


def main():
    """Walk REPOS and patch every cl_trainer_*.py file."""
    for repo in REPOS:
        for fname in sorted(os.listdir(repo)):
            if not fname.startswith("cl_trainer_") or not fname.endswith(".py"):
                continue
            fix_file(os.path.join(repo, fname))
    print("Done.")


if __name__ == "__main__":
    main()
|
improve_gainlora/src/cl_trainer_gainlora_inflora.py
CHANGED
|
@@ -943,8 +943,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 943 |
|
| 944 |
gen_kwargs["synced_gpus"] = False
|
| 945 |
|
| 946 |
-
|
| 947 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 948 |
|
| 949 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 950 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -958,6 +957,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 958 |
generated_tokens = self.model.generate(
|
| 959 |
input_ids=generation_inputs,
|
| 960 |
generation_config=generation_config,
|
|
|
|
| 961 |
synced_gpus=synced_gpus,
|
| 962 |
)
|
| 963 |
else:
|
|
@@ -968,6 +968,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 968 |
input_ids=generation_inputs,
|
| 969 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 970 |
generation_config=generation_config,
|
|
|
|
| 971 |
synced_gpus=synced_gpus,
|
| 972 |
)
|
| 973 |
|
|
@@ -975,6 +976,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 975 |
generated_tokens = self.model.generate(
|
| 976 |
input_ids=generation_inputs,
|
| 977 |
generation_config=generation_config,
|
|
|
|
| 978 |
synced_gpus=synced_gpus,
|
| 979 |
)
|
| 980 |
|
|
|
|
| 943 |
|
| 944 |
gen_kwargs["synced_gpus"] = False
|
| 945 |
|
| 946 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 947 |
|
| 948 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 949 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 957 |
generated_tokens = self.model.generate(
|
| 958 |
input_ids=generation_inputs,
|
| 959 |
generation_config=generation_config,
|
| 960 |
+
attention_mask=attention_mask,
|
| 961 |
synced_gpus=synced_gpus,
|
| 962 |
)
|
| 963 |
else:
|
|
|
|
| 968 |
input_ids=generation_inputs,
|
| 969 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 970 |
generation_config=generation_config,
|
| 971 |
+
attention_mask=attention_mask,
|
| 972 |
synced_gpus=synced_gpus,
|
| 973 |
)
|
| 974 |
|
|
|
|
| 976 |
generated_tokens = self.model.generate(
|
| 977 |
input_ids=generation_inputs,
|
| 978 |
generation_config=generation_config,
|
| 979 |
+
attention_mask=attention_mask,
|
| 980 |
synced_gpus=synced_gpus,
|
| 981 |
)
|
| 982 |
|
improve_gainlora/src/cl_trainer_gainlora_inflora_llama.py
CHANGED
|
@@ -974,8 +974,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 974 |
|
| 975 |
gen_kwargs["synced_gpus"] = False
|
| 976 |
|
| 977 |
-
|
| 978 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 979 |
|
| 980 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 981 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -989,6 +988,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 989 |
generated_tokens = self.model.generate(
|
| 990 |
input_ids=generation_inputs,
|
| 991 |
generation_config=generation_config,
|
|
|
|
| 992 |
synced_gpus=synced_gpus,
|
| 993 |
)
|
| 994 |
else:
|
|
@@ -999,6 +999,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 999 |
input_ids=generation_inputs,
|
| 1000 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 1001 |
generation_config=generation_config,
|
|
|
|
| 1002 |
synced_gpus=synced_gpus,
|
| 1003 |
)
|
| 1004 |
|
|
@@ -1006,6 +1007,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 1006 |
generated_tokens = self.model.generate(
|
| 1007 |
input_ids=generation_inputs,
|
| 1008 |
generation_config=generation_config,
|
|
|
|
| 1009 |
synced_gpus=synced_gpus,
|
| 1010 |
)
|
| 1011 |
|
|
|
|
| 974 |
|
| 975 |
gen_kwargs["synced_gpus"] = False
|
| 976 |
|
| 977 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 978 |
|
| 979 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 980 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 988 |
generated_tokens = self.model.generate(
|
| 989 |
input_ids=generation_inputs,
|
| 990 |
generation_config=generation_config,
|
| 991 |
+
attention_mask=attention_mask,
|
| 992 |
synced_gpus=synced_gpus,
|
| 993 |
)
|
| 994 |
else:
|
|
|
|
| 999 |
input_ids=generation_inputs,
|
| 1000 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 1001 |
generation_config=generation_config,
|
| 1002 |
+
attention_mask=attention_mask,
|
| 1003 |
synced_gpus=synced_gpus,
|
| 1004 |
)
|
| 1005 |
|
|
|
|
| 1007 |
generated_tokens = self.model.generate(
|
| 1008 |
input_ids=generation_inputs,
|
| 1009 |
generation_config=generation_config,
|
| 1010 |
+
attention_mask=attention_mask,
|
| 1011 |
synced_gpus=synced_gpus,
|
| 1012 |
)
|
| 1013 |
|
improve_gainlora/src/cl_trainer_gainlora_olora.py
CHANGED
|
@@ -982,8 +982,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 982 |
|
| 983 |
gen_kwargs["synced_gpus"] = False
|
| 984 |
|
| 985 |
-
|
| 986 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 987 |
|
| 988 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 989 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -997,6 +996,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 997 |
generated_tokens = self.model.generate(
|
| 998 |
input_ids=generation_inputs,
|
| 999 |
generation_config=generation_config,
|
|
|
|
| 1000 |
synced_gpus=synced_gpus,
|
| 1001 |
)
|
| 1002 |
else:
|
|
@@ -1007,6 +1007,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 1007 |
input_ids=generation_inputs,
|
| 1008 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 1009 |
generation_config=generation_config,
|
|
|
|
| 1010 |
synced_gpus=synced_gpus,
|
| 1011 |
)
|
| 1012 |
|
|
@@ -1014,6 +1015,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 1014 |
generated_tokens = self.model.generate(
|
| 1015 |
input_ids=generation_inputs,
|
| 1016 |
generation_config=generation_config,
|
|
|
|
| 1017 |
synced_gpus=synced_gpus,
|
| 1018 |
)
|
| 1019 |
|
|
|
|
| 982 |
|
| 983 |
gen_kwargs["synced_gpus"] = False
|
| 984 |
|
| 985 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 986 |
|
| 987 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 988 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 996 |
generated_tokens = self.model.generate(
|
| 997 |
input_ids=generation_inputs,
|
| 998 |
generation_config=generation_config,
|
| 999 |
+
attention_mask=attention_mask,
|
| 1000 |
synced_gpus=synced_gpus,
|
| 1001 |
)
|
| 1002 |
else:
|
|
|
|
| 1007 |
input_ids=generation_inputs,
|
| 1008 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 1009 |
generation_config=generation_config,
|
| 1010 |
+
attention_mask=attention_mask,
|
| 1011 |
synced_gpus=synced_gpus,
|
| 1012 |
)
|
| 1013 |
|
|
|
|
| 1015 |
generated_tokens = self.model.generate(
|
| 1016 |
input_ids=generation_inputs,
|
| 1017 |
generation_config=generation_config,
|
| 1018 |
+
attention_mask=attention_mask,
|
| 1019 |
synced_gpus=synced_gpus,
|
| 1020 |
)
|
| 1021 |
|
improve_gainlora/src/cl_trainer_gainlora_olora_llama.py
CHANGED
|
@@ -965,8 +965,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 965 |
|
| 966 |
gen_kwargs["synced_gpus"] = False
|
| 967 |
|
| 968 |
-
|
| 969 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 970 |
|
| 971 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 972 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -980,6 +979,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 980 |
generated_tokens = self.model.generate(
|
| 981 |
input_ids=generation_inputs,
|
| 982 |
generation_config=generation_config,
|
|
|
|
| 983 |
synced_gpus=synced_gpus,
|
| 984 |
)
|
| 985 |
else:
|
|
@@ -990,6 +990,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 990 |
input_ids=generation_inputs,
|
| 991 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 992 |
generation_config=generation_config,
|
|
|
|
| 993 |
synced_gpus=synced_gpus,
|
| 994 |
)
|
| 995 |
|
|
@@ -997,6 +998,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 997 |
generated_tokens = self.model.generate(
|
| 998 |
input_ids=generation_inputs,
|
| 999 |
generation_config=generation_config,
|
|
|
|
| 1000 |
synced_gpus=synced_gpus,
|
| 1001 |
)
|
| 1002 |
|
|
|
|
| 965 |
|
| 966 |
gen_kwargs["synced_gpus"] = False
|
| 967 |
|
| 968 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 969 |
|
| 970 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 971 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 979 |
generated_tokens = self.model.generate(
|
| 980 |
input_ids=generation_inputs,
|
| 981 |
generation_config=generation_config,
|
| 982 |
+
attention_mask=attention_mask,
|
| 983 |
synced_gpus=synced_gpus,
|
| 984 |
)
|
| 985 |
else:
|
|
|
|
| 990 |
input_ids=generation_inputs,
|
| 991 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 992 |
generation_config=generation_config,
|
| 993 |
+
attention_mask=attention_mask,
|
| 994 |
synced_gpus=synced_gpus,
|
| 995 |
)
|
| 996 |
|
|
|
|
| 998 |
generated_tokens = self.model.generate(
|
| 999 |
input_ids=generation_inputs,
|
| 1000 |
generation_config=generation_config,
|
| 1001 |
+
attention_mask=attention_mask,
|
| 1002 |
synced_gpus=synced_gpus,
|
| 1003 |
)
|
| 1004 |
|
improve_gainlora/src/cl_trainer_inflora.py
CHANGED
|
@@ -738,8 +738,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 738 |
|
| 739 |
gen_kwargs["synced_gpus"] = False
|
| 740 |
|
| 741 |
-
|
| 742 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 743 |
|
| 744 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 745 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -753,6 +752,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 753 |
generated_tokens = self.model.generate(
|
| 754 |
input_ids=generation_inputs,
|
| 755 |
generation_config=generation_config,
|
|
|
|
| 756 |
synced_gpus=synced_gpus,
|
| 757 |
)
|
| 758 |
else:
|
|
@@ -763,6 +763,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 763 |
input_ids=generation_inputs,
|
| 764 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 765 |
generation_config=generation_config,
|
|
|
|
| 766 |
synced_gpus=synced_gpus,
|
| 767 |
)
|
| 768 |
|
|
@@ -770,6 +771,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 770 |
generated_tokens = self.model.generate(
|
| 771 |
input_ids=generation_inputs,
|
| 772 |
generation_config=generation_config,
|
|
|
|
| 773 |
synced_gpus=synced_gpus,
|
| 774 |
)
|
| 775 |
|
|
|
|
| 738 |
|
| 739 |
gen_kwargs["synced_gpus"] = False
|
| 740 |
|
| 741 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 742 |
|
| 743 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 744 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 752 |
generated_tokens = self.model.generate(
|
| 753 |
input_ids=generation_inputs,
|
| 754 |
generation_config=generation_config,
|
| 755 |
+
attention_mask=attention_mask,
|
| 756 |
synced_gpus=synced_gpus,
|
| 757 |
)
|
| 758 |
else:
|
|
|
|
| 763 |
input_ids=generation_inputs,
|
| 764 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 765 |
generation_config=generation_config,
|
| 766 |
+
attention_mask=attention_mask,
|
| 767 |
synced_gpus=synced_gpus,
|
| 768 |
)
|
| 769 |
|
|
|
|
| 771 |
generated_tokens = self.model.generate(
|
| 772 |
input_ids=generation_inputs,
|
| 773 |
generation_config=generation_config,
|
| 774 |
+
attention_mask=attention_mask,
|
| 775 |
synced_gpus=synced_gpus,
|
| 776 |
)
|
| 777 |
|
improve_gainlora/src/cl_trainer_inflora_llama.py
CHANGED
|
@@ -765,8 +765,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 765 |
|
| 766 |
gen_kwargs["synced_gpus"] = False
|
| 767 |
|
| 768 |
-
|
| 769 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 770 |
|
| 771 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 772 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -780,6 +779,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 780 |
generated_tokens = self.model.generate(
|
| 781 |
input_ids=generation_inputs,
|
| 782 |
generation_config=generation_config,
|
|
|
|
| 783 |
synced_gpus=synced_gpus,
|
| 784 |
)
|
| 785 |
else:
|
|
@@ -790,6 +790,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 790 |
input_ids=generation_inputs,
|
| 791 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 792 |
generation_config=generation_config,
|
|
|
|
| 793 |
synced_gpus=synced_gpus,
|
| 794 |
)
|
| 795 |
|
|
@@ -797,6 +798,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 797 |
generated_tokens = self.model.generate(
|
| 798 |
input_ids=generation_inputs,
|
| 799 |
generation_config=generation_config,
|
|
|
|
| 800 |
synced_gpus=synced_gpus,
|
| 801 |
)
|
| 802 |
|
|
|
|
| 765 |
|
| 766 |
gen_kwargs["synced_gpus"] = False
|
| 767 |
|
| 768 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 769 |
|
| 770 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 771 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 779 |
generated_tokens = self.model.generate(
|
| 780 |
input_ids=generation_inputs,
|
| 781 |
generation_config=generation_config,
|
| 782 |
+
attention_mask=attention_mask,
|
| 783 |
synced_gpus=synced_gpus,
|
| 784 |
)
|
| 785 |
else:
|
|
|
|
| 790 |
input_ids=generation_inputs,
|
| 791 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 792 |
generation_config=generation_config,
|
| 793 |
+
attention_mask=attention_mask,
|
| 794 |
synced_gpus=synced_gpus,
|
| 795 |
)
|
| 796 |
|
|
|
|
| 798 |
generated_tokens = self.model.generate(
|
| 799 |
input_ids=generation_inputs,
|
| 800 |
generation_config=generation_config,
|
| 801 |
+
attention_mask=attention_mask,
|
| 802 |
synced_gpus=synced_gpus,
|
| 803 |
)
|
| 804 |
|
improve_gainlora/src/cl_trainer_olora.py
CHANGED
|
@@ -580,8 +580,7 @@ class OLoRATrainer(Seq2SeqTrainer):
|
|
| 580 |
|
| 581 |
gen_kwargs["synced_gpus"] = False
|
| 582 |
|
| 583 |
-
|
| 584 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 585 |
|
| 586 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 587 |
|
|
|
|
| 580 |
|
| 581 |
gen_kwargs["synced_gpus"] = False
|
| 582 |
|
| 583 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 584 |
|
| 585 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 586 |
|
improve_gainlora/src/cl_trainer_olora_llama.py
CHANGED
|
@@ -582,8 +582,7 @@ class OLoRATrainer(Seq2SeqTrainer):
|
|
| 582 |
|
| 583 |
gen_kwargs["synced_gpus"] = False
|
| 584 |
|
| 585 |
-
|
| 586 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 587 |
|
| 588 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 589 |
|
|
|
|
| 582 |
|
| 583 |
gen_kwargs["synced_gpus"] = False
|
| 584 |
|
| 585 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 586 |
|
| 587 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 588 |
|
improve_gainlora/src/cl_trainer_specroute.py
CHANGED
|
@@ -585,12 +585,11 @@ class SpecRoute_Trainer(Seq2SeqTrainer):
|
|
| 585 |
}
|
| 586 |
gen_kwargs["synced_gpus"] = False
|
| 587 |
|
| 588 |
-
|
| 589 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 590 |
|
| 591 |
# synced_gpus and attention_mask must be passed to generate(), not GenerationConfig
|
| 592 |
_synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 593 |
-
_attention_mask =
|
| 594 |
|
| 595 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 596 |
|
|
|
|
| 585 |
}
|
| 586 |
gen_kwargs["synced_gpus"] = False
|
| 587 |
|
| 588 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 589 |
|
| 590 |
# synced_gpus and attention_mask must be passed to generate(), not GenerationConfig
|
| 591 |
_synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 592 |
+
_attention_mask = inputs.get("attention_mask", None) # from inputs, not gen_kwargs
|
| 593 |
|
| 594 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 595 |
|
root_gainlora/src/cl_trainer_gainlora_inflora.py
CHANGED
|
@@ -952,8 +952,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 952 |
|
| 953 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 954 |
|
| 955 |
-
|
| 956 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 957 |
|
| 958 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 959 |
|
|
@@ -966,6 +965,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 966 |
generated_tokens = self.model.generate(
|
| 967 |
input_ids=generation_inputs,
|
| 968 |
generation_config=generation_config,
|
|
|
|
| 969 |
synced_gpus=synced_gpus,
|
| 970 |
)
|
| 971 |
else:
|
|
@@ -976,6 +976,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 976 |
input_ids=generation_inputs,
|
| 977 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 978 |
generation_config=generation_config,
|
|
|
|
| 979 |
synced_gpus=synced_gpus,
|
| 980 |
)
|
| 981 |
|
|
@@ -983,6 +984,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 983 |
generated_tokens = self.model.generate(
|
| 984 |
input_ids=generation_inputs,
|
| 985 |
generation_config=generation_config,
|
|
|
|
| 986 |
synced_gpus=synced_gpus,
|
| 987 |
)
|
| 988 |
|
|
|
|
| 952 |
|
| 953 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 954 |
|
| 955 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 956 |
|
| 957 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 958 |
|
|
|
|
| 965 |
generated_tokens = self.model.generate(
|
| 966 |
input_ids=generation_inputs,
|
| 967 |
generation_config=generation_config,
|
| 968 |
+
attention_mask=attention_mask,
|
| 969 |
synced_gpus=synced_gpus,
|
| 970 |
)
|
| 971 |
else:
|
|
|
|
| 976 |
input_ids=generation_inputs,
|
| 977 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 978 |
generation_config=generation_config,
|
| 979 |
+
attention_mask=attention_mask,
|
| 980 |
synced_gpus=synced_gpus,
|
| 981 |
)
|
| 982 |
|
|
|
|
| 984 |
generated_tokens = self.model.generate(
|
| 985 |
input_ids=generation_inputs,
|
| 986 |
generation_config=generation_config,
|
| 987 |
+
attention_mask=attention_mask,
|
| 988 |
synced_gpus=synced_gpus,
|
| 989 |
)
|
| 990 |
|
root_gainlora/src/cl_trainer_gainlora_inflora_llama.py
CHANGED
|
@@ -975,8 +975,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 975 |
|
| 976 |
gen_kwargs["synced_gpus"] = False
|
| 977 |
|
| 978 |
-
|
| 979 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 980 |
|
| 981 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 982 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -990,6 +989,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 990 |
generated_tokens = self.model.generate(
|
| 991 |
input_ids=generation_inputs,
|
| 992 |
generation_config=generation_config,
|
|
|
|
| 993 |
synced_gpus=synced_gpus,
|
| 994 |
)
|
| 995 |
else:
|
|
@@ -1000,6 +1000,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 1000 |
input_ids=generation_inputs,
|
| 1001 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 1002 |
generation_config=generation_config,
|
|
|
|
| 1003 |
synced_gpus=synced_gpus,
|
| 1004 |
)
|
| 1005 |
|
|
@@ -1007,6 +1008,7 @@ class GainLoRA_InfLoRA_Trainer(Seq2SeqTrainer):
|
|
| 1007 |
generated_tokens = self.model.generate(
|
| 1008 |
input_ids=generation_inputs,
|
| 1009 |
generation_config=generation_config,
|
|
|
|
| 1010 |
synced_gpus=synced_gpus,
|
| 1011 |
)
|
| 1012 |
|
|
|
|
| 975 |
|
| 976 |
gen_kwargs["synced_gpus"] = False
|
| 977 |
|
| 978 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 979 |
|
| 980 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 981 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 989 |
generated_tokens = self.model.generate(
|
| 990 |
input_ids=generation_inputs,
|
| 991 |
generation_config=generation_config,
|
| 992 |
+
attention_mask=attention_mask,
|
| 993 |
synced_gpus=synced_gpus,
|
| 994 |
)
|
| 995 |
else:
|
|
|
|
| 1000 |
input_ids=generation_inputs,
|
| 1001 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 1002 |
generation_config=generation_config,
|
| 1003 |
+
attention_mask=attention_mask,
|
| 1004 |
synced_gpus=synced_gpus,
|
| 1005 |
)
|
| 1006 |
|
|
|
|
| 1008 |
generated_tokens = self.model.generate(
|
| 1009 |
input_ids=generation_inputs,
|
| 1010 |
generation_config=generation_config,
|
| 1011 |
+
attention_mask=attention_mask,
|
| 1012 |
synced_gpus=synced_gpus,
|
| 1013 |
)
|
| 1014 |
|
root_gainlora/src/cl_trainer_gainlora_olora.py
CHANGED
|
@@ -983,8 +983,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 983 |
|
| 984 |
gen_kwargs["synced_gpus"] = False
|
| 985 |
|
| 986 |
-
|
| 987 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 988 |
|
| 989 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 990 |
|
|
|
|
| 983 |
|
| 984 |
gen_kwargs["synced_gpus"] = False
|
| 985 |
|
| 986 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 987 |
|
| 988 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 989 |
|
root_gainlora/src/cl_trainer_gainlora_olora_llama.py
CHANGED
|
@@ -966,8 +966,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 966 |
|
| 967 |
gen_kwargs["synced_gpus"] = False
|
| 968 |
|
| 969 |
-
|
| 970 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 971 |
|
| 972 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 973 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -981,6 +980,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 981 |
generated_tokens = self.model.generate(
|
| 982 |
input_ids=generation_inputs,
|
| 983 |
generation_config=generation_config,
|
|
|
|
| 984 |
synced_gpus=synced_gpus,
|
| 985 |
)
|
| 986 |
else:
|
|
@@ -991,6 +991,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 991 |
input_ids=generation_inputs,
|
| 992 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 993 |
generation_config=generation_config,
|
|
|
|
| 994 |
synced_gpus=synced_gpus,
|
| 995 |
)
|
| 996 |
|
|
@@ -998,6 +999,7 @@ class GainLoRA_OLoRA_Trainer(Seq2SeqTrainer):
|
|
| 998 |
generated_tokens = self.model.generate(
|
| 999 |
input_ids=generation_inputs,
|
| 1000 |
generation_config=generation_config,
|
|
|
|
| 1001 |
synced_gpus=synced_gpus,
|
| 1002 |
)
|
| 1003 |
|
|
|
|
| 966 |
|
| 967 |
gen_kwargs["synced_gpus"] = False
|
| 968 |
|
| 969 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 970 |
|
| 971 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 972 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 980 |
generated_tokens = self.model.generate(
|
| 981 |
input_ids=generation_inputs,
|
| 982 |
generation_config=generation_config,
|
| 983 |
+
attention_mask=attention_mask,
|
| 984 |
synced_gpus=synced_gpus,
|
| 985 |
)
|
| 986 |
else:
|
|
|
|
| 991 |
input_ids=generation_inputs,
|
| 992 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 993 |
generation_config=generation_config,
|
| 994 |
+
attention_mask=attention_mask,
|
| 995 |
synced_gpus=synced_gpus,
|
| 996 |
)
|
| 997 |
|
|
|
|
| 999 |
generated_tokens = self.model.generate(
|
| 1000 |
input_ids=generation_inputs,
|
| 1001 |
generation_config=generation_config,
|
| 1002 |
+
attention_mask=attention_mask,
|
| 1003 |
synced_gpus=synced_gpus,
|
| 1004 |
)
|
| 1005 |
|
root_gainlora/src/cl_trainer_inflora.py
CHANGED
|
@@ -739,8 +739,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 739 |
|
| 740 |
gen_kwargs["synced_gpus"] = False
|
| 741 |
|
| 742 |
-
|
| 743 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 744 |
|
| 745 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 746 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -754,6 +753,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 754 |
generated_tokens = self.model.generate(
|
| 755 |
input_ids=generation_inputs,
|
| 756 |
generation_config=generation_config,
|
|
|
|
| 757 |
synced_gpus=synced_gpus,
|
| 758 |
)
|
| 759 |
else:
|
|
@@ -764,6 +764,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 764 |
input_ids=generation_inputs,
|
| 765 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 766 |
generation_config=generation_config,
|
|
|
|
| 767 |
synced_gpus=synced_gpus,
|
| 768 |
)
|
| 769 |
|
|
@@ -771,6 +772,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 771 |
generated_tokens = self.model.generate(
|
| 772 |
input_ids=generation_inputs,
|
| 773 |
generation_config=generation_config,
|
|
|
|
| 774 |
synced_gpus=synced_gpus,
|
| 775 |
)
|
| 776 |
|
|
|
|
| 739 |
|
| 740 |
gen_kwargs["synced_gpus"] = False
|
| 741 |
|
| 742 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 743 |
|
| 744 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 745 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 753 |
generated_tokens = self.model.generate(
|
| 754 |
input_ids=generation_inputs,
|
| 755 |
generation_config=generation_config,
|
| 756 |
+
attention_mask=attention_mask,
|
| 757 |
synced_gpus=synced_gpus,
|
| 758 |
)
|
| 759 |
else:
|
|
|
|
| 764 |
input_ids=generation_inputs,
|
| 765 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 766 |
generation_config=generation_config,
|
| 767 |
+
attention_mask=attention_mask,
|
| 768 |
synced_gpus=synced_gpus,
|
| 769 |
)
|
| 770 |
|
|
|
|
| 772 |
generated_tokens = self.model.generate(
|
| 773 |
input_ids=generation_inputs,
|
| 774 |
generation_config=generation_config,
|
| 775 |
+
attention_mask=attention_mask,
|
| 776 |
synced_gpus=synced_gpus,
|
| 777 |
)
|
| 778 |
|
root_gainlora/src/cl_trainer_inflora_llama.py
CHANGED
|
@@ -766,8 +766,7 @@ class InfLoRATrainer(Seq2SeqTrainer):
|
|
| 766 |
|
| 767 |
gen_kwargs["synced_gpus"] = False
|
| 768 |
|
| 769 |
-
|
| 770 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 771 |
|
| 772 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 773 |
|
|
|
|
| 766 |
|
| 767 |
gen_kwargs["synced_gpus"] = False
|
| 768 |
|
| 769 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 770 |
|
| 771 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 772 |
|
root_gainlora/src/cl_trainer_olora.py
CHANGED
|
@@ -581,8 +581,7 @@ class OLoRATrainer(Seq2SeqTrainer):
|
|
| 581 |
|
| 582 |
gen_kwargs["synced_gpus"] = False
|
| 583 |
|
| 584 |
-
|
| 585 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 586 |
|
| 587 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 588 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
@@ -596,6 +595,7 @@ class OLoRATrainer(Seq2SeqTrainer):
|
|
| 596 |
generated_tokens = self.model.generate(
|
| 597 |
input_ids=generation_inputs,
|
| 598 |
generation_config=generation_config,
|
|
|
|
| 599 |
synced_gpus=synced_gpus,
|
| 600 |
)
|
| 601 |
else:
|
|
@@ -606,6 +606,7 @@ class OLoRATrainer(Seq2SeqTrainer):
|
|
| 606 |
input_ids=generation_inputs,
|
| 607 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 608 |
generation_config=generation_config,
|
|
|
|
| 609 |
synced_gpus=synced_gpus,
|
| 610 |
)
|
| 611 |
|
|
@@ -613,6 +614,7 @@ class OLoRATrainer(Seq2SeqTrainer):
|
|
| 613 |
generated_tokens = self.model.generate(
|
| 614 |
input_ids=generation_inputs,
|
| 615 |
generation_config=generation_config,
|
|
|
|
| 616 |
synced_gpus=synced_gpus,
|
| 617 |
)
|
| 618 |
|
|
|
|
| 581 |
|
| 582 |
gen_kwargs["synced_gpus"] = False
|
| 583 |
|
| 584 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 585 |
|
| 586 |
synced_gpus = gen_kwargs.pop("synced_gpus", False)
|
| 587 |
generation_config = GenerationConfig(**gen_kwargs)
|
|
|
|
| 595 |
generated_tokens = self.model.generate(
|
| 596 |
input_ids=generation_inputs,
|
| 597 |
generation_config=generation_config,
|
| 598 |
+
attention_mask=attention_mask,
|
| 599 |
synced_gpus=synced_gpus,
|
| 600 |
)
|
| 601 |
else:
|
|
|
|
| 606 |
input_ids=generation_inputs,
|
| 607 |
input_ids_wo_label=inputs["input_ids_wo_label"],
|
| 608 |
generation_config=generation_config,
|
| 609 |
+
attention_mask=attention_mask,
|
| 610 |
synced_gpus=synced_gpus,
|
| 611 |
)
|
| 612 |
|
|
|
|
| 614 |
generated_tokens = self.model.generate(
|
| 615 |
input_ids=generation_inputs,
|
| 616 |
generation_config=generation_config,
|
| 617 |
+
attention_mask=attention_mask,
|
| 618 |
synced_gpus=synced_gpus,
|
| 619 |
)
|
| 620 |
|
root_gainlora/src/cl_trainer_olora_llama.py
CHANGED
|
@@ -583,8 +583,7 @@ class OLoRATrainer(Seq2SeqTrainer):
|
|
| 583 |
|
| 584 |
gen_kwargs["synced_gpus"] = False
|
| 585 |
|
| 586 |
-
|
| 587 |
-
gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
|
| 588 |
|
| 589 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 590 |
|
|
|
|
| 583 |
|
| 584 |
gen_kwargs["synced_gpus"] = False
|
| 585 |
|
| 586 |
+
attention_mask = inputs.get("attention_mask", None)
|
|
|
|
| 587 |
|
| 588 |
generation_config = GenerationConfig(**gen_kwargs)
|
| 589 |
|