import os

# The HF_* environment variables must be set BEFORE importing `transformers`,
# otherwise the library has already resolved its auth/cache configuration.
#
# SECURITY NOTE(review): never commit a real token here — export HF_TOKEN in
# the shell instead. `setdefault` keeps an already-exported token intact
# rather than clobbering it with the placeholder (the original assignment
# overwrote any real token unconditionally).
os.environ.setdefault("HF_TOKEN", "Your_HuggingFace_Token_Here")

# Directory Hugging Face uses for model/tokenizer caching.
cache_dir = 'Your_Desired_Cache_Directory_Here'
os.environ['HF_HOME'] = cache_dir

import argparse
import json
import time

import nltk
import torch
import tqdm
from nltk.tokenize import sent_tokenize
from transformers import T5Tokenizer, T5ForConditionalGeneration

# nltk.download("punkt")  # run once if the punkt tokenizer data is missing
def main(args):
    """Paraphrase-attack watermarked generations with the DIPPER model.

    Reads (query, watermarked output) pairs from the JSON file ``args.data``,
    paraphrases each output ``args.sent_interval`` sentences at a time, and
    periodically checkpoints the accumulated results to
    ``{args.output_name}_{n}.json`` (removing the previous checkpoint).
    """
    # Validate the diversity knobs once, up front. The original used `assert`
    # inside the per-item loop: asserts are stripped under `python -O` and the
    # check is loop-invariant, so raise explicitly here instead.
    valid_knobs = (0, 20, 40, 60, 80, 100)
    if args.lex not in valid_knobs:
        raise ValueError("Lexical diversity must be one of 0, 20, 40, 60, 80, 100.")
    if args.order not in valid_knobs:
        raise ValueError("Order diversity must be one of 0, 20, 40, 60, 80, 100.")
    # DIPPER control codes: a higher diversity knob maps to a lower code.
    lex_code = int(100 - args.lex)
    order_code = int(100 - args.order)

    # Free any cached GPU memory left over from a previous run in this process.
    torch.cuda.empty_cache()

    # Load the (query, watermarked output) pairs.
    with open(args.data, "r") as f:
        data = json.load(f)
    # NOTE(review): the original hard-coded a resume offset of 4960 in the
    # slice. That default is preserved for backward compatibility, but it can
    # now be overridden by providing ``args.start_idx``.
    start = getattr(args, "start_idx", 4960)
    data = [
        {"query": item["input"], "output_with_watermark": item[args.column_in_data]}
        for item in data[start:args.Ninputs]
    ]

    # Load the paraphraser; DIPPER reuses the t5-v1_1-xxl tokenizer.
    time1 = time.time()
    tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-xxl")
    model = T5ForConditionalGeneration.from_pretrained(args.model_name)
    print("Model loaded in ", time.time() - time1)
    model.cuda()
    model.eval()

    attack_results = []
    for idx, dd in tqdm.tqdm(enumerate(data), total=len(data)):
        print(f"Processing input {idx + 1} / {len(data)}")
        raw = dd["output_with_watermark"]
        # The watermarked output may be stored as a string or a 1-element list.
        input_gen = raw.strip() if isinstance(raw, str) else raw[0].strip()

        # Collapse all whitespace (including spurious newlines) to single spaces.
        input_gen = " ".join(input_gen.split())
        sentences = sent_tokenize(input_gen)
        prefix = " ".join(dd["query"].replace("\n", " ").split())

        output_text = ""
        # Paraphrase `sent_interval` sentences per step, feeding each
        # paraphrased window back into `prefix` as left context.
        for sent_idx in range(0, len(sentences), args.sent_interval):
            curr_sent_window = " ".join(sentences[sent_idx : sent_idx + args.sent_interval])
            if args.no_ctx:
                final_input_text = f"lexical = {lex_code}, order = {order_code} <sent> {curr_sent_window} </sent>"
            else:
                final_input_text = f"lexical = {lex_code}, order = {order_code} {prefix} <sent> {curr_sent_window} </sent>"
            final_input = tokenizer([final_input_text], return_tensors="pt")
            final_input = {k: v.cuda() for k, v in final_input.items()}
            with torch.inference_mode():
                outputs = model.generate(
                    **final_input,
                    do_sample=True,
                    top_p=0.75,
                    top_k=None,
                    max_length=400,
                )
            outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
            prefix += " " + outputs[0]
            output_text += " " + outputs[0]

        attack_results.append({
            "original_query": dd["query"],
            "watermarked_response": dd["output_with_watermark"],
            "paraphrased_response": output_text.strip(),
        })

        # Checkpoint every `saving_freq` inputs; delete the previous
        # checkpoint so only the newest file remains on disk.
        n_done = idx + 1
        if n_done % args.saving_freq == 0:
            prev = f"{args.output_name}_{n_done - args.saving_freq}.json"
            if os.path.isfile(prev):
                os.remove(prev)
            with open(f"{args.output_name}_{n_done}.json", "w") as json_file:
                json.dump(attack_results, json_file, indent=4)
def str2bool(value):
    """Parse a command-line boolean string.

    argparse's ``type=bool`` treats ANY non-empty string — including
    ``"False"`` — as True, so ``--no_ctx False`` could never disable the
    flag. This parser accepts the usual spellings of both values and
    raises a proper CLI error otherwise.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Boolean value expected, got {value!r}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Attack by Dipper Paraphrasing")
    parser.add_argument("--data", type=str, default="Llama3_SIR_test_13860.json", help="The data to be attacked / paraphrased.")
    parser.add_argument("--column_in_data", type=str, default="output_only", help="Column in the data to be attacked / paraphrased.")
    parser.add_argument("--output_name", type=str, default="Dipper_Llama3_SIR_13860_4960_", help="The output directory to save the attacked / paraphrased data.")
    parser.add_argument("--Ninputs", type=int, default=13860, help="Number of inputs to be attacked / paraphrased.")
    parser.add_argument("--saving_freq", type=int, default=10, help="The frequency of saving the output.")
    parser.add_argument("--model_name", type=str, default="kalpeshk2011/dipper-paraphraser-xxl", help="The model name to use.")
    # type=str2bool (not type=bool) so "--no_ctx False" actually yields False.
    parser.add_argument("--no_ctx", type=str2bool, default=True, help="Whether to use context or not.")
    parser.add_argument("--sent_interval", type=int, default=3, help="The sentence interval.")
    parser.add_argument("--lex", type=int, default=60, help="Lexical diversity knob for the paraphrase attack.")
    parser.add_argument("--order", type=int, default=60, help="Order diversity knob for the paraphrase attack.")
    args = parser.parse_args()
    main(args)