|
|
import os |
|
|
|
|
|
os.environ["HF_TOKEN"] = "Your_HuggingFace_Token_Here" |
|
|
|
|
|
cache_dir = 'Your_Desired_Cache_Directory_Here' |
|
|
|
|
|
os.environ['HF_HOME'] = cache_dir |
|
|
import argparse |
|
|
import json |
|
|
import nltk |
|
|
import time |
|
|
import os |
|
|
import tqdm |
|
|
|
|
|
from nltk.tokenize import sent_tokenize |
|
|
|
|
|
from transformers import T5Tokenizer, T5ForConditionalGeneration |
|
|
import torch |
|
|
|
|
|
|
|
|
|
|
|
def _paraphrase(model, tokenizer, sentences, prefix, lex_code, order_code, sent_interval, no_ctx):
    """Paraphrase `sentences` window-by-window with the DIPPER model.

    Each window of `sent_interval` sentences is wrapped in <sent>...</sent>
    together with the diversity control codes; unless `no_ctx`, the running
    `prefix` (query + previously generated text) is prepended as context.
    Returns the concatenated, stripped paraphrase.
    """
    output_text = ""
    for sent_idx in range(0, len(sentences), sent_interval):
        curr_sent_window = " ".join(sentences[sent_idx : sent_idx + sent_interval])
        if no_ctx:
            final_input_text = f"lexical = {lex_code}, order = {order_code} <sent> {curr_sent_window} </sent>"
        else:
            final_input_text = f"lexical = {lex_code}, order = {order_code} {prefix} <sent> {curr_sent_window} </sent>"

        final_input = tokenizer([final_input_text], return_tensors="pt")
        final_input = {k: v.cuda() for k, v in final_input.items()}

        with torch.inference_mode():
            outputs = model.generate(
                **final_input,
                do_sample=True,
                top_p=0.75,
                top_k=None,
                max_length=400
            )
        outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        # Feed the freshly generated text back into the context for later windows.
        prefix += " " + outputs[0]
        output_text += " " + outputs[0]
    return output_text.strip()


def _save_checkpoint(output_name, results, counter, prev_counter):
    """Write `results` to <output_name>_<counter>.json, removing the previous checkpoint."""
    prev_path = f"{output_name}_{prev_counter}.json"
    if prev_counter != counter and os.path.isfile(prev_path):
        os.remove(prev_path)
    with open(f"{output_name}_{counter}.json", "w") as json_file:
        json.dump(results, json_file, indent=4)


def main(args):
    """Run the DIPPER paraphrase attack over watermarked responses in `args.data`.

    Loads the JSON dataset, paraphrases each watermarked response with the
    DIPPER T5 model using the lexical/order diversity knobs, and checkpoints
    the accumulated results every `args.saving_freq` inputs (plus one final
    save for the remainder).
    """
    torch.cuda.empty_cache()

    # Validate the diversity knobs once, before the expensive model load
    # (originally these asserts re-ran inside the per-input loop).
    assert args.lex in (0, 20, 40, 60, 80, 100), "Lexical diversity must be one of 0, 20, 40, 60, 80, 100."
    assert args.order in (0, 20, 40, 60, 80, 100), "Order diversity must be one of 0, 20, 40, 60, 80, 100."
    # DIPPER control codes are inverted: code 0 requests maximum diversity.
    lex_code = int(100 - args.lex)
    order_code = int(100 - args.order)

    with open(args.data, 'r') as f:
        data = json.load(f)
    # Default 4960 preserves the original hard-coded resume point; an
    # optional --start_idx argument overrides it.
    start_idx = getattr(args, "start_idx", 4960)
    data = [
        {"query": item["input"], "output_with_watermark": item[args.column_in_data]}
        for item in data[start_idx:args.Ninputs]
    ]

    time1 = time.time()
    tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-xxl")
    model = T5ForConditionalGeneration.from_pretrained(args.model_name)
    print("Model loaded in ", time.time() - time1)
    model.cuda()
    model.eval()

    attack_results = []
    input_counter = 0

    for idx, dd in tqdm.tqdm(enumerate(data), total=len(data)):
        print(f"Processing input {idx + 1} / {len(data)}")
        # Responses may be stored as a plain string or a one-element list.
        input_gen = dd["output_with_watermark"].strip() if isinstance(dd["output_with_watermark"], str) else dd["output_with_watermark"][0].strip()

        # Collapse all whitespace runs to single spaces before tokenizing.
        input_gen = " ".join(input_gen.split())
        sentences = sent_tokenize(input_gen)
        prefix = " ".join(dd["query"].replace("\n", " ").split())

        paraphrased = _paraphrase(
            model, tokenizer, sentences, prefix,
            lex_code, order_code, args.sent_interval, args.no_ctx,
        )

        attack_results.append({
            "original_query": dd["query"],
            "watermarked_response": dd["output_with_watermark"],
            "paraphrased_response": paraphrased,
        })

        input_counter += 1
        if input_counter % args.saving_freq == 0:
            _save_checkpoint(args.output_name, attack_results, input_counter,
                             input_counter - args.saving_freq)

    # Bug fix: the original never saved after the loop, silently dropping the
    # tail of results when len(data) is not a multiple of saving_freq.
    if input_counter % args.saving_freq != 0:
        _save_checkpoint(args.output_name, attack_results, input_counter,
                         input_counter - input_counter % args.saving_freq)
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
parser = argparse.ArgumentParser(description="Attack by Dipper Paraphrasing") |
|
|
parser.add_argument("--data",type=str,default="Llama3_SIR_test_13860.json", help="The data to be attacked / paraphrased.") |
|
|
parser.add_argument("--column_in_data", type=str, default="output_only", help="Column in the data to be attacked / paraphrased.") |
|
|
parser.add_argument("--output_name", type=str, default="Dipper_Llama3_SIR_13860_4960_", help="The output directory to save the attacked / paraphrased data.") |
|
|
parser.add_argument("--Ninputs", type=int, default=13860, help="Number of inputs to be attacked / paraphrased.") |
|
|
parser.add_argument("--saving_freq", type=int, default=10, help="The frequency of saving the output.") |
|
|
parser.add_argument("--model_name", type=str, default="kalpeshk2011/dipper-paraphraser-xxl", help="The model name to use.") |
|
|
parser.add_argument("--no_ctx", type=bool, default=True, help="Whether to use context or not.") |
|
|
parser.add_argument("--sent_interval", type=int, default=3,help="The sentence interval.") |
|
|
parser.add_argument("--lex",type=int, default=60, help="Lexical diversity knob for the paraphrase attack.") |
|
|
parser.add_argument("--order",type=int,default=60,help="Order diversity knob for the paraphrase attack.") |
|
|
|
|
|
args = parser.parse_args() |
|
|
|
|
|
main(args) |