import os

# Configure Hugging Face credentials and cache location BEFORE importing
# transformers so the library picks them up at import time.
# NOTE(review): placeholder values — set a real token / directory before running.
os.environ["HF_TOKEN"] = "Your_HuggingFace_Token_Here"
cache_dir = 'Your_Desired_Cache_Directory_Here'
os.environ['HF_HOME'] = cache_dir

import argparse
import json
import time

import nltk
import torch
import tqdm
from nltk.tokenize import sent_tokenize
from transformers import T5Tokenizer, T5ForConditionalGeneration

#nltk.download("punkt")  # uncomment on first run if the punkt model is missing


def str2bool(value):
    """Parse a CLI string into a bool.

    BUG FIX: the original used ``type=bool`` for --no_ctx; argparse then calls
    ``bool("False")`` which is True, so the flag could never be disabled from
    the command line.
    """
    if isinstance(value, bool):
        return value
    if value.lower() in ("yes", "true", "t", "1"):
        return True
    if value.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Boolean value expected, got {value!r}")


def _save_checkpoint(args, attack_results, input_counter, prev_counter=None):
    """Dump results to '{output_name}_{input_counter}.json', removing the previous checkpoint file."""
    if prev_counter is None:
        prev_counter = input_counter - args.saving_freq
    prev_path = f"{args.output_name}_{prev_counter}.json"
    if os.path.isfile(prev_path):
        os.remove(prev_path)
    with open(f"{args.output_name}_{input_counter}.json", "w") as json_file:
        json.dump(attack_results, json_file, indent=4)


def main(args):
    """Paraphrase (attack) watermarked model outputs with the DIPPER paraphraser.

    Reads a JSON list of records, paraphrases the selected column sentence
    window by sentence window with control codes derived from --lex/--order,
    and periodically checkpoints results to '{output_name}_{count}.json'.
    """
    # Free any cached CUDA memory before loading the (large) model
    torch.cuda.empty_cache()

    # Load the data to be attacked
    with open(args.data, 'r') as f:
        data = json.load(f)
    # Keep only the [start_idx, Ninputs) slice of records
    # (start index was previously hard-coded to 4960).
    data = [
        {"query": item["input"], "output_with_watermark": item[args.column_in_data]}
        for item in data[args.start_idx:args.Ninputs]
    ]

    # Load the paraphrase model and its tokenizer
    time1 = time.time()
    tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-xxl")
    model = T5ForConditionalGeneration.from_pretrained(args.model_name)
    print("Model loaded in ", time.time() - time1)
    model.cuda()
    model.eval()

    # Validate the diversity knobs once, up front (the original re-checked
    # these loop-invariant values on every iteration).
    assert args.lex in [0, 20, 40, 60, 80, 100], "Lexical diversity must be one of 0, 20, 40, 60, 80, 100."
    assert args.order in [0, 20, 40, 60, 80, 100], "Order diversity must be one of 0, 20, 40, 60, 80, 100."
    # DIPPER control codes: higher diversity knob -> lower control code
    lex_code = int(100 - args.lex)
    order_code = int(100 - args.order)

    attack_results = []
    input_counter = 0

    for idx, dd in tqdm.tqdm(enumerate(data), total=len(data)):
        print(f"Processing input {idx + 1} / {len(data)}")
        # The column may hold either a string or a list whose first item is the text
        input_gen = dd["output_with_watermark"].strip() if isinstance(dd["output_with_watermark"], str) else dd["output_with_watermark"][0].strip()

        dipper_inputs = []
        w_wm_output_attacked = []

        # Collapse all runs of whitespace, then split into sentences
        input_gen = " ".join(input_gen.split())
        sentences = sent_tokenize(input_gen)
        # Whitespace-normalized query acts as the running context prefix
        prefix = " ".join(dd["query"].replace("\n", " ").split())
        output_text = ""
        final_input_text = ""

        # Paraphrase sent_interval sentences at a time; generated text is
        # appended to the prefix so later windows can condition on earlier output.
        for sent_idx in range(0, len(sentences), args.sent_interval):
            curr_sent_window = " ".join(sentences[sent_idx : sent_idx + args.sent_interval])
            if args.no_ctx:
                final_input_text = f"lexical = {lex_code}, order = {order_code} {curr_sent_window} "
            else:
                final_input_text = f"lexical = {lex_code}, order = {order_code} {prefix} {curr_sent_window} "
            final_input = tokenizer([final_input_text], return_tensors="pt")
            final_input = {k: v.cuda() for k, v in final_input.items()}
            # Sample a paraphrase for this window
            with torch.inference_mode():
                outputs = model.generate(
                    **final_input, do_sample=True, top_p=0.75, top_k=None, max_length=400
                )
            outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
            prefix += " " + outputs[0]
            output_text += " " + outputs[0]

        # Record the attacked output and the last paraphraser input
        w_wm_output_attacked.append(output_text.strip())
        dipper_inputs.append(final_input_text)

        result = {
            "original_query": dd["query"],
            "watermarked_response": dd["output_with_watermark"],
            #"final_input_text": dipper_inputs,
            "paraphrased_response": w_wm_output_attacked[0]
        }
        attack_results.append(result)
        input_counter += 1

        # Checkpoint every saving_freq inputs, removing the previous checkpoint
        if input_counter % args.saving_freq == 0:
            _save_checkpoint(args, attack_results, input_counter)

    # BUG FIX: the original never saved the tail of the results when the total
    # number of processed inputs was not a multiple of saving_freq.
    if input_counter % args.saving_freq != 0:
        _save_checkpoint(
            args, attack_results, input_counter,
            prev_counter=input_counter - (input_counter % args.saving_freq),
        )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Attack by Dipper Paraphrasing")
    parser.add_argument("--data", type=str, default="Llama3_SIR_test_13860.json", help="The data to be attacked / paraphrased.")
    parser.add_argument("--column_in_data", type=str, default="output_only", help="Column in the data to be attacked / paraphrased.")
    parser.add_argument("--output_name", type=str, default="Dipper_Llama3_SIR_13860_4960_", help="The output directory to save the attacked / paraphrased data.")
    parser.add_argument("--start_idx", type=int, default=4960, help="Index of the first input to process (was previously hard-coded).")
    parser.add_argument("--Ninputs", type=int, default=13860, help="Number of inputs to be attacked / paraphrased.")
    parser.add_argument("--saving_freq", type=int, default=10, help="The frequency of saving the output.")
    parser.add_argument("--model_name", type=str, default="kalpeshk2011/dipper-paraphraser-xxl", help="The model name to use.")
    parser.add_argument("--no_ctx", type=str2bool, default=True, help="Whether to use context or not.")
    parser.add_argument("--sent_interval", type=int, default=3, help="The sentence interval.")
    parser.add_argument("--lex", type=int, default=60, help="Lexical diversity knob for the paraphrase attack.")
    parser.add_argument("--order", type=int, default=60, help="Order diversity knob for the paraphrase attack.")
    args = parser.parse_args()
    main(args)