 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
# Set the HF_HOME environment variable to point to the desired cache location
# NOTE(review): hard-coding an access token in source is a credential-leak
# risk — prefer reading it from the environment or a secrets manager.
os.environ["HF_TOKEN"] = "Your_HuggingFace_Token_Here"
# Specify the directory path 
cache_dir = 'Your_Desired_Cache_Directory_Here'
# Set the HF_HOME environment variable
# (done before the `transformers` import below, presumably so the cache
# location takes effect at import time — TODO confirm)
os.environ['HF_HOME'] = cache_dir
import argparse
import json
import nltk
import time
import os  # NOTE(review): duplicate of the `import os` above; harmless but redundant
import tqdm

from nltk.tokenize import sent_tokenize

from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

#nltk.download("punkt")

def main(args):
    """Run the DIPPER paraphrase attack over a JSON dataset.

    Reads (query, watermarked-output) pairs from ``args.data``, paraphrases
    each watermarked output with the DIPPER T5 paraphraser in windows of
    ``args.sent_interval`` sentences, and checkpoints the accumulated results
    to ``{args.output_name}_{n}.json`` every ``args.saving_freq`` inputs
    (removing the previous checkpoint file each time).

    Args:
        args: parsed ``argparse`` namespace; see the ``__main__`` block for
            the available options. An optional ``args.start_index`` (default
            4960, the previously hard-coded value) selects where the input
            slice begins.
    """
    # Free any cached GPU memory left over from a previous run.
    torch.cuda.empty_cache()

    # Validate the diversity knobs once, up front — they are loop-invariant
    # (the original re-asserted them on every input).
    assert args.lex in [0, 20, 40, 60, 80, 100], "Lexical diversity must be one of 0, 20, 40, 60, 80, 100."
    assert args.order in [0, 20, 40, 60, 80, 100], "Order diversity must be one of 0, 20, 40, 60, 80, 100."
    # DIPPER control codes are inverted: code 0 requests maximum diversity.
    lex_code = int(100 - args.lex)
    order_code = int(100 - args.order)

    # Load data from the specified JSON file. The slice start used to be a
    # hard-coded 4960; it is now overridable via args.start_index.
    start_index = getattr(args, "start_index", 4960)
    with open(args.data, 'r') as f:
        data = json.load(f)
        data = [
            {"query": item["input"], "output_with_watermark": item[args.column_in_data]}
            for item in data[start_index:args.Ninputs]
        ]

    # Load the paraphraser and its (shared T5-v1.1) tokenizer.
    time1 = time.time()
    tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-xxl")
    model = T5ForConditionalGeneration.from_pretrained(args.model_name)
    print("Model loaded in ", time.time() - time1)
    model.cuda()
    model.eval()

    attack_results = []
    input_counter = 0

    for idx, dd in tqdm.tqdm(enumerate(data), total=len(data)):
        print(f"Processing input {idx + 1} / {len(data)}")
        # The watermarked output may be a bare string or a list whose first
        # element is the string.
        raw = dd["output_with_watermark"]
        input_gen = raw.strip() if isinstance(raw, str) else raw[0].strip()

        paraphrased = _paraphrase(
            model, tokenizer, dd["query"], input_gen,
            lex_code, order_code, args.sent_interval, args.no_ctx,
        )

        attack_results.append({
            "original_query": dd["query"],
            "watermarked_response": dd["output_with_watermark"],
            "paraphrased_response": paraphrased,
        })
        input_counter += 1

        # Periodic checkpoint every saving_freq inputs.
        if input_counter % args.saving_freq == 0:
            _save_checkpoint(args.output_name, attack_results,
                             input_counter, args.saving_freq)

    # BUG FIX: the original only saved on exact multiples of saving_freq, so
    # the trailing results were silently lost when len(data) was not a
    # multiple. Always write a final checkpoint covering everything.
    if input_counter % args.saving_freq != 0:
        last_full = input_counter - (input_counter % args.saving_freq)
        _save_checkpoint(args.output_name, attack_results,
                         input_counter, input_counter - last_full)


def _paraphrase(model, tokenizer, query, input_gen, lex_code, order_code,
                sent_interval, no_ctx):
    """Paraphrase ``input_gen`` window-by-window with DIPPER.

    Splits the text into sentences, paraphrases ``sent_interval`` sentences
    at a time (optionally conditioning on the running context ``prefix``
    unless ``no_ctx``), and returns the concatenated paraphrase.
    """
    # Remove spurious newlines / collapse all whitespace runs.
    input_gen = " ".join(input_gen.split())
    sentences = sent_tokenize(input_gen)
    # The context prefix starts as the (whitespace-normalized) query and
    # grows with each generated window.
    prefix = " ".join(query.replace("\n", " ").split())
    output_text = ""

    for sent_idx in range(0, len(sentences), sent_interval):
        curr_sent_window = " ".join(sentences[sent_idx : sent_idx + sent_interval])
        if no_ctx:
            final_input_text = f"lexical = {lex_code}, order = {order_code} <sent> {curr_sent_window} </sent>"
        else:
            final_input_text = f"lexical = {lex_code}, order = {order_code} {prefix} <sent> {curr_sent_window} </sent>"

        final_input = tokenizer([final_input_text], return_tensors="pt")
        final_input = {k: v.cuda() for k, v in final_input.items()}

        # Nucleus sampling; max_length caps each window's paraphrase length.
        with torch.inference_mode():
            outputs = model.generate(
                **final_input,
                do_sample=True,
                top_p=0.75,
                top_k=None,
                max_length=400
            )
        outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        prefix += " " + outputs[0]
        output_text += " " + outputs[0]

    return output_text.strip()


def _save_checkpoint(output_name, attack_results, input_counter, window):
    """Dump results to ``{output_name}_{input_counter}.json``.

    Removes the previous checkpoint file (written ``window`` inputs ago),
    if present, so only the latest checkpoint remains on disk.
    """
    previous = f"{output_name}_{input_counter - window}.json"
    if os.path.isfile(previous):
        os.remove(previous)
    with open(f"{output_name}_{input_counter}.json", "w") as json_file:
        json.dump(attack_results, json_file, indent=4)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Attack by Dipper Paraphrasing")
    parser.add_argument("--data",type=str,default="Llama3_SIR_test_13860.json", help="The data to be attacked / paraphrased.")
    parser.add_argument("--column_in_data", type=str, default="output_only", help="Column in the data to be attacked / paraphrased.")
    parser.add_argument("--output_name", type=str, default="Dipper_Llama3_SIR_13860_4960_", help="The output directory to save the attacked / paraphrased data.")
    parser.add_argument("--Ninputs", type=int, default=13860, help="Number of inputs to be attacked / paraphrased.")
    parser.add_argument("--saving_freq", type=int, default=10, help="The frequency of saving the output.")
    parser.add_argument("--model_name", type=str, default="kalpeshk2011/dipper-paraphraser-xxl", help="The model name to use.")
    parser.add_argument("--no_ctx", type=bool, default=True, help="Whether to use context or not.")
    parser.add_argument("--sent_interval", type=int, default=3,help="The sentence interval.")
    parser.add_argument("--lex",type=int, default=60, help="Lexical diversity knob for the paraphrase attack.")
    parser.add_argument("--order",type=int,default=60,help="Order diversity knob for the paraphrase attack.")

    args = parser.parse_args()

    main(args)