 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
# Set the HF_HOME environment variable to point to the desired cache location
# NOTE(review): hard-coding an access token in source is a credential-leak
# risk — prefer reading it from the environment or a secrets manager.
os.environ["HF_TOKEN"] = "Your_HuggingFace_Token_Here"
# Specify the directory path 
cache_dir = 'Your_Desired_Cache_Directory_Here'
# Set the HF_HOME environment variable
# (done before the `transformers` import below, presumably so the cache
# location takes effect at import time — TODO confirm)
os.environ['HF_HOME'] = cache_dir
import argparse
import json
import nltk
import time
import os  # NOTE(review): duplicate of the `import os` above; harmless but redundant
import tqdm

from nltk.tokenize import sent_tokenize

from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

#nltk.download("punkt")

def main(args):
    """Run the DIPPER paraphrase attack over a JSON dataset.

    Reads (query, watermarked-output) pairs from ``args.data``, paraphrases
    each watermarked output with the DIPPER T5 paraphraser in windows of
    ``args.sent_interval`` sentences, and checkpoints the accumulated results
    to ``{args.output_name}_{n}.json`` every ``args.saving_freq`` inputs
    (removing the previous checkpoint file each time).

    Args:
        args: parsed ``argparse`` namespace; see the ``__main__`` block for
            the available options. An optional ``args.start_index`` (default
            4960, the previously hard-coded value) selects where the input
            slice begins.
    """
    # Free any cached GPU memory left over from a previous run.
    torch.cuda.empty_cache()

    # Validate the diversity knobs once, up front — they are loop-invariant
    # (the original re-asserted them on every input).
    assert args.lex in [0, 20, 40, 60, 80, 100], "Lexical diversity must be one of 0, 20, 40, 60, 80, 100."
    assert args.order in [0, 20, 40, 60, 80, 100], "Order diversity must be one of 0, 20, 40, 60, 80, 100."
    # DIPPER control codes are inverted: code 0 requests maximum diversity.
    lex_code = int(100 - args.lex)
    order_code = int(100 - args.order)

    # Load data from the specified JSON file. The slice start used to be a
    # hard-coded 4960; it is now overridable via args.start_index.
    start_index = getattr(args, "start_index", 4960)
    with open(args.data, 'r') as f:
        data = json.load(f)
        data = [
            {"query": item["input"], "output_with_watermark": item[args.column_in_data]}
            for item in data[start_index:args.Ninputs]
        ]

    # Load the paraphraser and its (shared T5-v1.1) tokenizer.
    time1 = time.time()
    tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-xxl")
    model = T5ForConditionalGeneration.from_pretrained(args.model_name)
    print("Model loaded in ", time.time() - time1)
    model.cuda()
    model.eval()

    attack_results = []
    input_counter = 0

    for idx, dd in tqdm.tqdm(enumerate(data), total=len(data)):
        print(f"Processing input {idx + 1} / {len(data)}")
        # The watermarked output may be a bare string or a list whose first
        # element is the string.
        raw = dd["output_with_watermark"]
        input_gen = raw.strip() if isinstance(raw, str) else raw[0].strip()

        paraphrased = _paraphrase(
            model, tokenizer, dd["query"], input_gen,
            lex_code, order_code, args.sent_interval, args.no_ctx,
        )

        attack_results.append({
            "original_query": dd["query"],
            "watermarked_response": dd["output_with_watermark"],
            "paraphrased_response": paraphrased,
        })
        input_counter += 1

        # Periodic checkpoint every saving_freq inputs.
        if input_counter % args.saving_freq == 0:
            _save_checkpoint(args.output_name, attack_results,
                             input_counter, args.saving_freq)

    # BUG FIX: the original only saved on exact multiples of saving_freq, so
    # the trailing results were silently lost when len(data) was not a
    # multiple. Always write a final checkpoint covering everything.
    if input_counter % args.saving_freq != 0:
        last_full = input_counter - (input_counter % args.saving_freq)
        _save_checkpoint(args.output_name, attack_results,
                         input_counter, input_counter - last_full)


def _paraphrase(model, tokenizer, query, input_gen, lex_code, order_code,
                sent_interval, no_ctx):
    """Paraphrase ``input_gen`` window-by-window with DIPPER.

    Splits the text into sentences, paraphrases ``sent_interval`` sentences
    at a time (optionally conditioning on the running context ``prefix``
    unless ``no_ctx``), and returns the concatenated paraphrase.
    """
    # Remove spurious newlines / collapse all whitespace runs.
    input_gen = " ".join(input_gen.split())
    sentences = sent_tokenize(input_gen)
    # The context prefix starts as the (whitespace-normalized) query and
    # grows with each generated window.
    prefix = " ".join(query.replace("\n", " ").split())
    output_text = ""

    for sent_idx in range(0, len(sentences), sent_interval):
        curr_sent_window = " ".join(sentences[sent_idx : sent_idx + sent_interval])
        if no_ctx:
            final_input_text = f"lexical = {lex_code}, order = {order_code} <sent> {curr_sent_window} </sent>"
        else:
            final_input_text = f"lexical = {lex_code}, order = {order_code} {prefix} <sent> {curr_sent_window} </sent>"

        final_input = tokenizer([final_input_text], return_tensors="pt")
        final_input = {k: v.cuda() for k, v in final_input.items()}

        # Nucleus sampling; max_length caps each window's paraphrase length.
        with torch.inference_mode():
            outputs = model.generate(
                **final_input,
                do_sample=True,
                top_p=0.75,
                top_k=None,
                max_length=400
            )
        outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        prefix += " " + outputs[0]
        output_text += " " + outputs[0]

    return output_text.strip()


def _save_checkpoint(output_name, attack_results, input_counter, window):
    """Dump results to ``{output_name}_{input_counter}.json``.

    Removes the previous checkpoint file (written ``window`` inputs ago),
    if present, so only the latest checkpoint remains on disk.
    """
    previous = f"{output_name}_{input_counter - window}.json"
    if os.path.isfile(previous):
        os.remove(previous)
    with open(f"{output_name}_{input_counter}.json", "w") as json_file:
        json.dump(attack_results, json_file, indent=4)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Attack by Dipper Paraphrasing")
    parser.add_argument("--data",type=str,default="Llama3_SIR_test_13860.json", help="The data to be attacked / paraphrased.")
    parser.add_argument("--column_in_data", type=str, default="output_only", help="Column in the data to be attacked / paraphrased.")
    parser.add_argument("--output_name", type=str, default="Dipper_Llama3_SIR_13860_4960_", help="The output directory to save the attacked / paraphrased data.")
    parser.add_argument("--Ninputs", type=int, default=13860, help="Number of inputs to be attacked / paraphrased.")
    parser.add_argument("--saving_freq", type=int, default=10, help="The frequency of saving the output.")
    parser.add_argument("--model_name", type=str, default="kalpeshk2011/dipper-paraphraser-xxl", help="The model name to use.")
    parser.add_argument("--no_ctx", type=bool, default=True, help="Whether to use context or not.")
    parser.add_argument("--sent_interval", type=int, default=3,help="The sentence interval.")
    parser.add_argument("--lex",type=int, default=60, help="Lexical diversity knob for the paraphrase attack.")
    parser.add_argument("--order",type=int,default=60,help="Order diversity knob for the paraphrase attack.")

    args = parser.parse_args()

    main(args)