| import asyncio | |
| import json | |
| import os | |
| from datasets import Dataset, load_dataset | |
| from langchain_openai import ChatOpenAI | |
| from aihack.aihack.data_generation.malicious_instruction_generator import ( | |
| JailBreakExample, | |
| MaliciousInstructionGenerator, | |
| ) | |
| from aihack.aihack.data_generation.repo import JailBreakExampleRepo | |
# JSON file that main() loads previously generated examples from (when it
# exists) and that the accumulated examples are saved to after every batch.
DATA_FILE_NAME = "malicious_data.json"
# Forwarded to generate_malicious_instruction(); presumably caps the number
# of in-flight model requests -- confirm against the generator.
MAX_CONCURRENT_REQUESTS = 5
# main() stops generating once at least this many examples are collected.
MAX_EXAMPLES_TO_GENERATE = 2600
async def main():
    """Generate malicious-instruction examples until the target count is met.

    Examples already stored in ``DATA_FILE_NAME`` are loaded first, so a
    rerun continues from the saved state. Each loop iteration requests one
    batch from ``MaliciousInstructionGenerator``, appends it to the running
    list, and saves the whole list back to ``DATA_FILE_NAME``. Batches are
    appended whole, so the final count may exceed
    ``MAX_EXAMPLES_TO_GENERATE``.
    """
    # Pick up whatever an earlier run already saved.
    examples = []
    if os.path.exists(DATA_FILE_NAME):
        with open(DATA_FILE_NAME) as saved_file:
            saved_records = json.load(saved_file)
        examples = [JailBreakExample.from_json(record) for record in saved_records]

    # Only the train-split rows labelled "jailbreak" feed the example repo.
    jailbreak_dataset = load_dataset("jackhhao/jailbreak-classification")
    jailbreak_dataset_train = jailbreak_dataset["train"].filter(
        lambda row: row["type"] == "jailbreak"
    )
    jailbreak_example_repo_train = JailBreakExampleRepo(jailbreak_dataset_train)

    model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.9)
    malicious_data_generator = MaliciousInstructionGenerator(
        model, jailbreak_example_repo_train
    )

    while len(examples) < MAX_EXAMPLES_TO_GENERATE:
        print("=" * 50)
        print(
            f"Generating malicious data iteration. Current examples count: {len(examples)}. Target examples count: {MAX_EXAMPLES_TO_GENERATE}"
        )
        # The keyword below is spelled the way the generator's API spells it.
        batch = await malicious_data_generator.generate_malicious_instruction(
            max_conccurrent_requests=MAX_CONCURRENT_REQUESTS
        )
        examples.extend(batch)
        # Save after every batch so an interrupted run keeps its progress.
        MaliciousInstructionGenerator.save_to_file(examples, DATA_FILE_NAME)
        print(f"Generated {len(batch)} malicious data examples")

    print(f"Generated {len(examples)} examples. Stopping the generation")
# Start the async generation loop only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())