"""Convert a raw model-response text dump into a CSV file.

The input file is a sequence of blocks, each terminated by ``===END===``.
Inside a block, header lines start with ``PROMPT_ID:``, ``CATEGORY:``,
``MODEL:`` or ``PROMPT:``; everything from the ``RESPONSE:`` line to the end
of the block is the response text. Each block becomes one CSV row with the
columns ``prompt_id, category, model, prompt, response``.
"""
import csv

import pandas as pd

# INPUT AND OUTPUT FILE NAMES
input_file = "AI-MODEL-FINGERPRINTING/src/data_collection/claude_raw.txt"
output_file = "AI-MODEL-FINGERPRINTING/src/data_collection/claude_responses.csv"

# Alternative dataset: uncomment these two lines to convert the OpenAI dump.
#input_file = "AI-MODEL-FINGERPRINTING/src/data_collection/openai_raw.txt"
#output_file = "AI-MODEL-FINGERPRINTING/src/data_collection/openai_responses.csv"

BLOCK_SEPARATOR = "===END==="
RESPONSE_MARKER = "RESPONSE:"
COLUMNS = ["prompt_id", "category", "model", "prompt", "response"]

# (line prefix, column name) for every single-line header field.
HEADER_FIELDS = (
    ("PROMPT_ID:", "prompt_id"),
    ("CATEGORY:", "category"),
    ("MODEL:", "model"),
    ("PROMPT:", "prompt"),
)


def parse_block(block):
    """Parse one response block into a row ordered like ``COLUMNS``.

    Args:
        block: Text of a single block, without the ``===END===`` separator.

    Returns:
        A list ``[prompt_id, category, model, prompt, response]``. Fields
        that are missing from the block are empty strings. Response lines are
        individually stripped and joined with a single space.
    """
    fields = {column: "" for _, column in HEADER_FIELDS}
    response_lines = []
    in_response = False

    for line in block.split("\n"):
        if in_response:
            # Once the response has started, every line belongs to it, even
            # if it happens to begin with a header keyword such as "MODEL:".
            response_lines.append(line.strip())
            continue

        if line.startswith(RESPONSE_MARKER):
            in_response = True
            # Keep any text that shares the line with the marker.
            first_line = line[len(RESPONSE_MARKER):].strip()
            if first_line:
                response_lines.append(first_line)
            continue

        for prefix, column in HEADER_FIELDS:
            if line.startswith(prefix):
                # Slice off only the leading prefix; str.replace would also
                # remove later occurrences of the keyword inside the value.
                fields[column] = line[len(prefix):].strip()
                break

    return [
        fields["prompt_id"],
        fields["category"],
        fields["model"],
        fields["prompt"],
        " ".join(response_lines),
    ]


def parse_raw_text(content):
    """Split *content* on ``===END===`` and parse every non-empty block.

    Args:
        content: Full text of the raw dump file.

    Returns:
        A list of rows (see ``parse_block``), one per non-blank block, in
        file order.
    """
    stripped_blocks = (block.strip() for block in content.split(BLOCK_SEPARATOR))
    return [parse_block(block) for block in stripped_blocks if block]


def main():
    """Read ``input_file``, convert it and write the CSV to ``output_file``."""
    # Read complete text file
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Create dataframe
    df = pd.DataFrame(parse_raw_text(content), columns=COLUMNS)

    # Save proper CSV (pandas quotes fields containing commas or quotes)
    df.to_csv(
        output_file,
        index=False,
        encoding="utf-8",
    )

    print("CSV file created successfully")
    print("Saved as:", output_file)


if __name__ == "__main__":
    main()