MODELTRACE-AI / src /data_collection /conversion.py
adyashanayak165-code
initial commit
50cac0b
Raw
History Blame Contribute Delete
1.88 kB
import pandas as pd
import csv
# INPUT AND OUTPUT FILE NAMES
input_file = "AI-MODEL-FINGERPRINTING/src/data_collection/claude_raw.txt"
output_file = "AI-MODEL-FINGERPRINTING/src/data_collection/claude_responses.csv"
# Alternate source/target pair: swap the comments to convert the OpenAI dump.
#input_file = "AI-MODEL-FINGERPRINTING/src/data_collection/openai_raw.txt"
#output_file = "AI-MODEL-FINGERPRINTING/src/data_collection/openai_responses.csv"

# Marker that terminates each response block in the raw text file.
BLOCK_DELIMITER = "===END==="

# Tag that opens the (possibly multi-line) response section of a block.
RESPONSE_PREFIX = "RESPONSE:"

# Single-line metadata tags, paired with the CSV column each one fills.
HEADER_PREFIXES = (
    ("PROMPT_ID:", "prompt_id"),
    ("CATEGORY:", "category"),
    ("MODEL:", "model"),
    ("PROMPT:", "prompt"),
)

# Column order of the generated CSV.
COLUMNS = ["prompt_id", "category", "model", "prompt", "response"]


def parse_block(block):
    """Parse one response block into a CSV row.

    Lines before the ``RESPONSE:`` tag are matched against the metadata tags
    in ``HEADER_PREFIXES``; unrecognised lines are ignored and missing fields
    default to an empty string. Every line from ``RESPONSE:`` onwards belongs
    to the response, which is flattened to a single line by joining the
    stripped lines with one space.

    Args:
        block: Text of a single block, without the ``===END===`` delimiter.

    Returns:
        A list ``[prompt_id, category, model, prompt, response]`` of strings.
    """
    fields = {name: "" for _, name in HEADER_PREFIXES}
    response_lines = []
    response_started = False

    for line in block.split("\n"):
        # Once inside the response, every line is response text, even if it
        # happens to start with a metadata tag such as "PROMPT:".
        if response_started:
            response_lines.append(line.strip())
            continue

        if line.startswith(RESPONSE_PREFIX):
            response_started = True
            # Keep any text that shares the line with the tag instead of
            # discarding it; skip it when empty to avoid a leading space.
            first_line = line[len(RESPONSE_PREFIX):].strip()
            if first_line:
                response_lines.append(first_line)
            continue

        for prefix, name in HEADER_PREFIXES:
            if line.startswith(prefix):
                # Slice off only the leading tag; str.replace would also
                # delete later occurrences inside the value itself.
                fields[name] = line[len(prefix):].strip()
                break

    return [
        fields["prompt_id"],
        fields["category"],
        fields["model"],
        fields["prompt"],
        " ".join(response_lines),
    ]


def parse_blocks(content):
    """Split raw text on ``===END===`` and parse every non-empty block.

    Args:
        content: Full text of the raw responses file.

    Returns:
        A list of rows (see ``parse_block``), in file order. Blocks that are
        empty or whitespace-only are skipped.
    """
    rows = []
    for block in content.split(BLOCK_DELIMITER):
        block = block.strip()
        if not block:
            continue
        rows.append(parse_block(block))
    return rows


def main():
    """Convert ``input_file`` to a CSV written at ``output_file``."""
    # Read complete text file
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Create dataframe
    df = pd.DataFrame(parse_blocks(content), columns=COLUMNS)

    # Save proper CSV
    df.to_csv(
        output_file,
        index=False,
        encoding="utf-8",
    )
    print("CSV file created successfully")
    print("Saved as:", output_file)


if __name__ == "__main__":
    main()