File size: 2,980 Bytes
0c51b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import json
import os
import re
from copy import deepcopy
from typing import Any, Dict, List

from tqdm import tqdm

# TODO: Fill in REDIS OM URL in the form of `redis://:password@host:port`
# SECURITY(review): a Redis password is hard-coded and committed here — move it
# into an environment variable / secrets manager and rotate the credential.
os.environ["REDIS_OM_URL"] = "redis://:QzmCUD3C3RdsR@localhost:6381"

# NOTE: imported AFTER REDIS_OM_URL is set — presumably the module (via
# redis-om) reads the URL at import time; confirm before reordering imports.
from reverse_engineering import run_reverse_by_pk_agent

# Load the attribution log: JSONL, one JSON object per line.
# (Idiom fix: iterate the file object directly instead of f.readlines().)
with open("../../data/sotopia_pi_openai_log_attribution.jsonl", 'r') as f:
    data: List[Dict[str, Any]] = [json.loads(line) for line in f]

# One-time generation of per-episode utterance files. The directory's mere
# existence is used as a "done" marker — delete it to force regeneration.
if not os.path.exists("../../data/episode_utterances"):
    os.makedirs("../../data/episode_utterances")
    for d in tqdm(data):
        # Run reverse engineering for both agent positions of each episode
        # (the boolean selects which agent — TODO confirm against the helper).
        run_reverse_by_pk_agent(d['episode_id'], True, "../../data/episode_utterances")
        run_reverse_by_pk_agent(d['episode_id'], False, "../../data/episode_utterances")

# Matches keys like "Utterance 3 by Jane Doe" -> (turn number, agent name).
utterance_pattern = r'Utterance (\d+) by ([A-Za-z ]+)'

print("turning into attributed utterances")

attributed_data = []
print(len(data))
for d in tqdm(data):
    for uttr_key, attributed_uttr in d['attributed_utterances'].items():
        match = re.search(utterance_pattern, uttr_key)
        if match:
            turn_number = match.group(1)  # kept as str to preserve output format
            agent_name = match.group(2)
        else:
            raise Exception(f"Utterance key not in correct format: {uttr_key}")
        # Only keep utterances spoken by the agent this record attributes.
        if agent_name != d['agent']:
            continue

        utterance_path = f"../../data/episode_utterances/{d['episode_id']}-{d['agent']}-{turn_number}.json"
        if not os.path.exists(utterance_path):
            raise Exception(f"Utterance not found: {utterance_path}")
        # Fix: reuse the path computed above instead of rebuilding the same
        # f-string a second time (the two copies could silently diverge).
        with open(utterance_path, 'r') as f:
            sotopia_utterance = json.load(f)

        # Deep-copy so the loaded utterance dict is never mutated in place.
        new_utterance = deepcopy(sotopia_utterance)
        new_utterance['attribution'] = attributed_uttr[1]
        new_utterance['turn_number'] = turn_number
        new_utterance['goal_score'] = d['goal_score']

        attributed_data.append(new_utterance)


def calc_reward(utter_attrib: float, goal_score: float) -> float:
    """Map an utterance attribution score and episode goal score to a reward.

    An attribution of -1 is a sentinel ("not attributable") and yields a flat
    -1.0 reward. Otherwise the attribution is normalized by 3 (presumably a
    0-3 scale — confirm upstream) and scaled by the episode's goal score.
    """
    if utter_attrib == -1:
        return -1.0
    return utter_attrib / 3 * goal_score

# Build the reward-model dataset: one record per attributed utterance, with a
# scalar reward derived from the attribution and the episode goal score.
sotopia_pi_utterance_reward = [
    {
        "instruction": d['prompt'],
        "input": "",
        "output": "",
        "value": calc_reward(d['attribution'], d['goal_score']),
        "system": "",
        "history": []
    }
    for d in tqdm(attributed_data)
]

with open("../../data/sotopia_pi_utterance_reward_single_prompt.json", 'w') as f:
    json.dump(sotopia_pi_utterance_reward, f, indent=4)

# Build the PPO dataset: prompt/response pairs only, no scalar value field.
sotopia_pi_utterance_ppo = [
    {
        "instruction": d['prompt'],
        "input": "",
        "output": d["result"],
    }
    for d in tqdm(attributed_data)
]

with open("../../data/sotopia_pi_utterance_ppo.json", 'w') as f:
    json.dump(sotopia_pi_utterance_ppo, f, indent=4)