# anno/simSearch/experiment.py
# (Hugging Face upload metadata: uploaded by Ksgk-fy, "Upload 67 files", commit ee657a1, verified)
from src import independent_eval, independent_score, pairmatch_baseline
from data.rude_prompt import RUDE_TIPS
import glob, json
from typing import List
def present_biases(biases: List[str]) -> None:
    """Print each recorded bias description, each followed by a separator rule."""
    separator = '------' * 10
    for description in biases:
        print(description)
        print(separator)
express_judge = lambda x: {0: 'Tie', 1: 'A', 2: 'B'}[x]
# Compare Individually vs. Pairwise
def experiment_pairwise_baseline():
    """Compare naive independent scoring against pairwise comparison on toy pairs.

    Each toy pair is two saved conversations that use the 👉 emoji a known number
    of times (the ground-truth counts ca < cb), so the judge's relative ordering
    can be checked objectively.

    Returns:
        (individual_eval_bias_score, individual_eval_biases,
         pairwise_bias_score, pairwise_biases) — normalized error rates plus the
        human-readable records of each miss.
    """
    more_emoji_tip = "Use 👉 emoji as much as possible"  # the emoji existence tip suffices to tell the story
    conversation_files = glob.glob('./data/confirmation_bias/conversation_*.json')
    # (file suffix A, file suffix B, description, GT emoji count A, GT emoji count B)
    name_pairs = [('1AT.json', '8BT.json', 'A use emoji once, B use emoji 8 times', 1, 8),
                  ('1AT.json', '3BT.json', 'A use emoji once, B use emoji 3 times', 1, 3),
                  ('3AT.json', '8BT.json', 'A use emoji 3 times, B use emoji 8 times', 3, 8)]
    individual_eval_bias_score, individual_eval_biases = 0, []
    pairwise_bias_score, pairwise_biases = 0, []
    for (nameA, nameB, test_desc, ca, cb) in name_pairs:
        path_A = next(c for c in conversation_files if nameA in c)
        path_B = next(c for c in conversation_files if nameB in c)
        # FIX: use context managers so the file handles are closed deterministically
        # (original used json.load(open(...)) and leaked the handles).
        with open(path_A, 'r') as f:
            conversation_A = json.load(f)
        with open(path_B, 'r') as f:
            conversation_B = json.load(f)
        conversation_history_pair = (conversation_A, conversation_B)
        # Naive Independent Scoring
        _, naive_info = independent_score(conversation_history_pair, [more_emoji_tip])
        naive_info = naive_info[more_emoji_tip]
        print('------'*10)
        print('Pair description: ', test_desc)
        print('Naive Independent Score - A: ', naive_info['score_A'], ' | Score - B: ', naive_info['score_B'], ' | GT-Score A: ', ca, ' | GT-Score B: ', cb)
        # Correct iff the judge's ordering of A vs B matches the ground-truth ordering.
        independent_correct = ((naive_info['score_A'] >= naive_info['score_B']) == (ca >= cb))
        if not independent_correct:
            individual_eval_bias_score += 1
            # f-strings coerce the score/argument fields to str, so this no longer
            # raises TypeError if the scores come back as numbers.
            individual_eval_biases.append(
                f'|| {test_desc} || \n'
                f'Score A: {naive_info["score_A"]}\n -- Argument: {naive_info["argument_A"]}\n'
                f'Score B: {naive_info["score_B"]}\n -- Argument: {naive_info["argument_B"]}')
        # Pairwise Comparison should improve the result here -- nothing fancy, just a simple comparison
        print('------'*10)
        info = pairmatch_baseline(conversation_history_pair, [more_emoji_tip])
        info = info[more_emoji_tip]
        relative_score = info['relative_score']
        print('Pairwise Comparison Relative Score - A: ', relative_score[0], '| Score - B: ', relative_score[1], ' | GT-Score A: ', ca, ' | GT-Score B: ', cb)
        pairwise_correct = ((relative_score[0] >= relative_score[1]) == (ca >= cb))
        if not pairwise_correct:
            pairwise_bias_score += 1
            # FIX: the original concatenated info['conf_*'] values with '+', which
            # raises TypeError whenever a confidence is numeric; f-strings coerce.
            pairwise_biases.append(
                f'|| {test_desc} || \n'
                f'Relative Score A: {relative_score[0]}'
                f'\n -- Argument A|(A, B): {info["argument_A_from_AB"]}'
                f'\n -- Confidence A|(A, B): {info["conf_A_from_AB"]}'
                f'\n -- Argument A|(B,A): {info["argument_A_from_BA"]}'
                f'\n -- Confidence A|(B,A): {info["conf_A_from_BA"]}'
                f'\nRelative Score B: {relative_score[1]}'
                f'\n -- Argument B | (A, B): {info["argument_B_from_AB"]}'
                f'\n -- Confidence B | (A, B): {info["conf_B_from_AB"]}'
                f'\n -- Argument B | (B, A): {info["argument_B_from_BA"]}'
                f'\n -- Confidence B | (B, A): {info["conf_B_from_BA"]}')
    individual_eval_bias_score /= len(name_pairs)
    # NOTE(review): pairwise is divided by 2*len(name_pairs) although it increments
    # at most once per pair — presumably to account for the two presentation orders
    # (A,B)/(B,A) inside pairmatch_baseline; confirm before relying on the scale.
    pairwise_bias_score /= (2 * len(name_pairs))
    return individual_eval_bias_score, individual_eval_biases, pairwise_bias_score, pairwise_biases
# Anchoring bias, for the most part
def experiment_biases():
    """Measure anchoring bias of the pairwise judge on toy emoji-count pairs.

    Every pair satisfies GT count A < GT count B, so a verdict of
    "A follows the tip but B does not" contradicts the facts and is
    counted as an anchoring-bias instance.

    Returns:
        (anchoring_bias_score, anchoring_biases) — the normalized bias rate
        and the human-readable record of each biased judgement.
        (BUG FIX: the original returned/normalized four names —
        individual_eval_bias_score, pairwise_bias_score, ... — that were never
        defined in this function and raised NameError; only the anchoring
        quantities are computed here.)
    """
    more_emoji_tip = "Use 👉 emoji as much as possible"  # the emoji existence tip suffices to tell the story
    conversation_files = glob.glob('./data/confirmation_bias/conversation_*.json')
    # (file suffix A, file suffix B, description, GT emoji count A, GT emoji count B)
    name_pairs = [('1AT.json', '8BT.json', 'A use emoji once, B use emoji 8 times', 1, 8),
                  ('1AT.json', '3BT.json', 'A use emoji once, B use emoji 3 times', 1, 3),
                  ('3AT.json', '8BT.json', 'A use emoji 3 times, B use emoji 8 times', 3, 8)]
    anchoring_bias_score, anchoring_biases = 0, []
    for (nameA, nameB, test_desc, ca, cb) in name_pairs:
        path_A = next(c for c in conversation_files if nameA in c)
        path_B = next(c for c in conversation_files if nameB in c)
        # Context managers so the JSON file handles are closed deterministically.
        with open(path_A, 'r') as f:
            conversation_A = json.load(f)
        with open(path_B, 'r') as f:
            conversation_B = json.load(f)
        conversation_history_pair = (conversation_A, conversation_B)
        # Pairwise Comparison should improve the result here -- nothing fancy, just a simple comparison
        print('------'*10)
        info = pairmatch_baseline(conversation_history_pair, [more_emoji_tip])
        info = info[more_emoji_tip]
        relative_score = info['relative_score']
        # in the toy examples, we are only doing counting comparison where cA<cB, the decision that 'B is not following the tip' is a Bias
        if info['eval_A_from_AB'] and not info['eval_B_from_AB']:
            anchoring_bias_score += 1
            # BUG FIX: the original called list.append with TWO arguments
            # (TypeError) and read key 'confidence_B_from_AB', while every other
            # reader of this dict uses 'conf_B_from_AB' — presumably the same key;
            # confirm against pairmatch_baseline's output schema.
            anchoring_biases.append(
                f'|| {test_desc} || \n'
                f'Evaluation B: {info["eval_B_from_AB"]}'
                f'\n -- Argument: {info["argument_B_from_AB"]}'
                f'\n -- Confidence: {info["conf_B_from_AB"]}')
        print('Pairwise Comparison Relative Score - A: ', relative_score[0], '| Score - B: ', relative_score[1], ' | GT-Score A: ', ca, ' | GT-Score B: ', cb)
    # At most one increment per pair, so normalize by the number of pairs.
    anchoring_bias_score /= len(name_pairs)
    return anchoring_bias_score, anchoring_biases
# # Anchoring Bias && Confirmation Bias
# def experiment_biases(name=['anchor', 'confirmation'], models=['GPT3.5', 'GPT4', 'Gemini']):
# # Toy environment setup:
# # - Emoji Existence Tip Evaluation
# emoji_existence_tip = "Use πŸ‘‰ emoji at least once" # the emoji existence tip suffices to tell the story
# # - Conversation A & B contains same amount of emoji πŸ‘‰
# # ---- case1. they all have only 1 πŸ‘‰
# # ---- case2. they all have 8 👉 (harder to ignore if you aren't blind)
# # ---- case3. no emoji is used for both conversations
# # - Conversation A & B contains same amount of emoji πŸ‘‰
# conversation_files = glob.glob('./data/confirmation_bias/conversation_*.json')
# name_pairs = [('1AT.json', '1BT.json', 'A&B use emoji once'), ('3AT.json', '3BT.json', 'A&B use emoji 3 times'), ('8AT.json', '8BT.json', 'A&B use emoji 8 times'), ('AF.json', 'BF.json', 'A&B use no emoji')]
# naive_bias_score, naive_biases, anchor_bias_score, anchor_biases, confirmation_bias_score, confirmation_biases = 0, [], 0, [], 0, []
# for experiment_config in name_pairs:
# (nameA, nameB, test_desc) = experiment_config
# names = [nameA, nameB]
# gts = ['T' in name for name in names]
# conversation_A = [c for c in conversation_files if names[0] in c][0]
# conversation_B = [c for c in conversation_files if names[1] in c][0]
# conversation_A = json.load(open(conversation_A, 'r'))
# conversation_B = json.load(open(conversation_B, 'r'))
# conversation_history_pair = (conversation_A, conversation_B)
# # Anchor Bias happens when compare (A, B) in order, and judgement of A affect directly on the judgement of B, when A follows the tip, confirmation bias more likely causes negative evaluation on B, and LLM 'ignores the fact'
# # Confirmation Bias happens when evaluating A with a reference argument. LLM will be inclined to follow the rhetoric in the reference argument, and ignore the 'fact'
# judge, info = pairmatch_baseline(conversation_history_pair, [emoji_existence_tip])
# info = info[emoji_existence_tip]
# # Naive Bias Check -- can not see emoji πŸ‘‰, or hallucinate emoji πŸ‘‰, basically error in LLM, lack of capacity
# _, naive_info = independent_eval(conversation_history_pair, [emoji_existence_tip])
# naive_info = naive_info[emoji_existence_tip]
# express_eval = lambda x: {True: 'Follows Tip', False: 'Not follow tip'}[x]
# express_reflect = lambda x: {True: 'Agree', False: 'Disagree'}[x]
# if naive_info['eval_A'] != gts[0]:
# naive_bias_score += 1
# naive_biases.append(test_desc + ' || \n' + 'Evaluation A: ' + express_eval(naive_info["eval_A"]) + '\n -- Argument: ' + naive_info["argument_A"])
# if naive_info['eval_B'] != gts[1]:
# naive_bias_score += 1
# naive_biases.append(test_desc + ' || \n' + 'Evaluation B: ' + express_eval(naive_info["eval_B"]) + '\n -- Argument: ' + naive_info["argument_B"])
# # Use info dict to analyze the two biases -- also would help validate the reflection method's performance here
# # - Anchor Bias | We know both A & B contains exact same number of πŸ‘‰, we add 1 to anchor bias score if this fact is ignored, and A is judged differently from B
# if info['eval_A_from_AB'] != info['eval_B_from_AB']:
# anchor_bias_score += 1
# anchor_biases.append(test_desc + ' || \n' + 'Evaluation A: ' + express_eval(info["eval_A_from_AB"]) + '\n -- Argument: ' + info['argument_A_from_AB'] + '\nEvaluation B: ' + express_eval(info["eval_B_from_AB"]) + '\n -- Argument: ' + info['argument_B_from_AB'])
# if info['eval_A_from_BA'] != info['eval_B_from_BA']:
# anchor_bias_score += 1
# anchor_biases.append(test_desc + ' || \n' + 'Evaluation A: ' + express_eval(info["eval_A_from_BA"]) + '\n -- Argument: ' + info['argument_A_from_BA'] + '\nEvaluation B: ' + express_eval(info["eval_B_from_BA"]) + '\n -- Argument: ' + info['argument_B_from_BA'])
# # print('Check keys in info: ', info.keys())
# # - Confirmation Bias | When the argument is wrong, and the reflection of LLM continues the error, confirmation bias is in-play || BTW, independent evaluation works fine here
# if (info['reflect_A_from_AB'] == info['eval_A_from_BA']) and (info['eval_A_from_AB'] != gts[0]):
# confirmation_bias_score += 1
# confirmation_biases.append(test_desc + ' || \n' + 'Evaluation A: ' + express_eval(info["eval_A_from_AB"]) + '\n -- Argument: '+info['argument_A_from_AB'] + '\nReflection A: ' + express_reflect(info["reflect_A_from_AB"]) + '\n -- Argument: ' + info['reflect_argument_A_from_AB'])
# if (info['reflect_B_from_AB'] == info['eval_B_from_AB']) and (info['eval_B_from_AB'] != gts[1]):
# confirmation_bias_score += 1
# confirmation_biases.append(test_desc + ' || \n' + 'Evaluation B: ' + express_eval(info["eval_B_from_AB"]) + '\n -- Argument: '+info['argument_B_from_AB'] + '\nReflection B: ' + express_reflect(info["reflect_B_from_AB"]) + '\n -- Argument: ' + info['reflect_argument_B_from_AB'])
# if (info['reflect_A_from_BA'] == info['eval_A_from_AB']) and (info['eval_A_from_BA'] != gts[0]):
# confirmation_bias_score += 1
# confirmation_biases.append(test_desc + ' || \n' + 'Evaluation A: ' + express_eval(info["eval_A_from_BA"]) + '\n -- Argument: '+info['argument_A_from_BA'] + '\nReflection A: ' + express_reflect(info["reflect_A_from_BA"]) + '\n -- Argument: ' + info['reflect_argument_A_from_BA'])
# if (info['reflect_B_from_BA'] == info['eval_B_from_BA']) and (info['eval_B_from_BA'] != gts[1]):
# confirmation_bias_score += 1
# confirmation_biases.append(test_desc + ' || \n' + 'Evaluation B: ' + express_eval(info["eval_B_from_BA"]) + '\n -- Argument: '+info['argument_B_from_BA'] + '\nReflection B: ' + express_reflect(info["reflect_B_from_BA"]) + '\n -- Argument: ' + info['reflect_argument_B_from_BA'])
# naive_bias_score /= (2 * len(name_pairs))
# anchor_bias_score /= (2 * len(name_pairs))
# confirmation_bias_score /= (4 * len(name_pairs))
# return naive_bias_score, naive_biases, anchor_bias_score, anchor_biases, confirmation_bias_score, confirmation_biases
# Bias Check for LLMs
# model_name only labels the output CSV files under ./runs/bias_experiment/;
# it does not select which model the src helpers actually call.
model_name = 'GPT-4'
# naive_bias_score, naive_biases, anchor_bias_score, anchor_biases, confirmation_bias_score, confirmation_biases = experiment_biases() # experiment with GPT4 here
# print('------'*10)
# print('Naive Bias Score: ', naive_bias_score)
# present_biases(naive_biases)
# print('Anchor Bias Score: ', anchor_bias_score)
# present_biases(anchor_biases)
# print('Confirmation Bias Score: ', confirmation_bias_score)
# present_biases(confirmation_biases)
# # Store bias score into dict, and into csv file
# import pandas as pd
# bias_score = {'Naive': naive_bias_score, 'Anchor': anchor_bias_score, 'Confirmation': confirmation_bias_score}
# bias_score_df = pd.DataFrame(bias_score.items(), columns=['Bias', 'Score'])
# bias_score_df.to_csv(f'./runs/bias_experiment/{model_name}_bias_score.csv', index=False)
# Pairwise Comparison Justification
# independent_eval_bias_score, independent_eval_biases, compare_bias_socre, compare_biases = experiment_pairwise_comparison()
# print('------'*10)
# print('Individual Evaluation Bias Score: ', independent_eval_bias_score)
# present_biases(independent_eval_biases)
# print('Pairwise Comparison Bias Score: ', compare_bias_socre)
# present_biases(compare_biases)
# # Store bias score into dict, and into csv file
# import pandas as pd
# bias_score = {'Independent': independent_eval_bias_score, 'Pairwise': compare_bias_socre}
# bias_score_df = pd.DataFrame(bias_score.items(), columns=['Bias', 'Score'])
# bias_score_df.to_csv(f'./runs/bias_experiment/{model_name}_pairwise_bias_score.csv', index=False)
# parsing test example
# from src.pairmatch import parse_BA_compare_respond
# # Example usage
# response = """
# The customer in conversation B IS following the tip because they use the πŸ‘‰ emoji multiple times throughout the conversation to highlight their points of interest or queries.\n Confidence level: 10\n
# The customer in conversation A IS NOT following the tip because they do not use the πŸ‘‰ emoji at all in the conversation. Confidence level: 10
# In comparison, the customer in conversation B IS NOT worse at following the tip because they use the πŸ‘‰ emoji as advised, while the customer in conversation A doesn't use it at all.
# Confidence Level: 10.\n In comparison, the customer in conversation B IS NOT worse at following the tip because they use the πŸ‘‰ emoji as advised, while the customer in conversation A doesn't use it at all. Confidence Level: 10.
# """
# print('Original Response: \n', response)
# parsed_responses = parse_BA_compare_respond(response)
# print('Parsed Response: \n')
# for key, item in parsed_responses.items():
# print(key, ': ', item)
# print('------'*10)
# Run the pairwise-vs-individual baseline experiment and persist/print the results.
individual_eval_bias_score, individual_eval_biases, pairwise_bias_score, pairwise_biases = experiment_pairwise_baseline()
# Store bias score into dict, and into csv file
import os
import pandas as pd
# FIX: DataFrame.to_csv raises FileNotFoundError if the target directory is
# missing; create it up front (no-op when it already exists).
os.makedirs('./runs/bias_experiment', exist_ok=True)
bias_score = {'Individual': individual_eval_bias_score, 'Pairwise': pairwise_bias_score}
bias_score_df = pd.DataFrame(bias_score.items(), columns=['Bias', 'Score'])
bias_score_df.to_csv(f'./runs/bias_experiment/{model_name}_pairwise_bias_score.csv', index=False)
print('------'*10)
print('Individual Evaluation Bias Score: ', individual_eval_bias_score)
present_biases(individual_eval_biases)
print('Pairwise Comparison Bias Score: ', pairwise_bias_score)
present_biases(pairwise_biases)