Spaces:

jbilcke-hf
/

SNIPED_grasp-any-region

Running on Zero

App Files Files Community

SNIPED_grasp-any-region / evaluation /Ferret-Bench /summarize_gpt_review.py

jbilcke-hf

Upload core files for paper 2510.18876

46861c5 verified about 2 months ago

raw

history blame contribute delete

2.55 kB

	import argparse
	import json
	import os
	from collections import defaultdict

	import numpy as np


	def parse_args():
	    """Parse the command-line options of the review summarizer.

	    Options:
	        -d/--dir:    directory scanned for review files and used as the
	                     base path when opening them (default ``None``).
	        -f/--files:  zero or more explicit review files (default ``None``).
	        -i/--ignore: zero or more question ids to skip, kept as the raw
	                     strings typed on the command line (default ``None``).
	        -s/--save:   flag; ``True`` when present, ``False`` otherwise.

	    Returns:
	        argparse.Namespace with attributes ``dir``, ``files``, ``ignore``
	        and ``save``.
	    """
	    # (flags, keyword settings) for each option, in registration order.
	    option_specs = (
	        (("-d", "--dir"), {"default": None}),
	        (("-f", "--files"), {"nargs": "*", "default": None}),
	        (("-i", "--ignore"), {"nargs": "*", "default": None}),
	        (("-s", "--save"), {"action": "store_true"}),
	    )

	    cli = argparse.ArgumentParser(description="ChatGPT-based QA evaluation.")
	    for flags, settings in option_specs:
	        cli.add_argument(*flags, **settings)
	    return cli.parse_args()


	if __name__ == "__main__":
	    # Summarize GPT review files (.jsonl, one JSON review per line).
	    # For every file, per-category and overall mean scores are computed and
	    # the second mean is printed as a percentage of the first. With --save
	    # the per-file summaries are also written to "metric.json".
	    args = parse_args()

	    # Question ids to skip; command-line values arrive as strings, so they
	    # are converted to int before being compared with review["question_id"].
	    ignored_ids = set()
	    if args.ignore is not None:
	        ignored_ids = {int(x) for x in args.ignore}

	    if args.files:
	        review_files = args.files
	    else:
	        # Without --dir, os.listdir(None) scans the current working
	        # directory, which matches how the files are opened below.
	        review_files = [
	            name
	            for name in os.listdir(args.dir)
	            if name.endswith(".jsonl")
	            and name.startswith(("gpt4_text", "reviews_", "review_"))
	        ]

	    metrics = []
	    for review_file in sorted(review_files):
	        # Label printed for this file: base name without the "gpt4_text_"
	        # marker and the ".jsonl" extension.
	        config = (
	            os.path.basename(review_file)
	            .replace("gpt4_text_", "")
	            .replace(".jsonl", "")
	        )
	        print(config)

	        review_path = (
	            os.path.join(args.dir, review_file)
	            if args.dir is not None
	            else review_file
	        )

	        # category name (plus the aggregate "all") -> list of score entries.
	        scores = defaultdict(list)
	        with open(review_path, encoding="utf-8") as f:
	            for review_str in f:
	                # Blank lines would make json.loads raise; skip them.
	                if not review_str.strip():
	                    continue
	                review = json.loads(review_str)
	                if ignored_ids and review["question_id"] in ignored_ids:
	                    continue
	                if "category" in review:
	                    scores[review["category"]].append(review["tuple"])
	                    scores["all"].append(review["tuple"])
	                elif "tuple" in review:
	                    scores["all"].append(review["tuple"])
	                else:
	                    scores["all"].append(review["score"])

	        summ_dict = {}
	        for k, v in sorted(scores.items()):
	            # Column-wise mean over all entries of this category.
	            # NOTE(review): presumably each entry is a (reference, candidate)
	            # score pair -- confirm against the review writer.
	            stats = np.asarray(v).mean(0).tolist()
	            stats = [round(x, 3) for x in stats]
	            # Second mean score as a percentage of the first.
	            relative_score = round(stats[1] / stats[0] * 100, 1)
	            print(k, relative_score)
	            summ_dict[k] = relative_score
	        print("=================================")
	        metrics.append(summ_dict)

	    if args.save:
	        # --dir is optional; fall back to the current directory instead of
	        # crashing in os.path.join(None, ...).
	        output_dir = args.dir if args.dir is not None else "."
	        with open(os.path.join(output_dir, "metric.json"), "w") as f:
	            json.dump(metrics, f, indent=2)