Spaces:

qgyd2021
/

llm_eval_system

Running

update

a984ba9 6 days ago

2.17 kB

	#!/usr/bin/python3
	# -*- coding: utf-8 -*-
	import argparse
	import json

	import pandas as pd

	from project_settings import environment, project_path


	def get_args():
	    """
	    Parse the command-line options of this script.

	    Options:
	      --filename  path of the evaluation-result workbook
	                  (default: "evaluation_results_max7.xlsx").
	      --dataset   path of the JSON-lines dataset
	                  (default: a file under ``project_path``).

	    :return: argparse.Namespace with the attributes ``filename`` and ``dataset``.
	    """
	    default_dataset = project_path / "data/dataset/agent-lingoace-zh-400-choice.jsonl"

	    arg_parser = argparse.ArgumentParser()
	    arg_parser.add_argument("--filename", default="evaluation_results_max7.xlsx", type=str)
	    arg_parser.add_argument("--dataset", default=default_dataset.as_posix(), type=str)
	    return arg_parser.parse_args()


	def main():
	    """
	    Export the evaluation rows that are marked incorrect for manual review.

	    Steps:
	      1. Load the JSON-lines file ``args.dataset`` into a dict keyed by each
	         record's "idx" field.
	      2. Read the workbook ``args.filename`` and walk over its rows.
	      3. For every row whose "correct" cell is a boolean False, print the row,
	         print the "prompt" / "response" of the dataset record with the same
	         idx, and collect everything into one output record.
	      4. Write the collected records to "result.xlsx" (current working
	         directory) with empty "op" and "remark" columns.

	    Raises KeyError when a workbook column or dataset field is missing, or when
	    an incorrect row's idx has no matching dataset record.

	    NOTE(review): the dataset lookup and the result collection are assumed to
	    belong to the incorrect-row branch (the export has no "correct" column) -
	    confirm against the intended behavior.
	    """
	    args = get_args()

	    # idx -> complete dataset record
	    dataset = dict()
	    with open(args.dataset, "r", encoding="utf-8") as f:
	        for line in f:
	            # A blank (e.g. trailing) line is not a JSON document; skip it
	            # instead of letting json.loads raise.
	            if not line.strip():
	                continue
	            record = json.loads(line)
	            dataset[record["idx"]] = record

	    # Fixed column order; also keeps the header row when nothing is collected.
	    columns = [
	        "idx",
	        "conversation",
	        "expected",
	        "actual_label",
	        "actual_reason",
	        "note",
	        "prompt",
	        "response",
	        "op",
	        "remark",
	    ]

	    result = list()
	    df = pd.read_excel(args.filename)
	    for _, row in df.iterrows():
	        idx = row["idx"]
	        conversation = row["conversation"]
	        expected = row["expected"]
	        actual_label = row["actual_label"]
	        actual_reason = row["actual_reason"]
	        correct = row["correct"]
	        note = row["note"]

	        # Only a real boolean False counts as "incorrect". is_bool() accepts
	        # both Python bool and numpy.bool_, which an identity test against
	        # False would miss; NaN, numbers and strings are still ignored.
	        is_incorrect = pd.api.types.is_bool(correct) and not correct
	        if not is_incorrect:
	            continue

	        print(idx)
	        print(conversation)
	        print(expected, actual_label)
	        print(actual_reason)
	        print(note)
	        print("+" * 150)

	        dataset_ = dataset[idx]
	        prompt = dataset_["prompt"]
	        response = dataset_["response"]
	        print(prompt)
	        print(response)
	        print("-" * 150)

	        result.append({
	            "idx": idx,
	            "conversation": conversation,
	            "expected": expected,
	            "actual_label": actual_label,
	            "actual_reason": actual_reason,
	            "note": note,
	            "prompt": prompt,
	            "response": response,
	            # left empty on purpose: filled in by hand in the exported sheet
	            "op": None,
	            "remark": None,
	        })

	    result_df = pd.DataFrame(result, columns=columns)
	    result_df.to_excel("result.xlsx", index=False)


	# Script entry point: run main() only when executed directly, not on import.
	if __name__ == "__main__":
	    main()