Spaces:

BrainDrive
/

Summary-Evaluator

Running

Upload 2 files

0cee974 verified 8 months ago

1.13 kB

	import json
	from src.api_clients import openai_client

	last_eval_result = {} # Shared state

	def get_last_eval_data():
	return last_eval_result if last_eval_result else None

	def run_comparison(human_scores, human_comments, model_scores, model_comments):
	prompt = f"""Compare human and model summary evaluations.

	Human Scores: {human_scores}
	Model Scores: {model_scores}

	Human Comments: {human_comments}
	Model Comments: {model_comments}

	Output key differences, strengths, and any mismatches."""
	res = openai_client.chat.completions.create(
	model="gpt-4o-mini",
	messages=[{"role": "user", "content": prompt}],
	max_tokens=800
	)
	return res.choices[0].message.content

	def import_model_metrics():
	data = get_last_eval_data()
	if not data or "scores" not in data:
	return ["" ] * 6
	s = data["scores"]
	return (
	str(s.get("coverage", "")),
	str(s.get("alignment", "")),
	str(s.get("hallucination", "")),
	str(s.get("relevance", "")),
	str(s.get("bias_toxicity", "")),
	json.dumps(data.get("comments", ""), indent=2)
	)