import json
from src.api_clients import openai_client
# Shared module-level state: latest evaluation payload (populated elsewhere).
last_eval_result = {}


def get_last_eval_data():
    """Return the most recent evaluation payload, or None if none is stored."""
    # An empty dict is falsy, so this normalizes "no data yet" to None.
    return last_eval_result or None
def run_comparison(human_scores, human_comments, model_scores, model_comments):
    """Ask the LLM to compare human vs. model summary evaluations.

    Returns the model's free-text analysis of differences, strengths,
    and mismatches between the two score/comment sets.
    """
    # NOTE(review): prompt text kept byte-identical to the original f-string.
    prompt = (
        "Compare human and model summary evaluations.\n"
        f"Human Scores: {human_scores}\n"
        f"Model Scores: {model_scores}\n"
        f"Human Comments: {human_comments}\n"
        f"Model Comments: {model_comments}\n"
        "Output key differences, strengths, and any mismatches."
    )
    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=800,
    )
    return response.choices[0].message.content
def import_model_metrics():
    """Extract the latest model evaluation metrics as six display strings.

    Returns a 6-tuple of:
        coverage, alignment, hallucination, relevance, bias_toxicity
        (each stringified), followed by the JSON-formatted comments.
    When no evaluation data is available (or it has no "scores" key),
    returns a 6-tuple of empty strings.
    """
    data = get_last_eval_data()
    if not data or "scores" not in data:
        # Fix: the original returned a *list* here but a *tuple* on the
        # success path — keep the return type consistent for callers.
        return ("",) * 6
    s = data["scores"]
    return (
        str(s.get("coverage", "")),
        str(s.get("alignment", "")),
        str(s.get("hallucination", "")),
        str(s.get("relevance", "")),
        str(s.get("bias_toxicity", "")),
        # Comments may be any JSON-serializable shape; pretty-print for display.
        json.dumps(data.get("comments", ""), indent=2),
    )