Spaces:

Sadhanha
/

soapbox

Sleeping

File size: 2,522 Bytes

268b40a

import json
import logging
import os

import anthropic
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment before the API key is looked up below.
load_dotenv()

# Module-level Anthropic client used by evaluate_note(). The key is read
# from the ANTHROPIC_API_KEY environment variable; the lookup yields None
# when the variable is unset.
client = anthropic.Anthropic(
    api_key=os.environ.get("ANTHROPIC_API_KEY"),
)

# Grading rubric for generated SOAP notes: metric key -> the question that
# metric answers. The keys match the JSON fields requested from the model in
# evaluate_note(), which attaches each question to its metric entry under
# "description".
METRICS = {
    "completeness": "Did the agent capture every required SOAP field?",
    "accuracy": "Does the note correctly reflect what was said in the transcript?",
    "medication_capture": "Were all medications, doses and frequencies correctly extracted?",
    "clinical_reasoning": "Is the diagnosis and plan clinically justified by the findings?",
    "structure": "Is the note properly formatted and organized for clinical use?",
}

_logger = logging.getLogger(__name__)


def _failed_evaluation() -> dict:
    """Return the all-zero result used when an evaluation cannot be completed."""
    result = {
        key: {"score": 0, "reason": "Evaluation failed", "description": description}
        for key, description in METRICS.items()
    }
    result["overall_score"] = 0
    return result


def _extract_json_object(raw: str) -> dict:
    """Parse the JSON object spanning the first '{' to the last '}' in *raw*.

    Anything outside that span (markdown code fences, stray prose the model
    added despite instructions) is ignored.

    Raises:
        ValueError: if *raw* contains no brace-delimited span or the span is
            not valid JSON (json.JSONDecodeError is a ValueError subclass).
    """
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object found in model response")
    return json.loads(raw[start:end + 1])


def evaluate_note(
    transcript: str,
    soap_note: str,
    *,
    model: str = "claude-sonnet-4-20250514",
    max_tokens: int = 800,
) -> dict:
    """Score a generated SOAP note against its source transcript.

    Sends both texts to the Anthropic Messages API with a grading prompt and
    parses the JSON the model returns.

    Args:
        transcript: The original conversation transcript.
        soap_note: The generated SOAP note to be graded.
        model: Model identifier passed to the API.
        max_tokens: Response token limit passed to the API.

    Returns:
        A dict with one entry per key in METRICS, each a dict holding
        "score", "reason" and "description" (the rubric question from
        METRICS). A metric the model omitted or returned in an unusable form
        is filled in with score 0 and reason "Evaluation failed". Any other
        fields the model returned, including "overall_score", are passed
        through unchanged; score values are not range-checked.

        If the request or the JSON parsing fails, every metric is scored 0
        with reason "Evaluation failed" and "overall_score" is 0. The
        failure is logged rather than raised.
    """
    prompt = f"""You are a clinical documentation expert.
Evaluate this SOAP note against the original transcript.
Score each category strictly from 1 to 10.

TRANSCRIPT:
{transcript}

GENERATED SOAP NOTE:
{soap_note}

Return ONLY valid JSON, no extra text, no markdown:
{{
    "completeness":       {{"score": 0, "reason": "one sentence"}},
    "accuracy":           {{"score": 0, "reason": "one sentence"}},
    "medication_capture": {{"score": 0, "reason": "one sentence"}},
    "clinical_reasoning": {{"score": 0, "reason": "one sentence"}},
    "structure":          {{"score": 0, "reason": "one sentence"}},
    "overall_score": 0
}}"""

    try:
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        result = _extract_json_object(response.content[0].text)
    except Exception:
        # Deliberate best-effort boundary: callers always get a result dict,
        # never an exception. Log the traceback so the failure is diagnosable,
        # but do not log the transcript, note or model output, which may
        # contain patient information.
        _logger.exception("SOAP note evaluation failed")
        return _failed_evaluation()

    for key, description in METRICS.items():
        entry = result.get(key)
        if isinstance(entry, dict):
            entry["description"] = description
        else:
            # Keep the result shape consistent with the failure path instead
            # of discarding the whole evaluation over one bad metric.
            result[key] = {
                "score": 0,
                "reason": "Evaluation failed",
                "description": description,
            }

    return result