import anthropic
import os
import json
from dotenv import load_dotenv

load_dotenv()
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Judge rubric: one short question per scored dimension.
METRICS = {
    "completeness": "Did the agent capture every required SOAP field?",
    "accuracy": "Does the note correctly reflect what was said in the transcript?",
    "medication_capture": "Were all medications, doses and frequencies correctly extracted?",
    "clinical_reasoning": "Is the diagnosis and plan clinically justified by the findings?",
    "structure": "Is the note properly formatted and organized for clinical use?"
}

def evaluate_note(transcript: str, soap_note: str) -> dict:
    """Score a generated SOAP note against its source transcript using Claude as an LLM judge."""
    try:
        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=800,
            messages=[{
                "role": "user",
                "content": f"""You are a clinical documentation expert. Evaluate this SOAP note against the original transcript. Score each category strictly from 1 to 10.

TRANSCRIPT:
{transcript}

GENERATED SOAP NOTE:
{soap_note}

Return ONLY valid JSON, no extra text, no markdown:
{{
  "completeness": {{"score": 0, "reason": "one sentence"}},
  "accuracy": {{"score": 0, "reason": "one sentence"}},
  "medication_capture": {{"score": 0, "reason": "one sentence"}},
  "clinical_reasoning": {{"score": 0, "reason": "one sentence"}},
  "structure": {{"score": 0, "reason": "one sentence"}},
  "overall_score": 0
}}"""
            }]
        )
        raw = response.content[0].text.strip()
        # Strip any markdown code fences the model may have wrapped around the JSON.
        clean = raw.replace("```json", "").replace("```", "").strip()
        result = json.loads(clean)
        # Attach the rubric description to each scored category.
        for key in METRICS:
            if key in result:
                result[key]["description"] = METRICS[key]
        return result
    except Exception:
        # Fall back to zeroed scores if the API call or JSON parsing fails.
        return {
            "completeness": {"score": 0, "reason": "Evaluation failed", "description": METRICS["completeness"]},
            "accuracy": {"score": 0, "reason": "Evaluation failed", "description": METRICS["accuracy"]},
            "medication_capture": {"score": 0, "reason": "Evaluation failed", "description": METRICS["medication_capture"]},
            "clinical_reasoning": {"score": 0, "reason": "Evaluation failed", "description": METRICS["clinical_reasoning"]},
            "structure": {"score": 0, "reason": "Evaluation failed", "description": METRICS["structure"]},
            "overall_score": 0
        }
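
# --- Usage sketch (illustrative only, not part of the evaluator itself) ---
# A minimal example of how evaluate_note() might be called; the transcript and
# SOAP note below are hypothetical placeholders, not real clinical data.
if __name__ == "__main__":
    sample_transcript = (
        "Doctor: What brings you in today? "
        "Patient: I've had a cough and fever for three days. "
        "Doctor: Any medications? "
        "Patient: Just ibuprofen 400 mg twice a day."
    )
    sample_note = (
        "S: 3-day history of cough and fever.\n"
        "O: Vitals not documented.\n"
        "A: Likely viral upper respiratory infection.\n"
        "P: Supportive care; continue ibuprofen 400 mg twice daily as needed."
    )
    scores = evaluate_note(sample_transcript, sample_note)
    print(json.dumps(scores, indent=2))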