# soapbox/evaluate_note.py
# SoapBox — AI Clinical Scribe Agent
import anthropic
import os
import json
from dotenv import load_dotenv

# Read ANTHROPIC_API_KEY from a local .env file and create the Anthropic client.
load_dotenv()
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
# Rubric: each metric name maps to the question the judge model scores it against.
METRICS = {
    "completeness": "Did the agent capture every required SOAP field?",
    "accuracy": "Does the note correctly reflect what was said in the transcript?",
    "medication_capture": "Were all medications, doses and frequencies correctly extracted?",
    "clinical_reasoning": "Is the diagnosis and plan clinically justified by the findings?",
    "structure": "Is the note properly formatted and organized for clinical use?"
}
def evaluate_note(transcript: str, soap_note: str) -> dict:
    """Score a generated SOAP note against its source transcript, using Claude as the judge."""
    try:
        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=800,
            messages=[{
                "role": "user",
                "content": f"""You are a clinical documentation expert.
Evaluate this SOAP note against the original transcript.
Score each category strictly from 1 to 10.
TRANSCRIPT:
{transcript}
GENERATED SOAP NOTE:
{soap_note}
Return ONLY valid JSON, no extra text, no markdown:
{{
"completeness": {{"score": 0, "reason": "one sentence"}},
"accuracy": {{"score": 0, "reason": "one sentence"}},
"medication_capture": {{"score": 0, "reason": "one sentence"}},
"clinical_reasoning": {{"score": 0, "reason": "one sentence"}},
"structure": {{"score": 0, "reason": "one sentence"}},
"overall_score": 0
}}"""
            }]
        )

        # Strip any markdown code fences the model adds, then parse the JSON verdict.
        raw = response.content[0].text.strip()
        clean = raw.replace("```json", "").replace("```", "").strip()
        result = json.loads(clean)

        # Attach the rubric question to each scored metric so callers can display it.
        for key in METRICS:
            if key in result:
                result[key]["description"] = METRICS[key]
        return result
    except Exception:
        # Fallback: if the API call or JSON parsing fails, return zeroed scores
        # in the same shape the caller expects.
        return {
            "completeness": {"score": 0, "reason": "Evaluation failed", "description": METRICS["completeness"]},
            "accuracy": {"score": 0, "reason": "Evaluation failed", "description": METRICS["accuracy"]},
            "medication_capture": {"score": 0, "reason": "Evaluation failed", "description": METRICS["medication_capture"]},
            "clinical_reasoning": {"score": 0, "reason": "Evaluation failed", "description": METRICS["clinical_reasoning"]},
            "structure": {"score": 0, "reason": "Evaluation failed", "description": METRICS["structure"]},
            "overall_score": 0
        }
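
# Usage sketch (illustrative only): the transcript and note below are invented
# placeholders, not project fixtures; they just show the expected call shape and
# the printed per-metric scores. Requires a valid ANTHROPIC_API_KEY in .env.
if __name__ == "__main__":
    sample_transcript = (
        "Patient reports a dry cough for five days and is taking ibuprofen "
        "200 mg twice daily for headaches."
    )
    sample_note = (
        "S: Dry cough x5 days. O: Afebrile, lungs clear. "
        "A: Likely viral URI. P: Supportive care; continue ibuprofen 200 mg BID."
    )
    print(json.dumps(evaluate_note(sample_transcript, sample_note), indent=2))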