Spaces:

BrainDrive
/

HealthEval

Sleeping

HealthEval / core /schema.py

Upload 7 files

a32fa97 verified 7 months ago

1.5 kB

	# core/schema.py

	from datetime import datetime, timezone
	from typing import Any, Dict, List

	from pydantic import BaseModel, Field


	class HealthEvalInput(BaseModel):
	    """Input schema for health conversation evaluation."""

	    # Both fields are required: the Ellipsis default marks a pydantic
	    # field as mandatory, so validation fails when either one is missing.

	    # Human side of the exchange (a single query or wider conversation context).
	    query: str = Field(
	        default=...,
	        description="The human query or conversation context.",
	    )

	    # Model answer that is to be evaluated.
	    response: str = Field(
	        default=...,
	        description="The AI model's response to evaluate.",
	    )


	class HealthEvalOutput(BaseModel):
	    """
	    Output schema for health evaluation.
	    Stores per-judge results with scores, total score, and comments.
	    """

	    # The query text that the evaluation was run against (required).
	    query: str = Field(..., description="The query text that was evaluated.")

	    # Exactly six weights are required; the length bounds reject any other count.
	    # NOTE(review): presumably one weight per scoring dimension - confirm
	    # against the code that builds this model.
	    # NOTE(review): min_items/max_items are the pydantic v1 spellings; pydantic v2
	    # deprecates them in favour of min_length/max_length. Confirm the installed
	    # pydantic version before renaming.
	    weights: List[float] = Field(..., min_items=6, max_items=6)

	    # Names of the judges that took part in this evaluation (required).
	    selected_judges: List[str] = Field(..., description="Judges used for evaluation")

	    # Mapping of judge name -> free-form result dict. The inner dict is typed
	    # Dict[str, Any], so its keys are NOT validated by this schema.
	    models: Dict[str, Dict[str, Any]] = Field(
	        ...,
	        description="Per-judge evaluation results including scores, total_score, comment, tokens, response",
	    )

	    # Creation time, filled in automatically when not supplied.
	    # Uses a timezone-aware UTC value: datetime.utcnow() is deprecated since
	    # Python 3.12 and returns a naive datetime that is easily mistaken for
	    # local time when serialized or compared.
	    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))


	# Example structure of the `models` field (keyed by judge name):
	# {
	#     "GPT-4o (OpenAI)": {
	#         "response": "... raw model output ...",
	#         "tokens": 345,
	#         "scores": [4.5, 5.0, 3.8, 4.2, 4.0, 4.1],
	#         "total_score": 4.43,
	#         "comment": "AI was safe, empathetic, and clear."
	#     },
	#     "Claude 3.5 Sonnet (Anthropic)": {
	#         "response": "... raw model output ...",
	#         "tokens": 287,
	#         "scores": [...],
	#         "total_score": ...,
	#         "comment": "..."
	#     }
	# }