dermtriage / src /feedback.py
Kabirgrover's picture
updated for new HF space
84842ba verified
"""
Blinded feedback capture for DermTriage pilot study.
Logs clinician assessments and model predictions to JSONL for
analysis. Designed for HF Spaces persistent storage (/data mount)
with local-dev fallback.
"""
import hashlib
import json
import threading
from datetime import datetime, timezone
from pathlib import Path
from uuid import uuid4
import numpy as np
# Persistent storage: /data on HF Spaces, ./data locally.
_DATA_MOUNT = Path("/data")
try:
    _HAS_DATA_MOUNT = _DATA_MOUNT.is_mount()
except NotImplementedError:
    # Path.is_mount() is not implemented on Windows before Python 3.12.
    # Treat that as "no persistent mount" so local development still
    # falls back to ./data instead of failing at import time.
    _HAS_DATA_MOUNT = False

# Directory that receives every feedback log; created eagerly at import.
FEEDBACK_DIR = _DATA_MOUNT if _HAS_DATA_MOUNT else Path("data")
FEEDBACK_DIR.mkdir(parents=True, exist_ok=True)

# One JSONL file per collection mode (open pilot vs. reader study).
FEEDBACK_FILE = FEEDBACK_DIR / "pilot_feedback.jsonl"
READER_STUDY_FILE = FEEDBACK_DIR / "reader_study_feedback.jsonl"

# Identifier stamped into every record as "model_version".
MODEL_VERSION = "ens-alpha06-nb09-fitz14-rs1"

# Serialises appends to the JSONL files across threads.
_write_lock = threading.Lock()
def image_hash(image) -> str:
    """Return the SHA-256 hex digest of an image's raw pixel bytes.

    The image is converted with ``np.array`` and only the resulting byte
    buffer is hashed; the array's shape and dtype are not part of the digest.

    Args:
        image: PIL Image (or anything ``np.array`` accepts).

    Returns:
        64-character lowercase hexadecimal digest string.
    """
    pixel_bytes = np.array(image).tobytes()
    digest = hashlib.sha256()
    digest.update(pixel_bytes)
    return digest.hexdigest()
def build_feedback_record(
    session_id,
    image,
    model_prediction,
    clinician_dx,
    clinician_confidence,
    clinician_action,
    fitzpatrick_group,
    agreement_rating,
    free_text_note,
    threshold_used,
    ensemble_alpha,
    # Reader study fields (all optional — None means not collected in this mode)
    study_phase=None,
    image_id=None,
    case_number=None,
    time_seconds=None,
    would_change_action=None,
    revised_action=None,
    ai_influence_score=None,
    post_ai_confidence=None,
):
    """Build one feedback record as a plain dict.

    The record always carries the core pilot fields plus a UTC ISO-8601
    timestamp, the image hash and the module's MODEL_VERSION. Reader-study
    fields are added only when a value other than None was supplied, so
    records from the open pilot keep their original shape.

    Args:
        session_id: UUID string for this evaluation session.
        image: PIL Image; only its hash is recorded, never the pixels.
        model_prediction: dict with prob_malignant, triage_zone,
            top_class, top_class_prob.
        clinician_dx: 7-class code or "other_unsure".
        clinician_confidence: int 1-5.
        clinician_action: "Refer" / "Monitor" / "Reassure".
        fitzpatrick_group: "I-II" / "III-IV" / "V-VI" / "Unknown".
        agreement_rating: "Agree" / "Partially Agree" / "Disagree".
        free_text_note: Free-form clinician notes (may be empty).
        threshold_used: float from MODEL_CONFIG.
        ensemble_alpha: float from MODEL_CONFIG.
        study_phase: "unaided" | "ai_aided" | None (open pilot).
        image_id: DDI image identifier for reader study.
        case_number: Position in sequence (1-54).
        time_seconds: Time spent on this case.
        would_change_action: bool — Phase 2 only.
        revised_action: "Refer"/"Monitor"/"Reassure"/None — Phase 2 only.
        ai_influence_score: int 1-5 — Phase 2 only.
        post_ai_confidence: int 1-5 — Phase 2 only.

    Returns:
        dict ready for JSONL serialization.
    """
    # Optional reader-study values, listed in the order they should appear
    # in the output record.
    optional_fields = (
        ("study_phase", study_phase),
        ("image_id", image_id),
        ("case_number", case_number),
        ("time_seconds", time_seconds),
        ("would_change_action", would_change_action),
        ("revised_action", revised_action),
        ("ai_influence_score", ai_influence_score),
        ("post_ai_confidence", post_ai_confidence),
    )

    record = {
        "session_id": session_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "image_hash": image_hash(image),
        "model_version": MODEL_VERSION,
        "threshold_used": threshold_used,
        "ensemble_alpha": ensemble_alpha,
        "model_prediction": model_prediction,
        "clinician_dx": clinician_dx,
        "clinician_confidence": clinician_confidence,
        "clinician_action": clinician_action,
        "fitzpatrick_group": fitzpatrick_group,
        "agreement_rating": agreement_rating,
        "free_text_note": free_text_note,
    }

    # Only None is dropped: falsy-but-meaningful values such as False or 0
    # are still recorded.
    record.update(
        (name, supplied)
        for name, supplied in optional_fields
        if supplied is not None
    )
    return record
def log_feedback(record: dict) -> None:
    """Append one feedback record to the matching JSONL log.

    A record with a truthy "study_phase" goes to
    reader_study_feedback.jsonl; every other record goes to
    pilot_feedback.jsonl. The record is serialised before the module-level
    write lock is taken, and the append itself happens while holding it.

    Args:
        record: JSON-serialisable dict, e.g. from build_feedback_record.
    """
    if record.get("study_phase"):
        destination = READER_STUDY_FILE
    else:
        destination = FEEDBACK_FILE

    # Serialise outside the lock so the critical section is just the write.
    payload = f"{json.dumps(record, ensure_ascii=False)}\n"

    with _write_lock, open(destination, "a", encoding="utf-8") as handle:
        handle.write(payload)