|
|
"""Common dataset detection utilities for all evaluation scripts.""" |
|
|
|
|
|
def detect_dataset_from_video_id(video_id):
    """Detect the source dataset from the shape of a video ID.

    The ID is coerced to ``str`` and lower-cased, then matched against a
    fixed sequence of heuristics; the first heuristic that matches wins.

    Args:
        video_id: Video identifier. Any value accepted by ``str()``.

    Returns:
        One of "AVOS", "CoPESD", "Cholec80-CVS", "CholecTrack20",
        "JIGSAWS", "NurViD", or "Unknown" when no pattern matches.
    """
    video_id = str(video_id).lower()

    # Any 11-character ID containing at least one letter is labelled AVOS.
    # NOTE(review): presumably this targets YouTube-style IDs -- confirm.
    # Because it runs first, 11-character IDs of other datasets
    # (e.g. "video01_001") are also caught here.
    if len(video_id) == 11 and any(c.isalpha() for c in video_id):
        return "AVOS"

    # IDs containing "_part" where, once "_part" is removed, the text before
    # the first remaining underscore is purely numeric (e.g. "12_part3").
    if "_part" in video_id and video_id.replace("_part", "").split("_")[0].isdigit():
        return "CoPESD"

    has_digit = any(c.isdigit() for c in video_id)

    # "video" must be tested before "vid": every "video..." ID also starts
    # with "vid", so testing the shorter prefix first would make the
    # Cholec80-CVS branch unreachable.
    if has_digit and video_id.startswith("video"):
        return "Cholec80-CVS"
    if has_digit and video_id.startswith("vid"):
        return "CholecTrack20"

    if any(task in video_id for task in ("knot_tying", "needle_passing", "suturing")):
        return "JIGSAWS"

    if any(keyword in video_id for keyword in ("nur", "nursing", "medical")):
        return "NurViD"

    return "Unknown"
|
|
|
|
|
|
|
|
def detect_dataset_from_question(question):
    """Detect the source dataset from keywords in the question text.

    Matching is case-insensitive. Explicit dataset names and signature
    phrases are tried first, in a fixed priority order; if none match, two
    content-based fallbacks are tried.

    Args:
        question: Question text. ``None`` or an empty value yields
            "Unknown"; any other value is coerced with ``str()``.

    Returns:
        One of "AVOS", "CoPESD", "CholecT50", "CholecTrack20",
        "Cholec80-CVS", "JIGSAWS", "NurViD", or "Unknown".
    """
    # Missing/empty question text cannot identify a dataset; bail out early
    # instead of failing on ``None.lower()``.
    if not question:
        return "Unknown"

    question_lower = str(question).lower()

    # Ordered (dataset, trigger phrases) rules: the first rule with any
    # phrase present in the question wins, so the order is significant.
    keyword_rules = (
        ("AVOS", ("avos",)),
        ("CoPESD", ("copesd",)),
        ("CholecT50", ("cholect50", "cholec-t50")),
        ("CholecTrack20", ("cholectrack20", "cholec-track20")),
        ("Cholec80-CVS", ("cholec80-cvs", "critical view of safety")),
        ("JIGSAWS", ("jigsaws", "robotic bench-top")),
        ("NurViD", ("nurvid", "nursing")),
        ("CholecTrack20", ("laparoscopic cholecystectomy",)),
    )
    for dataset, phrases in keyword_rules:
        if any(phrase in question_lower for phrase in phrases):
            return dataset

    # Content-based fallbacks, used only when no rule above matched.
    mentions_action = any(
        action in question_lower for action in ("cutting", "tying", "suturing")
    )
    if mentions_action and "open surgery" in question_lower:
        return "AVOS"
    if "forceps" in question_lower and "knife" in question_lower:
        return "CoPESD"

    return "Unknown"
|
|
|
|
|
|
|
|
def get_dataset_name(record):
    """Return the dataset name for a record, preferring ``data_source``.

    Resolution order:
      1. ``record["data_source"]`` when it is non-empty and not "Unknown".
      2. Detection from the question text (``record["question"]``).
      3. Detection from ``record["metadata"]["video_id"]``.

    Args:
        record: Mapping that may contain "data_source", "question" and a
            "metadata" mapping with a "video_id" entry.

    Returns:
        The dataset name, or "Unknown" when nothing identifies it.
    """
    dataset = record.get("data_source", "Unknown")
    if dataset and dataset != "Unknown":
        return dataset

    # The question text takes precedence over the video ID. A None question
    # is normalised to "" so the detector always receives a string.
    dataset_from_question = detect_dataset_from_question(record.get("question") or "")
    if dataset_from_question != "Unknown":
        return dataset_from_question

    # The video ID is consulted only as a last resort, and missing metadata
    # is tolerated so such a record resolves to "Unknown" rather than
    # raising KeyError.
    metadata = record.get("metadata") or {}
    return detect_dataset_from_video_id(metadata.get("video_id", ""))
|
|
|