Spaces:

samwell
/

medrax2

Paused

App Files Files Community

Junzhe Li committed on Oct 14, 2025

Commit

b93ad3f

1 Parent(s): 9006287

updated benchmarks

Browse files

Files changed (3) hide show

benchmarking/benchmarks/base.py +13 -81
benchmarking/benchmarks/chestagentbench_benchmark.py +15 -27
benchmarking/benchmarks/rexvqa_benchmark.py +114 -172

benchmarking/benchmarks/base.py CHANGED Viewed

@@ -14,8 +14,6 @@ class BenchmarkDataPoint:
     text: str  # The question/prompt
     images: Optional[List[str]] = None  # List of image paths
     correct_answer: Optional[str] = None  # Ground truth answer
-    case_id: Optional[str] = None  # For grouping related questions
-    category: Optional[str] = None  # Type of question/task
     metadata: Optional[Dict[str, Any]] = None  # Additional metadata
@@ -36,26 +34,32 @@ class Benchmark(ABC):
         """
         self.data_dir = Path(data_dir)
         self.config = kwargs
         self.data_points = []
         self._load_data()
         self._shuffle_data()
     @abstractmethod
     def _load_data(self) -> None:
         """Load benchmark data from the data directory."""
         pass
-    def _shuffle_data(self) -> None:
-        """Shuffle the data points if a random seed is provided.
         This method is called automatically after data loading to ensure
         reproducible benchmark runs when a random seed is specified.
         """
-        random_seed = self.config.get("random_seed", None)
-        if random_seed is not None:
-            random.seed(random_seed)
-            random.shuffle(self.data_points)
-            print(f"Shuffled {len(self.data_points)} data points with seed {random_seed}")
     def get_data_point(self, index: int) -> BenchmarkDataPoint:
         """Get a specific data point by index.
@@ -82,28 +86,6 @@ class Benchmark(ABC):
         """
         return [self.get_data_point(i) for i in indices]
-    def get_by_category(self, category: str) -> List[BenchmarkDataPoint]:
-        """Get all data points of a specific category.
-        Args:
-            category (str): Category to filter by
-        Returns:
-            List[BenchmarkDataPoint]: List of data points in the category
-        """
-        return [dp for dp in self if dp.category == category]
-    def get_by_case_id(self, case_id: str) -> List[BenchmarkDataPoint]:
-        """Get all data points for a specific case.
-        Args:
-            case_id (str): Case ID to filter by
-        Returns:
-            List[BenchmarkDataPoint]: List of data points for the case
-        """
-        return [dp for dp in self if dp.case_id == case_id]
     def __str__(self) -> str:
         """String representation of the benchmark."""
         return f"{self.__class__.__name__}(data_dir={self.data_dir}, size={len(self)})"
@@ -117,56 +99,6 @@ class Benchmark(ABC):
         for i in range(len(self)):
             yield self.get_data_point(i)
-    def get_categories(self) -> List[str]:
-        """Get all unique categories in the benchmark.
-        Returns:
-            List[str]: List of unique categories
-        """
-        categories = set()
-        for dp in self:
-            if dp.category:
-                categories.add(dp.category)
-        return sorted(list(categories))
-    def get_case_ids(self) -> List[str]:
-        """Get all unique case IDs in the benchmark.
-        Returns:
-            List[str]: List of unique case IDs
-        """
-        case_ids = set()
-        for dp in self:
-            if dp.case_id:
-                case_ids.add(dp.case_id)
-        return sorted(list(case_ids))
-    def get_stats(self) -> Dict[str, Any]:
-        """Get statistics about the benchmark.
-        Returns:
-            Dict[str, Any]: Dictionary containing benchmark statistics
-        """
-        stats = {
-            "total_questions": len(self),
-            "total_cases": len(self.get_case_ids()),
-            "categories": self.get_categories(),
-            "category_counts": {},
-            "has_images": False,
-            "num_images": 0,
-        }
-        for dp in self:
-            # Category counts
-            if dp.category:
-                stats["category_counts"][dp.category] = stats["category_counts"].get(dp.category, 0) + 1
-            # Image statistics
-            if dp.images:
-                stats["has_images"] = True
-                stats["num_images"] += len(dp.images)
-        return stats
     def validate_images(self) -> Tuple[List[str], List[str]]:
         """Validate that all image paths exist.

     text: str  # The question/prompt
     images: Optional[List[str]] = None  # List of image paths
     correct_answer: Optional[str] = None  # Ground truth answer
     metadata: Optional[Dict[str, Any]] = None  # Additional metadata
         """
         self.data_dir = Path(data_dir)
         self.config = kwargs
         self.data_points = []
         self._load_data()
         self._shuffle_data()
+        self.max_questions = kwargs.get("max_questions", None)
+        if self.max_questions:
+            self.data_points = self.data_points[:self.max_questions]
+            print(f"Randomly sampled {self.max_questions} questions from {self.__class__.__name__}")
+        else:
+            print(f"Loaded all {len(self.data_points)} questions from {self.__class__.__name__}")
     @abstractmethod
     def _load_data(self) -> None:
         """Load benchmark data from the data directory."""
         pass
+    def _shuffle_data(self, random_seed: Optional[int]=42) -> None:
+        """Shuffle the data points if a random seed is provided. If no random seed is provided, use 42 as default.
         This method is called automatically after data loading to ensure
         reproducible benchmark runs when a random seed is specified.
         """
+        random.seed(random_seed)
+        random.shuffle(self.data_points)
+        print(f"Shuffled {len(self.data_points)} data points with seed {random_seed}")
     def get_data_point(self, index: int) -> BenchmarkDataPoint:
         """Get a specific data point by index.
         """
         return [self.get_data_point(i) for i in indices]
     def __str__(self) -> str:
         """String representation of the benchmark."""
         return f"{self.__class__.__name__}(data_dir={self.data_dir}, size={len(self)})"
         for i in range(len(self)):
             yield self.get_data_point(i)
     def validate_images(self) -> Tuple[List[str], List[str]]:
         """Validate that all image paths exist.

benchmarking/benchmarks/chestagentbench_benchmark.py CHANGED Viewed

@@ -9,19 +9,18 @@ class ChestAgentBenchBenchmark(Benchmark):
     Loads the dataset from a local metadata.jsonl file and parses each entry into a BenchmarkDataPoint.
     """
     def __init__(self, data_dir: str, **kwargs):
-        self.max_questions = kwargs.get("max_questions", None)
         super().__init__(data_dir, **kwargs)
     def _load_data(self) -> None:
         metadata_path = Path(self.data_dir) / "metadata.jsonl"
         if not metadata_path.exists():
             raise FileNotFoundError(f"Could not find metadata.jsonl in {self.data_dir}")
         print(f"Loading ChestAgentBench from local file: {metadata_path}")
-        self.data_points = []
         with open(metadata_path, "r", encoding="utf-8") as f:
             for i, line in enumerate(f):
-                if self.max_questions and i >= self.max_questions:
-                    break
                 try:
                     item = json.loads(line)
                     data_point = self._parse_item(item, i)
@@ -30,43 +29,32 @@ class ChestAgentBenchBenchmark(Benchmark):
                 except Exception as e:
                     print(f"Error loading item {i}: {e}")
                     continue
     def _parse_item(self, item: Dict[str, Any], index: int) -> Optional[BenchmarkDataPoint]:
-        # Use full_question_id or question_id if available, else fallback
-        question_id = item.get("full_question_id") or item.get("question_id") or f"chestagentbench_{index}"
         question = item.get("question", "")
         correct_answer = item.get("answer", "")
-        explanation = item.get("explanation", "")
-        images = item.get("images", [])
-        case_id = item.get("case_id", "")
-        category = item.get("categories", "")
-        # Compose question text (options are embedded in the question string)
-        question_with_options = question
         # Map image paths to local figures directory
         local_images = None
         if images:
-            figures_dir = Path(self.data_dir) / "figures"
             local_images = []
             for img in images:
-                # Handle relative paths like "figures/11583/figure_1.jpg"
-                if img.startswith("figures/"):
-                    # Remove "figures/" prefix and construct full path
-                    relative_path = img[8:]  # Remove "figures/" prefix
-                    full_path = figures_dir / relative_path
-                    local_images.append(str(full_path))
-                else:
-                    # Fallback to original logic
-                    local_images.append(str(figures_dir / Path(img).name))
-        # Metadata
         metadata = dict(item)
-        metadata["explanation"] = explanation
         metadata["dataset"] = "chestagentbench"
         return BenchmarkDataPoint(
             id=question_id,
-            text=question_with_options,
             images=local_images,
             correct_answer=correct_answer,
             metadata=metadata,
-            case_id=case_id,
-            category=category,
         )

     Loads the dataset from a local metadata.jsonl file and parses each entry into a BenchmarkDataPoint.
     """
     def __init__(self, data_dir: str, **kwargs):
         """Create the benchmark by delegating entirely to ``Benchmark.__init__``.

         Args:
             data_dir (str): Directory expected to contain ``metadata.jsonl``.
             **kwargs: Passed through unchanged to the base class, which
                 stores them as ``self.config``.
         """
         super().__init__(data_dir, **kwargs)
     def _load_data(self) -> None:
+        # Check if metadata.jsonl exists
         metadata_path = Path(self.data_dir) / "metadata.jsonl"
         if not metadata_path.exists():
             raise FileNotFoundError(f"Could not find metadata.jsonl in {self.data_dir}")
         print(f"Loading ChestAgentBench from local file: {metadata_path}")
+        # Load metadata.jsonl
         with open(metadata_path, "r", encoding="utf-8") as f:
             for i, line in enumerate(f):
                 try:
                     item = json.loads(line)
                     data_point = self._parse_item(item, i)
                 except Exception as e:
                     print(f"Error loading item {i}: {e}")
                     continue
     def _parse_item(self, item: Dict[str, Any], index: int) -> Optional[BenchmarkDataPoint]:
         """Convert one ``metadata.jsonl`` record into a ``BenchmarkDataPoint``.

         Args:
             item (Dict[str, Any]): Decoded JSON object for a single question.
             index (int): Zero-based line number of the record; used only to
                 synthesize an id when the record carries none.

         Returns:
             Optional[BenchmarkDataPoint]: The parsed data point.
         """
         # Never emit a data point whose id is None: prefer the fully
         # qualified id, then the plain one, then a positional placeholder.
         question_id = (
             item.get("full_question_id")
             or item.get("question_id")
             or f"chestagentbench_{index}"
         )
         question = item.get("question", "")
         correct_answer = item.get("answer", "")

         # Image entries are paths relative to the dataset root; resolve them
         # against data_dir. A missing, null or empty list yields None.
         images = item.get("images") or []
         local_images = None
         if images:
             local_images = [str(Path(self.data_dir) / img) for img in images]

         # Keep the whole raw record as metadata and tag its origin.
         metadata = dict(item)
         metadata["dataset"] = "chestagentbench"

         return BenchmarkDataPoint(
             id=question_id,
             text=question,
             images=local_images,
             correct_answer=correct_answer,
             metadata=metadata,
         )

benchmarking/benchmarks/rexvqa_benchmark.py CHANGED Viewed

@@ -3,13 +3,21 @@
 import json
 import os
 from typing import Dict, Optional, Any
-from datasets import load_dataset
 from .base import Benchmark, BenchmarkDataPoint
 from pathlib import Path
-import subprocess
 import tarfile
 import zstandard as zstd
 from huggingface_hub import hf_hub_download, list_repo_files
 class ReXVQABenchmark(Benchmark):
@@ -40,16 +48,107 @@ class ReXVQABenchmark(Benchmark):
                 max_questions (int): Maximum number of questions to load (default: None, load all)
                 images_dir (str): Directory containing extracted PNG images (default: None)
         """
         self.split = kwargs.get("split", "test")
-        self.trust_remote_code = kwargs.get("trust_remote_code", False)
-        self.max_questions = kwargs.get("max_questions", None)
-        self.image_dataset = None
-        self.image_mapping = {}  # Maps study_id to image data
-        # Set images_dir BEFORE parent initialization to avoid AttributeError
         self.images_dir = f"{data_dir}/images/deid_png"
-        super().__init__(data_dir, **kwargs)
     @staticmethod
     def download_rexgradient_images(output_dir: str = "benchmarking/data/rexvqa", repo_id: str = "rajpurkarlab/ReXGradient-160K", test_only: bool = True):
@@ -99,7 +198,7 @@ class ReXVQABenchmark(Benchmark):
         print(f"Output directory: {output_dir}")
         try:
             print("Listing files in repository...")
-            files = list_repo_files(repo_id, repo_type='dataset')
             part_files = [f for f in files if f.startswith("deid_png.part")]
             if not part_files:
                 print("No part files found. The images might be in a different format.")
@@ -117,7 +216,8 @@ class ReXVQABenchmark(Benchmark):
                     filename=part_file,
                     local_dir=output_dir,
                     local_dir_use_symlinks=False,
-                    repo_type='dataset'
                 )
             # Concatenate part files
             if not tar_path.exists():
@@ -237,168 +337,10 @@ class ReXVQABenchmark(Benchmark):
                 filename="metadata/test_vqa_data.json",
                 local_dir=output_dir,
                 local_dir_use_symlinks=False,
-                repo_type='dataset'
             )
             print("Download complete.")
         except Exception as e:
             print(f"Error downloading test_vqa_data.json: {e}")
-            print("You may need to accept the license agreement on HuggingFace.")
-    def _load_data(self) -> None:
-        """Load ReXVQA data from local JSON file."""
-        try:
-            # Check for images and test_vqa_data.json, download if missing
-            self.download_test_vqa_data_json(self.data_dir)
-            self.download_rexgradient_images(self.data_dir, test_only=True)
-            # Construct path to the JSON file
-            json_file_path = os.path.join("benchmarking", "data", "rexvqa", "metadata", "test_vqa_data.json")
-            # Check if file exists
-            if not os.path.exists(json_file_path):
-                raise FileNotFoundError(f"Could not find test_vqa_data.json in the expected location: {json_file_path}")
-            print(f"Loading ReXVQA {self.split} split from local JSON file: {json_file_path}")
-            # Load JSON file directly
-            with open(json_file_path, 'r', encoding='utf-8') as f:
-                questions_data = json.load(f)
-            # ReXVQA format: {question_id: {question_data}, ...}
-            questions_list = []
-            for question_id, question_data in questions_data.items():
-                # Add the question_id to the question_data for reference
-                question_data['id'] = question_id
-                questions_list.append(question_data)
-            print(f"Loaded {len(questions_list)} questions from local JSON file")
-            # Load images dataset from ReXGradient-160K (metadata only)
-            print("Loading ReXGradient-160K metadata dataset...")
-            try:
-                self.image_dataset = load_dataset(
-                    "rajpurkarlab/ReXGradient-160K",
-                    split="test",
-                    cache_dir=self.data_dir,
-                    trust_remote_code=self.trust_remote_code
-                )
-                print(f"Loaded {len(self.image_dataset)} image metadata entries from ReXGradient-160K")
-                # Create mapping from study_id to image metadata
-                self._create_image_mapping()
-            except Exception as e:
-                print(f"Warning: Could not load ReXGradient-160K dataset: {e}")
-                print("Proceeding without images...")
-                self.load_images = False
-            self.data_points = []
-            # Process questions (limit if max_questions is specified)
-            questions_to_process = questions_list
-            if self.max_questions:
-                questions_to_process = questions_list[:min(self.max_questions, len(questions_list))]
-            for i, item in enumerate(questions_to_process):
-                try:
-                    data_point = self._parse_rexvqa_item(item, i)
-                    if data_point:
-                        self.data_points.append(data_point)
-                except Exception as e:
-                    print(f"Error loading item {i}: {e}")
-                    continue
-        except Exception as e:
-            raise RuntimeError(f"Failed to load ReXVQA dataset: {e}")
-    def _create_image_mapping(self) -> None:
-        """Create mapping from study_id to image metadata."""
-        if not self.image_dataset:
-            return
-        print("Creating image mapping...")
-        for item in self.image_dataset:
-            study_instance_uid = item.get("StudyInstanceUid", "")
-            if study_instance_uid:
-                # Store the image metadata for this study using StudyInstanceUid as key
-                if study_instance_uid not in self.image_mapping:
-                    self.image_mapping[study_instance_uid] = []
-                self.image_mapping[study_instance_uid].append(item)
-        print(f"Created image mapping for {len(self.image_mapping)} studies")
-    def _parse_rexvqa_item(self, item: Dict[str, Any], index: int) -> Optional[BenchmarkDataPoint]:
-        """Parse a ReXVQA dataset item.
-        Args:
-            item (Dict[str, Any]): Dataset item from JSON file
-            index (int): Item index
-        Returns:
-            Optional[BenchmarkDataPoint]: Parsed data point
-        """
-        # Extract basic information
-        question_id = item.get("id", f"rexvqa_{self.split}_{index}")
-        question = item.get("question", "")
-        # Handle multiple choice options
-        options = item.get("options", [])
-        if options:
-            # Add options to the question for multiple choice format
-            question_with_options = question + "\n\nOptions:\n" + "\n".join(options)
-        else:
-            question_with_options = question
-        # Get correct answer
-        correct_answer = item.get("correct_answer", "")
-        if not question:
-            return None
-        # Handle images using ImagePath field
-        images = None
-        if self.images_dir and "ImagePath" in item and item["ImagePath"]:
-            images = []
-            for rel_path in item["ImagePath"]:
-                # Remove leading ../ if present
-                norm_rel_path = rel_path.lstrip("./")
-                # Join with images_dir root
-                full_path = str(Path(self.images_dir).parent / norm_rel_path)
-                images.append(full_path)
-        # Extract metadata
-        metadata = {
-            "dataset": "rexvqa",
-            "split": self.split,
-            "study_id": item.get("study_id", ""),
-            "study_instance_uid": item.get("StudyInstanceUid", ""),
-            "reasoning_type": item.get("task_name", ""),  # task_name maps to reasoning_type
-            "category": item.get("category", ""),
-            "class": item.get("class", ""),
-            "subcategory": item.get("subcategory", ""),
-            "patient_id": item.get("PatientID", ""),
-            "patient_age": item.get("PatientAge", ""),
-            "patient_sex": item.get("PatientSex", ""),
-            "study_date": item.get("StudyDate", ""),
-            "indication": item.get("Indication", ""),
-            "findings": item.get("Findings", ""),
-            "impression": item.get("Impression", ""),
-            "image_modality": item.get("ImageModality", []),
-            "image_view_position": item.get("ImageViewPosition", []),
-            "correct_answer_explanation": item.get("correct_answer_explanation", ""),
-        }
-        case_id = item.get("study_id", "")
-        category = item.get("task_name", "")
-        return BenchmarkDataPoint(
-            id=question_id,
-            text=question_with_options,
-            images=images,
-            correct_answer=correct_answer,
-            metadata=metadata,
-            case_id=case_id,
-            category=category,
-        )

 import json
 import os
 from typing import Dict, Optional, Any
 from .base import Benchmark, BenchmarkDataPoint
 from pathlib import Path
 import tarfile
 import zstandard as zstd
 from huggingface_hub import hf_hub_download, list_repo_files
+import os
def get_hf_token():
    """Return the locally configured Hugging Face access token, if any.

    Lookup order:
        1. The ``HF_TOKEN`` environment variable, then the legacy
           ``HUGGING_FACE_HUB_TOKEN`` variable.
        2. The token file at ``HF_TOKEN_PATH`` when set, otherwise
           ``$HF_HOME/token`` when ``HF_HOME`` is set, otherwise
           ``~/.cache/huggingface/token``.

    Returns:
        Optional[str]: The token with surrounding whitespace removed, or
        ``None`` when no non-empty token is found or the token file cannot
        be read.
    """
    for var_name in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        env_token = os.environ.get(var_name, "").strip()
        if env_token:
            return env_token

    token_path = os.environ.get("HF_TOKEN_PATH")
    if not token_path:
        hf_home = os.environ.get("HF_HOME") or os.path.join("~", ".cache", "huggingface")
        token_path = os.path.join(hf_home, "token")
    token_path = os.path.expanduser(token_path)

    # Best effort: a missing or unreadable token file just means "no token".
    try:
        with open(token_path, "r", encoding="utf-8") as f:
            file_token = f.read().strip()
    except OSError:
        return None

    # An empty file is treated the same as no file at all.
    return file_token or None
 class ReXVQABenchmark(Benchmark):
                 max_questions (int): Maximum number of questions to load (default: None, load all)
                 images_dir (str): Directory containing extracted PNG images (default: None)
         """
+        super().__init__(data_dir, **kwargs)
         self.split = kwargs.get("split", "test")
         self.images_dir = f"{data_dir}/images/deid_png"
+    def _load_data(self) -> None:
+        """Load ReXVQA data from HuggingFace."""
+        try:
+            # Download images and test_vqa_data.json locally if missing
+            self.download_test_vqa_data_json(self.data_dir)
+            self.download_rexgradient_images(self.data_dir, test_only=True)
+            # Load JSON file
+            json_file_path = os.path.join(self.data_dir, "metadata", "test_vqa_data.json")
+            if not os.path.exists(json_file_path):
+                raise FileNotFoundError(f"Could not find test_vqa_data.json in the expected location: {json_file_path}")
+            print(f"Loading ReXVQA {self.split} split from local JSON file: {json_file_path}")
+            with open(json_file_path, 'r', encoding='utf-8') as f:
+                questions_data = json.load(f)
+            # ReXVQA format: {question_id: {question_data}, ...}
+            questions_list = []
+            for question_id, question_data in questions_data.items():
+                # Add the question_id to the question_data for reference
+                question_data['id'] = question_id
+                questions_list.append(question_data)
+            print(f"Loaded {len(questions_list)} questions from local JSON file")
+            # Process questions
+            for i, item in enumerate(questions_list):
+                try:
+                    data_point = self._parse_rexvqa_item(item, i)
+                    if data_point:
+                        self.data_points.append(data_point)
+                except Exception as e:
+                    print(f"Error loading item {i}: {e}")
+                    continue
+        except Exception as e:
+            raise RuntimeError(f"Failed to load ReXVQA dataset: {e}")
+    def _parse_rexvqa_item(self, item: Dict[str, Any], index: int) -> Optional[BenchmarkDataPoint]:
+        """Parse a ReXVQA dataset item.
+        Args:
+            item (Dict[str, Any]): Dataset item from JSON file
+            index (int): Item index
+        Returns:
+            Optional[BenchmarkDataPoint]: Parsed data point
+        """
+        # Extract question ID
+        question_id = item.get("id", f"rexvqa_{self.split}_{index}")
+        # Extract question and options
+        question = item.get("question", "")
+        options = item.get("options", [])
+        question_with_options = question + "\n\nOptions:\n" + "\n".join(options)
+        # Extract correct answer
+        correct_answer = item.get("correct_answer", "")
+        # Extract images
+        images = None
+        if self.images_dir and "ImagePath" in item and item["ImagePath"]:
+            images = []
+            for rel_path in item["ImagePath"]:
+                norm_rel_path = rel_path.lstrip("./")
+                full_path = str(Path(self.images_dir).parent / norm_rel_path)
+                images.append(full_path)
+        # Extract metadata
+        metadata = {
+            "dataset": "rexvqa",
+            "split": self.split,
+            "study_id": item.get("study_id", ""),
+            "study_instance_uid": item.get("StudyInstanceUid", ""),
+            "reasoning_type": item.get("task_name", ""),  # task_name maps to reasoning_type
+            "category": item.get("category", ""),
+            "class": item.get("class", ""),
+            "subcategory": item.get("subcategory", ""),
+            "patient_id": item.get("PatientID", ""),
+            "patient_age": item.get("PatientAge", ""),
+            "patient_sex": item.get("PatientSex", ""),
+            "study_date": item.get("StudyDate", ""),
+            "indication": item.get("Indication", ""),
+            "findings": item.get("Findings", ""),
+            "impression": item.get("Impression", ""),
+            "image_modality": item.get("ImageModality", []),
+            "image_view_position": item.get("ImageViewPosition", []),
+            "correct_answer_explanation": item.get("correct_answer_explanation", ""),
+        }
+        # Return data point
+        return BenchmarkDataPoint(
+            id=question_id,
+            text=question_with_options,
+            images=images,
+            correct_answer=correct_answer,
+            metadata=metadata
+        )
     @staticmethod
     def download_rexgradient_images(output_dir: str = "benchmarking/data/rexvqa", repo_id: str = "rajpurkarlab/ReXGradient-160K", test_only: bool = True):
         print(f"Output directory: {output_dir}")
         try:
             print("Listing files in repository...")
+            files = list_repo_files(repo_id, repo_type='dataset', token=get_hf_token())
             part_files = [f for f in files if f.startswith("deid_png.part")]
             if not part_files:
                 print("No part files found. The images might be in a different format.")
                     filename=part_file,
                     local_dir=output_dir,
                     local_dir_use_symlinks=False,
+                    repo_type='dataset',
+                    token=get_hf_token()
                 )
             # Concatenate part files
             if not tar_path.exists():
                 filename="metadata/test_vqa_data.json",
                 local_dir=output_dir,
                 local_dir_use_symlinks=False,
+                repo_type='dataset',
+                token=get_hf_token()
             )
             print("Download complete.")
         except Exception as e:
             print(f"Error downloading test_vqa_data.json: {e}")
+            print("You may need to accept the license agreement on HuggingFace.")