Spaces:

samwell
/

medrax2

Sleeping

App Files Files Community

VictorLJZ committed on Aug 1, 2025

Commit

044eaf7

1 Parent(s): f06bcdb

final updates

Browse files

Files changed (7) hide show

benchmarking/benchmarks/base.py +15 -0
benchmarking/benchmarks/chestagentbench_benchmark.py +0 -5
benchmarking/cli.py +4 -0
interface.py +10 -5
medrax/tools/__init__.py +1 -1
medrax/tools/classification/arcplus.py +2 -1
medrax/tools/segmentation/medsam2.py +2 -2

benchmarking/benchmarks/base.py CHANGED Viewed

@@ -4,6 +4,7 @@ from abc import ABC, abstractmethod
 from typing import Dict, List, Optional, Any, Iterator, Tuple
 from dataclasses import dataclass
 from pathlib import Path
 @dataclass
@@ -31,17 +32,31 @@ class Benchmark(ABC):
         Args:
             data_dir (str): Directory containing benchmark data
             **kwargs: Additional configuration parameters
         """
         self.data_dir = Path(data_dir)
         self.config = kwargs
         self.data_points = []
         self._load_data()
     @abstractmethod
     def _load_data(self) -> None:
         """Load benchmark data from the data directory."""
         pass
     def get_data_point(self, index: int) -> BenchmarkDataPoint:
         """Get a specific data point by index.

 from typing import Dict, List, Optional, Any, Iterator, Tuple
 from dataclasses import dataclass
 from pathlib import Path
+import random
 @dataclass
         Args:
             data_dir (str): Directory containing benchmark data
             **kwargs: Additional configuration parameters
+                random_seed (int): Random seed for shuffling data (default: None, no shuffling)
         """
         self.data_dir = Path(data_dir)
         self.config = kwargs
         self.data_points = []
         self._load_data()
+        self._shuffle_data()
     @abstractmethod
     def _load_data(self) -> None:
         """Load benchmark data from the data directory."""
         pass
+    def _shuffle_data(self) -> None:
+        """Shuffle the data points if a random seed is provided.
+        This method is called automatically after data loading to ensure
+        reproducible benchmark runs when a random seed is specified.
+        """
+        random_seed = self.config.get("random_seed", None)
+        if random_seed is not None:
+            random.seed(random_seed)
+            random.shuffle(self.data_points)
+            print(f"Shuffled {len(self.data_points)} data points with seed {random_seed}")
     def get_data_point(self, index: int) -> BenchmarkDataPoint:
         """Get a specific data point by index.

benchmarking/benchmarks/chestagentbench_benchmark.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import json
-import random
 from pathlib import Path
 from typing import Dict, Optional, Any
 from .base import Benchmark, BenchmarkDataPoint
@@ -31,10 +30,6 @@ class ChestAgentBenchBenchmark(Benchmark):
                 except Exception as e:
                     print(f"Error loading item {i}: {e}")
                     continue
-        # Shuffle the final data
-        random.seed(42)
-        random.shuffle(self.data_points)
     def _parse_item(self, item: Dict[str, Any], index: int) -> Optional[BenchmarkDataPoint]:
         # Use full_question_id or question_id if available, else fallback

 import json
 from pathlib import Path
 from typing import Dict, Optional, Any
 from .base import Benchmark, BenchmarkDataPoint
                 except Exception as e:
                     print(f"Error loading item {i}: {e}")
                     continue
     def _parse_item(self, item: Dict[str, Any], index: int) -> Optional[BenchmarkDataPoint]:
         # Use full_question_id or question_id if available, else fallback

benchmarking/cli.py CHANGED Viewed

@@ -73,6 +73,8 @@ def run_benchmark_command(args) -> None:
     # Create benchmark
     benchmark_kwargs = {}
     benchmark = create_benchmark(benchmark_name=args.benchmark, data_dir=args.data_dir, **benchmark_kwargs)
@@ -141,6 +143,8 @@ def main():
                            help="Top-p nucleus sampling parameter (default: 0.95)")
     run_parser.add_argument("--max-tokens", type=int, default=5000,
                            help="Maximum tokens per model response (default: 5000)")
     run_parser.set_defaults(func=run_benchmark_command)

     # Create benchmark
     benchmark_kwargs = {}
+    if args.random_seed is not None:
+        benchmark_kwargs["random_seed"] = args.random_seed
     benchmark = create_benchmark(benchmark_name=args.benchmark, data_dir=args.data_dir, **benchmark_kwargs)
                            help="Top-p nucleus sampling parameter (default: 0.95)")
     run_parser.add_argument("--max-tokens", type=int, default=5000,
                            help="Maximum tokens per model response (default: 5000)")
+    run_parser.add_argument("--random-seed", type=int, default=42,
+                           help="Random seed for shuffling benchmark data (enables reproducible runs, default: 42)")
     run_parser.set_defaults(func=run_benchmark_command)

interface.py CHANGED Viewed

@@ -192,7 +192,11 @@ class ChatInterface:
                                 tool_args = pending_call["args"]
                                 try:
-                                    tool_output_json = json.loads(msg.content)
                                     tool_output_str = json.dumps(tool_output_json, indent=2)
                                 except (json.JSONDecodeError, TypeError):
                                     tool_output_str = str(msg.content)
@@ -217,10 +221,11 @@ class ChatInterface:
                                 if tool_name == "image_visualizer":
                                     try:
-                                        result = json.loads(msg.content)
-                                        # Handle case where tool returns array [output, metadata]
-                                        if isinstance(result, list) and len(result) > 0:
-                                            result = result[0]  # Take the first element (output)
                                         if isinstance(result, dict) and "image_path" in result:
                                             self.display_file_path = result["image_path"]
                                             chat_history.append(

                                 tool_args = pending_call["args"]
                                 try:
+                                    # Handle case where tool returns tuple (output, metadata)
+                                    content = msg.content
+                                    content_tuple = ast.literal_eval(content)
+                                    content = json.dumps(content_tuple[0])
+                                    tool_output_json = json.loads(content)
                                     tool_output_str = json.dumps(tool_output_json, indent=2)
                                 except (json.JSONDecodeError, TypeError):
                                     tool_output_str = str(msg.content)
                                 if tool_name == "image_visualizer":
                                     try:
+                                        # Handle case where tool returns tuple (output, metadata)
+                                        content = msg.content
+                                        content_tuple = ast.literal_eval(content)
+                                        result = content_tuple[0]
                                         if isinstance(result, dict) and "image_path" in result:
                                             self.display_file_path = result["image_path"]
                                             chat_history.append(

medrax/tools/__init__.py CHANGED Viewed

@@ -5,7 +5,7 @@ from .report_generation import *
 from .segmentation import *
 from .vqa import *
 from .grounding import *
-from .generation import *
 from .dicom import *
 from .utils import *
 from .rag import *

 from .segmentation import *
 from .vqa import *
 from .grounding import *
+from .xray_generation import *
 from .dicom import *
 from .utils import *
 from .rag import *

medrax/tools/classification/arcplus.py CHANGED Viewed

@@ -345,7 +345,8 @@ class ArcPlusClassifierTool(BaseTool):
                     predictions = predictions[: len(self.disease_list)]
             # Create output dictionary mapping disease names to probabilities
-            output = dict(zip(self.disease_list, predictions.astype(float)))
             metadata = {
                 "image_path": image_path,

                     predictions = predictions[: len(self.disease_list)]
             # Create output dictionary mapping disease names to probabilities
+            # Convert numpy floats to native Python floats for proper serialization
+            output = dict(zip(self.disease_list, [float(pred) for pred in predictions]))
             metadata = {
                 "image_path": image_path,

medrax/tools/segmentation/medsam2.py CHANGED Viewed

@@ -15,7 +15,7 @@ from langchain_core.callbacks import (
 from langchain_core.tools import BaseTool
 # Add MedSAM2 to Python path for proper module resolution
-medsam2_path = str(Path(__file__).parent.parent.parent / "MedSAM2")
 if medsam2_path not in sys.path:
     sys.path.append(medsam2_path)
@@ -93,7 +93,7 @@ class MedSAM2Tool(BaseTool):
             if GlobalHydra.instance().is_initialized():
                 GlobalHydra.instance().clear()
-            config_dir = Path(__file__).parent.parent.parent / "MedSAM2" / "sam2" / "configs"
             initialize_config_dir(config_dir=str(config_dir), version_base="1.2")
             hf_hub_download(

 from langchain_core.tools import BaseTool
 # Add MedSAM2 to Python path for proper module resolution
+medsam2_path = str(Path(__file__).parent.parent.parent.parent / "MedSAM2")
 if medsam2_path not in sys.path:
     sys.path.append(medsam2_path)
             if GlobalHydra.instance().is_initialized():
                 GlobalHydra.instance().clear()
+            config_dir = Path(__file__).parent.parent.parent.parent / "MedSAM2" / "sam2" / "configs"
             initialize_config_dir(config_dir=str(config_dir), version_base="1.2")
             hf_hub_download(