Spaces:

samwell
/

medrax2

Paused

App Files Community

VictorLJZ committed on Jul 29, 2025

Commit

d6cb1b4

1 Parent(s): c90e4b6

updates

Browse files

Files changed (6) hide show

benchmarking/llm_providers/base.py +1 -1
benchmarking/llm_providers/google_provider.py +2 -4
benchmarking/llm_providers/medrax_provider.py +10 -39
benchmarking/llm_providers/openai_provider.py +2 -4
benchmarking/llm_providers/openrouter_provider.py +2 -4
benchmarking/runner.py +7 -7

benchmarking/llm_providers/base.py CHANGED Viewed

@@ -25,7 +25,7 @@ class LLMResponse:
     content: str
     usage: Optional[Dict[str, Any]] = None
     duration: Optional[float] = None
-    raw_response: Optional[Any] = None
 class LLMProvider(ABC):

     content: str
     usage: Optional[Dict[str, Any]] = None
     duration: Optional[float] = None
+    chunk_history: Optional[Any] = None
 class LLMProvider(ABC):

benchmarking/llm_providers/google_provider.py CHANGED Viewed

@@ -92,13 +92,11 @@ class GoogleProvider(LLMProvider):
             return LLMResponse(
                 content=content,
                 usage=usage,
-                duration=duration,
-                raw_response=response
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
-                duration=time.time() - start_time,
-                raw_response=None
             )

             return LLMResponse(
                 content=content,
                 usage=usage,
+                duration=duration
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
+                duration=time.time() - start_time
             )

benchmarking/llm_providers/medrax_provider.py CHANGED Viewed

@@ -35,21 +35,13 @@ class MedRAXProvider(LLMProvider):
             print("Starting server...")
             selected_tools = [
-                # "ImageVisualizerTool",  # For displaying images in the UI
-                # "DicomProcessorTool",  # For processing DICOM medical image files
-                # "ChestXRaySegmentationTool",  # For segmenting anatomical regions in chest X-rays
-                # "LlavaMedTool",  # For multimodal medical image understanding
-                # "ChestXRayGeneratorTool",  # For generating synthetic chest X-rays
-                # "PythonSandboxTool",  # Add the Python sandbox tool
-                # "ChestXRayReportGeneratorTool",  # For generating medical reports from X-rays
-                # "MedicalRAGTool",  # For retrieval-augmented generation with medical knowledge
-                # "WebBrowserTool",  # For web browsing and search capabilities
-                # "XRayVQATool",  # For visual question answering on X-rays
                 "TorchXRayVisionClassifierTool",  # For classifying chest X-ray images using TorchXRayVision
-                # "ArcPlusClassifierTool",  # For advanced chest X-ray classification using ArcPlus
-                # "XRayPhraseGroundingTool",  # For locating described features in X-rays
             ]
             rag_config = RAGConfig(
@@ -106,8 +98,7 @@ class MedRAXProvider(LLMProvider):
         if self.agent is None:
             return LLMResponse(
                 content="Error: MedRAX agent not initialized",
-                duration=time.time() - start_time,
-                raw_response=None
             )
         try:
@@ -115,27 +106,12 @@ class MedRAXProvider(LLMProvider):
             messages = []
             thread_id = str(int(time.time() * 1000))  # Unique thread ID
-            # Copy images to session temp directory and provide paths
-            image_paths = []
             if request.images:
                 valid_images = self._validate_image_paths(request.images)
                 print(f"Processing {len(valid_images)} images")
                 for i, image_path in enumerate(valid_images):
-                    print(f"Original image path: {image_path}")
-                    # Copy image to session temp directory
-                    dest_path = Path("temp") / f"image_{i}_{Path(image_path).name}"
-                    print(f"Destination path: {dest_path}")
-                    shutil.copy2(image_path, dest_path)
-                    image_paths.append(str(dest_path))
-                    # Verify file exists after copy
-                    if not dest_path.exists():
-                        print(f"ERROR: File not found after copy: {dest_path}")
-                    else:
-                        print(f"File successfully copied: {dest_path}")
                     # Add image path message for tools
-                    messages.append(HumanMessage(content=f"image_path: {dest_path}"))
                     # Add image content for multimodal LLM
                     with open(image_path, "rb") as img_file:
@@ -214,16 +190,11 @@ class MedRAXProvider(LLMProvider):
                 content=response_content,
                 usage={"agent_tools": list(self.tools_dict.keys())},
                 duration=duration,
-                raw_response={
-                    "thread_id": thread_id,
-                    "image_paths": image_paths,
-                    "chunk_history": chunk_history,
-                }
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
-                duration=time.time() - start_time,
-                raw_response=None
             )

             print("Starting server...")
             selected_tools = [
+                "ChestXRayReportGeneratorTool",  # For generating medical reports from X-rays
+                "MedicalRAGTool",  # For retrieval-augmented generation with medical knowledge
+                "WebBrowserTool",  # For web browsing and search capabilities
                 "TorchXRayVisionClassifierTool",  # For classifying chest X-ray images using TorchXRayVision
+                "XRayVQATool",  # For visual question answering on X-rays
+                "ArcPlusClassifierTool",  # For advanced chest X-ray classification using ArcPlus
+                "XRayPhraseGroundingTool",  # For locating described features in X-rays
             ]
             rag_config = RAGConfig(
         if self.agent is None:
             return LLMResponse(
                 content="Error: MedRAX agent not initialized",
+                duration=time.time() - start_time
             )
         try:
             messages = []
             thread_id = str(int(time.time() * 1000))  # Unique thread ID
             if request.images:
                 valid_images = self._validate_image_paths(request.images)
                 print(f"Processing {len(valid_images)} images")
                 for i, image_path in enumerate(valid_images):
                     # Add image path message for tools
+                    messages.append(HumanMessage(content=f"image_path: {image_path}"))
                     # Add image content for multimodal LLM
                     with open(image_path, "rb") as img_file:
                 content=response_content,
                 usage={"agent_tools": list(self.tools_dict.keys())},
                 duration=duration,
+                chunk_history=chunk_history
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
+                duration=time.time() - start_time
             )

benchmarking/llm_providers/openai_provider.py CHANGED Viewed

@@ -101,13 +101,11 @@ class OpenAIProvider(LLMProvider):
             return LLMResponse(
                 content=content,
                 usage=usage,
-                duration=duration,
-                raw_response=response
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
-                duration=time.time() - start_time,
-                raw_response=None
             )

             return LLMResponse(
                 content=content,
                 usage=usage,
+                duration=duration
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
+                duration=time.time() - start_time
             )

benchmarking/llm_providers/openrouter_provider.py CHANGED Viewed

@@ -78,12 +78,10 @@ class OpenRouterProvider(LLMProvider):
             return LLMResponse(
                 content=content,
                 usage=usage,
-                duration=duration,
-                raw_response=response
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
-                duration=time.time() - start_time,
-                raw_response=None
             )

             return LLMResponse(
                 content=content,
                 usage=usage,
+                duration=duration
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
+                duration=time.time() - start_time
             )

benchmarking/runner.py CHANGED Viewed

@@ -24,7 +24,7 @@ class BenchmarkResult:
     duration: float
     usage: Optional[Dict[str, Any]] = None
     error: Optional[str] = None
-    raw_response: Optional[Dict[str, Any]] = None
     metadata: Optional[Dict[str, Any]] = None
@@ -226,7 +226,7 @@ class BenchmarkRunner:
                 is_correct=is_correct,
                 duration=duration,
                 usage=response.usage,
-                raw_response=response.raw_response,
                 metadata={
                     "data_point_metadata": data_point.metadata,
                     "case_id": data_point.case_id,
@@ -245,7 +245,7 @@ class BenchmarkRunner:
                 is_correct=False,
                 duration=duration,
                 error=str(e),
-                raw_response=None,
                 metadata={
                     "data_point_metadata": data_point.metadata,
                     "case_id": data_point.case_id,
@@ -318,6 +318,8 @@ class BenchmarkRunner:
         # Convert result to serializable format
         result_data = {
             "data_point_id": result.data_point_id,
             "question": result.question,
             "model_answer": result.model_answer,
@@ -326,10 +328,8 @@ class BenchmarkRunner:
             "duration": result.duration,
             "usage": result.usage,
             "error": result.error,
-            "raw_response": result.raw_response,
-            "metadata": result.metadata,
-            "timestamp": datetime.now().isoformat(),
-            "run_id": self.run_id,
         }
         # Save to file

     duration: float
     usage: Optional[Dict[str, Any]] = None
     error: Optional[str] = None
+    chunk_history: Optional[Dict[str, Any]] = None
     metadata: Optional[Dict[str, Any]] = None
                 is_correct=is_correct,
                 duration=duration,
                 usage=response.usage,
+                chunk_history=response.chunk_history,
                 metadata={
                     "data_point_metadata": data_point.metadata,
                     "case_id": data_point.case_id,
                 is_correct=False,
                 duration=duration,
                 error=str(e),
+                chunk_history=None,
                 metadata={
                     "data_point_metadata": data_point.metadata,
                     "case_id": data_point.case_id,
         # Convert result to serializable format
         result_data = {
+            "timestamp": datetime.now().isoformat(),
+            "run_id": self.run_id,
             "data_point_id": result.data_point_id,
             "question": result.question,
             "model_answer": result.model_answer,
             "duration": result.duration,
             "usage": result.usage,
             "error": result.error,
+            "chunk_history": result.chunk_history,
+            "metadata": result.metadata
         }
         # Save to file