Spaces:

samwell
/

medrax2

Paused

App Files Files Community

Adibvafa committed on Jul 25, 2025

Commit

a7d0aad

1 Parent(s): 5084d75

Fix prompt load

Browse files

Files changed (4) hide show

benchmarking/cli.py +47 -27
benchmarking/llm_providers/base.py +6 -4
benchmarking/llm_providers/medrax_provider.py +5 -3
main.py +4 -1

benchmarking/cli.py CHANGED Viewed

@@ -3,34 +3,40 @@
 import argparse
 import sys
-from .llm_providers import *
 from .benchmarks import *
 from .runner import BenchmarkRunner, BenchmarkRunConfig
-def create_llm_provider(model_name: str, provider_type: str, **kwargs) -> LLMProvider:
     """Create an LLM provider based on the model name and type.
     Args:
         model_name (str): Name of the model
-        provider_type (str): Type of provider (openai, google, openrouter, anthropic, medrax)
         **kwargs: Additional configuration parameters
     Returns:
         LLMProvider: The configured LLM provider
     """
-    provider_map = {
-        "openai": OpenAIProvider,
-        "google": GoogleProvider,
-        "openrouter": OpenRouterProvider,
-        "medrax": MedRAXProvider,
-    }
-    if provider_type not in provider_map:
-        raise ValueError(f"Unknown provider type: {provider_type}. Available: {list(provider_map.keys())}")
-    provider_class = provider_map[provider_type]
-    return provider_class(model_name, **kwargs)
 def create_benchmark(benchmark_name: str, data_dir: str, **kwargs) -> Benchmark:
@@ -63,12 +69,12 @@ def run_benchmark_command(args) -> None:
     # Create LLM provider
     provider_kwargs = {}
-    llm_provider = create_llm_provider(args.model, args.provider, **provider_kwargs)
     # Create benchmark
     benchmark_kwargs = {}
-    benchmark = create_benchmark(args.benchmark, args.data_dir, **benchmark_kwargs)
     # Create runner config
     config = BenchmarkRunConfig(
@@ -111,16 +117,30 @@ def main():
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
     # Run benchmark command
-    run_parser = subparsers.add_parser("run", help="Run a benchmark")
-    run_parser.add_argument("--model", required=True, help="Model name (e.g., gpt-4o, gemini-2.5-pro)")
-    run_parser.add_argument("--provider", required=True, choices=["openai", "google", "openrouter", "medrax"], help="LLM provider")
-    run_parser.add_argument("--benchmark", required=True, choices=["rexvqa", "chestagentbench"], help="Benchmark to run")
-    run_parser.add_argument("--data-dir", required=True, help="Directory containing benchmark data")
-    run_parser.add_argument("--output-dir", default="benchmark_results", help="Output directory for results")
-    run_parser.add_argument("--max-questions", type=int, help="Maximum number of questions to process")
-    run_parser.add_argument("--temperature", type=float, default=0.7, help="Model temperature")
-    run_parser.add_argument("--top-p", type=float, default=0.95, help="Top-p value")
-    run_parser.add_argument("--max-tokens", type=int, default=1000, help="Maximum tokens per response")
     run_parser.set_defaults(func=run_benchmark_command)

 import argparse
 import sys
+from .llm_providers.base import LLMProvider
 from .benchmarks import *
 from .runner import BenchmarkRunner, BenchmarkRunConfig
+def create_llm_provider(model_name: str, provider_type: str, system_prompt: str, **kwargs) -> LLMProvider:
     """Create an LLM provider based on the model name and type.
     Args:
         model_name (str): Name of the model
+        provider_type (str): Type of provider (openai, google, openrouter, medrax)
+        system_prompt (str): System prompt identifier to load from file
         **kwargs: Additional configuration parameters
     Returns:
         LLMProvider: The configured LLM provider
     """
+    # Lazy imports to avoid slow startup
+    if provider_type == "openai":
+        from .llm_providers.openai_provider import OpenAIProvider
+        provider_class = OpenAIProvider
+    elif provider_type == "google":
+        from .llm_providers.google_provider import GoogleProvider
+        provider_class = GoogleProvider
+    elif provider_type == "openrouter":
+        from .llm_providers.openrouter_provider import OpenRouterProvider
+        provider_class = OpenRouterProvider
+    elif provider_type == "medrax":
+        from .llm_providers.medrax_provider import MedRAXProvider
+        provider_class = MedRAXProvider
+    else:
+        raise ValueError(f"Unknown provider type: {provider_type}. Available: openai, google, openrouter, medrax")
+    return provider_class(model_name, system_prompt, **kwargs)
 def create_benchmark(benchmark_name: str, data_dir: str, **kwargs) -> Benchmark:
     # Create LLM provider
     provider_kwargs = {}
+    llm_provider = create_llm_provider(model_name=args.model, provider_type=args.provider, system_prompt=args.system_prompt, **provider_kwargs)
     # Create benchmark
     benchmark_kwargs = {}
+    benchmark = create_benchmark(benchmark_name=args.benchmark, data_dir=args.data_dir, **benchmark_kwargs)
     # Create runner config
     config = BenchmarkRunConfig(
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
     # Run benchmark command
+    run_parser = subparsers.add_parser("run", help="Run a benchmark evaluation")
+    run_parser.add_argument("--model", required=True,
+                           help="Model name (e.g., gpt-4o, gpt-4.1-2025-04-14, gemini-2.5-pro)")
+    run_parser.add_argument("--provider", required=True,
+                           choices=["openai", "google", "openrouter", "medrax"],
+                           help="LLM provider to use")
+    run_parser.add_argument("--system-prompt", required=True,
+                           choices=["MEDICAL_ASSISTANT", "CHESTAGENTBENCH_PROMPT"],
+                           help="System prompt: MEDICAL_ASSISTANT (general) or CHESTAGENTBENCH_PROMPT (benchmarks)")
+    run_parser.add_argument("--benchmark", required=True,
+                           choices=["rexvqa", "chestagentbench"],
+                           help="Benchmark dataset: rexvqa (radiology VQA) or chestagentbench (chest X-ray reasoning)")
+    run_parser.add_argument("--data-dir", required=True,
+                           help="Directory containing benchmark data files")
+    run_parser.add_argument("--output-dir", default="benchmark_results",
+                           help="Output directory for results (default: benchmark_results)")
+    run_parser.add_argument("--max-questions", type=int,
+                           help="Maximum number of questions to process (default: all)")
+    run_parser.add_argument("--temperature", type=float, default=0.7,
+                           help="Model temperature for response generation (default: 0.7)")
+    run_parser.add_argument("--top-p", type=float, default=0.95,
+                           help="Top-p nucleus sampling parameter (default: 0.95)")
+    run_parser.add_argument("--max-tokens", type=int, default=1000,
+                           help="Maximum tokens per model response (default: 1000)")
     run_parser.set_defaults(func=run_benchmark_command)

benchmarking/llm_providers/base.py CHANGED Viewed

@@ -35,22 +35,24 @@ class LLMProvider(ABC):
     text + image input -> text output across different models and APIs.
     """
-    def __init__(self, model_name: str, **kwargs):
         """Initialize the LLM provider.
         Args:
             model_name (str): Name of the model to use
             **kwargs: Additional configuration parameters
         """
         self.model_name = model_name
         self.config = kwargs
-        # Always load system prompt from file
         try:
             prompts = load_prompts_from_file("medrax/docs/system_prompts.txt")
-            self.system_prompt = prompts.get("CHESTAGENTBENCH_PROMPT", None)
             if self.system_prompt is None:
-                print(f"Warning: System prompt not found in medrax/docs/system_prompts.txt.")
         except Exception as e:
             print(f"Error loading system prompt: {e}")
             self.system_prompt = None

     text + image input -> text output across different models and APIs.
     """
+    def __init__(self, model_name: str, system_prompt: str, **kwargs):
         """Initialize the LLM provider.
         Args:
             model_name (str): Name of the model to use
+            system_prompt (str): System prompt identifier to load from file
             **kwargs: Additional configuration parameters
         """
         self.model_name = model_name
         self.config = kwargs
+        self.prompt_name = system_prompt  # Store the original prompt identifier
+        # Load system prompt content from file
         try:
             prompts = load_prompts_from_file("medrax/docs/system_prompts.txt")
+            self.system_prompt = prompts.get(system_prompt, None)
             if self.system_prompt is None:
+                print(f"Warning: System prompt '{system_prompt}' not found in medrax/docs/system_prompts.txt.")
         except Exception as e:
             print(f"Error loading system prompt: {e}")
             self.system_prompt = None

benchmarking/llm_providers/medrax_provider.py CHANGED Viewed

@@ -13,18 +13,19 @@ from main import initialize_agent
 class MedRAXProvider(LLMProvider):
     """MedRAX LLM provider that uses the full MedRAX agent system."""
-    def __init__(self, model_name: str, **kwargs):
         """Initialize MedRAX provider.
         Args:
             model_name (str): Base LLM model name (e.g., "gpt-4.1-2025-04-14")
             **kwargs: Additional configuration parameters
         """
         self.model_name = model_name
         self.agent = None
         self.tools_dict = None
-        super().__init__(model_name, **kwargs)
     def _setup(self) -> None:
         """Set up MedRAX agent system."""
@@ -75,6 +76,7 @@ class MedRAXProvider(LLMProvider):
                 top_p=0.95,
                 model_kwargs=model_kwargs,
                 rag_config=rag_config,
                 debug=True,
             )

 class MedRAXProvider(LLMProvider):
     """MedRAX LLM provider that uses the full MedRAX agent system."""
+    def __init__(self, model_name: str, system_prompt: str, **kwargs):
         """Initialize MedRAX provider.
         Args:
             model_name (str): Base LLM model name (e.g., "gpt-4.1-2025-04-14")
+            system_prompt (str): System prompt to use
             **kwargs: Additional configuration parameters
         """
         self.model_name = model_name
         self.agent = None
         self.tools_dict = None
+        super().__init__(model_name, system_prompt, **kwargs)
     def _setup(self) -> None:
         """Set up MedRAX agent system."""
                 top_p=0.95,
                 model_kwargs=model_kwargs,
                 rag_config=rag_config,
+                system_prompt=self.prompt_name,
                 debug=True,
             )

main.py CHANGED Viewed

@@ -41,6 +41,7 @@ def initialize_agent(
     top_p: float = 0.95,
     rag_config: Optional[RAGConfig] = None,
     model_kwargs: Dict[str, Any] = {},
     debug: bool = False,
 ):
     """Initialize the MedRAX agent with specified tools and configuration.
@@ -56,6 +57,7 @@ def initialize_agent(
         top_p (float, optional): Top P for the model. Defaults to 0.95.
         rag_config (RAGConfig, optional): Configuration for the RAG tool. Defaults to None.
         model_kwargs (dict, optional): Additional keyword arguments for model.
         debug (bool, optional): Whether to enable debug mode. Defaults to False.
     Returns:
@@ -63,7 +65,7 @@ def initialize_agent(
     """
     # Load system prompts from file
     prompts = load_prompts_from_file(prompt_file)
-    prompt = prompts["MEDICAL_ASSISTANT"]
     all_tools = {
         "TorchXRayVisionClassifierTool": lambda: TorchXRayVisionClassifierTool(device=device),
@@ -186,6 +188,7 @@ if __name__ == "__main__":
         model_kwargs=model_kwargs,
         rag_config=rag_config,
         debug=True,
     )
     # Create and launch the web interface

     top_p: float = 0.95,
     rag_config: Optional[RAGConfig] = None,
     model_kwargs: Dict[str, Any] = {},
+    system_prompt: str = "MEDICAL_ASSISTANT",
     debug: bool = False,
 ):
     """Initialize the MedRAX agent with specified tools and configuration.
         top_p (float, optional): Top P for the model. Defaults to 0.95.
         rag_config (RAGConfig, optional): Configuration for the RAG tool. Defaults to None.
         model_kwargs (dict, optional): Additional keyword arguments for model.
+        system_prompt (str, optional): System prompt to use. Defaults to "MEDICAL_ASSISTANT".
         debug (bool, optional): Whether to enable debug mode. Defaults to False.
     Returns:
     """
     # Load system prompts from file
     prompts = load_prompts_from_file(prompt_file)
+    prompt = prompts[system_prompt]
     all_tools = {
         "TorchXRayVisionClassifierTool": lambda: TorchXRayVisionClassifierTool(device=device),
         model_kwargs=model_kwargs,
         rag_config=rag_config,
         debug=True,
+        system_prompt="MEDICAL_ASSISTANT",
     )
     # Create and launch the web interface