VictorLJZ committed on
Commit
719e93e
·
2 Parent(s): 05f20bf 3ba600b
benchmarking/llm_providers/medrax_provider.py CHANGED
@@ -35,28 +35,19 @@ class MedRAXProvider(LLMProvider):
35
  print("Starting server...")
36
 
37
  selected_tools = [
38
- # To be tested
39
- # "DicomProcessorTool", # For processing DICOM medical image files
40
- # "TorchXRayVisionClassifierTool", # For classifying chest X-ray images using TorchXRayVision
41
- "ChestXRaySegmentationTool", # For segmenting anatomical regions in chest X-rays
42
- # "WebBrowserTool", # For web browsing and search capabilities
43
-
44
- # These tools are working
45
- # "MedicalRAGTool", # For retrieval-augmented generation with medical knowledge
46
- # "ChestXRayReportGeneratorTool", # For generating medical reports from X-rays
47
- # "XRayVQATool", # For visual question answering on X-rays
48
-
49
-
50
- # Couldn't test these tools
51
  # "ImageVisualizerTool", # For displaying images in the UI
52
- # "PythonSandboxTool", # Add the Python sandbox tool
 
 
 
 
 
53
  # "LlavaMedTool", # For multimodal medical image understanding
 
54
  # "ChestXRayGeneratorTool", # For generating synthetic chest X-rays
55
- # "ArcPlusClassifierTool", # For advanced chest X-ray classification using ArcPlus
56
-
57
-
58
- # Something fishy is going on here
59
- # "XRayPhraseGroundingTool", # For locating described features in X-rays
60
  ]
61
 
62
  rag_config = RAGConfig(
@@ -67,7 +58,7 @@ class MedRAXProvider(LLMProvider):
67
  pinecone_index_name="medrax2", # Name for the Pinecone index
68
  chunk_size=1500,
69
  chunk_overlap=300,
70
- retriever_k=7,
71
  local_docs_dir="rag_docs", # Change this to the path of the documents for RAG
72
  huggingface_datasets=["VictorLJZ/medrax2"], # List of HuggingFace datasets to load
73
  dataset_split="train", # Which split of the datasets to use
@@ -79,11 +70,11 @@ class MedRAXProvider(LLMProvider):
79
  agent, tools_dict = initialize_agent(
80
  prompt_file="medrax/docs/system_prompts.txt",
81
  tools_to_use=selected_tools,
82
- model_dir="model-weights",
83
  temp_dir="temp", # Change this to the path of the temporary directory
84
- device="cpu",
85
  model=self.model_name, # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
86
- temperature=0.7,
87
  top_p=0.95,
88
  model_kwargs=model_kwargs,
89
  rag_config=rag_config,
 
35
  print("Starting server...")
36
 
37
  selected_tools = [
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # "ImageVisualizerTool", # For displaying images in the UI
39
+ # "DicomProcessorTool", # For processing DICOM medical image files
40
+ "TorchXRayVisionClassifierTool", # For classifying chest X-ray images using TorchXRayVision
41
+ "ArcPlusClassifierTool", # For advanced chest X-ray classification using ArcPlus
42
+ # "ChestXRaySegmentationTool", # For segmenting anatomical regions in chest X-rays
43
+ "ChestXRayReportGeneratorTool", # For generating medical reports from X-rays
44
+ "XRayVQATool", # For visual question answering on X-rays
45
  # "LlavaMedTool", # For multimodal medical image understanding
46
+ "XRayPhraseGroundingTool", # For locating described features in X-rays
47
  # "ChestXRayGeneratorTool", # For generating synthetic chest X-rays
48
+ "WebBrowserTool", # For web browsing and search capabilities
49
+ "MedicalRAGTool", # For retrieval-augmented generation with medical knowledge
50
+ # "PythonSandboxTool", # Add the Python sandbox tool
 
 
51
  ]
52
 
53
  rag_config = RAGConfig(
 
58
  pinecone_index_name="medrax2", # Name for the Pinecone index
59
  chunk_size=1500,
60
  chunk_overlap=300,
61
+ retriever_k=3,
62
  local_docs_dir="rag_docs", # Change this to the path of the documents for RAG
63
  huggingface_datasets=["VictorLJZ/medrax2"], # List of HuggingFace datasets to load
64
  dataset_split="train", # Which split of the datasets to use
 
70
  agent, tools_dict = initialize_agent(
71
  prompt_file="medrax/docs/system_prompts.txt",
72
  tools_to_use=selected_tools,
73
+ model_dir="/model-weights",
74
  temp_dir="temp", # Change this to the path of the temporary directory
75
+ device="cuda:0",
76
  model=self.model_name, # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
77
+ temperature=0.3,
78
  top_p=0.95,
79
  model_kwargs=model_kwargs,
80
  rag_config=rag_config,
benchmarking/runner.py CHANGED
@@ -262,9 +262,15 @@ class BenchmarkRunner:
262
  Returns:
263
  str: The extracted answer
264
  """
265
- # First, look for the '<|A|>' format
266
- final_answer_pattern = r'\s*<\|([A-F])\|>'
267
- match = re.search(final_answer_pattern, response_text)
 
 
 
 
 
 
268
  if match:
269
  return match.group(1).upper()
270
 
 
262
  Returns:
263
  str: The extracted answer
264
  """
265
+ # Look for the '\boxed{A}' format
266
+ boxed_pattern = r'\\boxed\{([A-Fa-f])\}'
267
+ match = re.search(boxed_pattern, response_text)
268
+ if match:
269
+ return match.group(1).upper()
270
+
271
+ # Fallback: look for the '<|A|>' format (legacy code, will remove later on)
272
+ legacy_pattern = r'\s*<\|([A-F])\|>'
273
+ match = re.search(legacy_pattern, response_text)
274
  if match:
275
  return match.group(1).upper()
276
 
main.py CHANGED
@@ -143,15 +143,15 @@ if __name__ == "__main__":
143
  selected_tools = [
144
  "ImageVisualizerTool", # For displaying images in the UI
145
  # "DicomProcessorTool", # For processing DICOM medical image files
146
- # "TorchXRayVisionClassifierTool", # For classifying chest X-ray images using TorchXRayVision
147
- # "ArcPlusClassifierTool", # For advanced chest X-ray classification using ArcPlus
148
- # "ChestXRaySegmentationTool", # For segmenting anatomical regions in chest X-rays
149
- # "ChestXRayReportGeneratorTool", # For generating medical reports from X-rays
150
- # "XRayVQATool", # For visual question answering on X-rays
151
  # "LlavaMedTool", # For multimodal medical image understanding
152
- # "XRayPhraseGroundingTool", # For locating described features in X-rays
153
  # "ChestXRayGeneratorTool", # For generating synthetic chest X-rays
154
- "MedSAM2Tool", # For advanced medical image segmentation using MedSAM2
155
  "WebBrowserTool", # For web browsing and search capabilities
156
  "MedicalRAGTool", # For retrieval-augmented generation with medical knowledge
157
  # "PythonSandboxTool", # Add the Python sandbox tool
@@ -167,7 +167,7 @@ if __name__ == "__main__":
167
  pinecone_index_name="medrax2", # Name for the Pinecone index
168
  chunk_size=1500,
169
  chunk_overlap=300,
170
- retriever_k=7,
171
  local_docs_dir="rag_docs", # Change this to the path of the documents for RAG
172
  huggingface_datasets=["VictorLJZ/medrax2"], # List of HuggingFace datasets to load
173
  dataset_split="train", # Which split of the datasets to use
@@ -179,10 +179,10 @@ if __name__ == "__main__":
179
  agent, tools_dict = initialize_agent(
180
  prompt_file="medrax/docs/system_prompts.txt",
181
  tools_to_use=selected_tools,
182
- model_dir="model-weights",
183
  temp_dir="temp", # Change this to the path of the temporary directory
184
- device="cuda",
185
- model="grok-4", # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
186
  temperature=0.7,
187
  top_p=0.95,
188
  model_kwargs=model_kwargs,
 
143
  selected_tools = [
144
  "ImageVisualizerTool", # For displaying images in the UI
145
  # "DicomProcessorTool", # For processing DICOM medical image files
146
+ "TorchXRayVisionClassifierTool", # For classifying chest X-ray images using TorchXRayVision
147
+ "ArcPlusClassifierTool", # For advanced chest X-ray classification using ArcPlus
148
+ "ChestXRaySegmentationTool", # For segmenting anatomical regions in chest X-rays
149
+ "ChestXRayReportGeneratorTool", # For generating medical reports from X-rays
150
+ "XRayVQATool", # For visual question answering on X-rays
151
  # "LlavaMedTool", # For multimodal medical image understanding
152
+ "XRayPhraseGroundingTool", # For locating described features in X-rays
153
  # "ChestXRayGeneratorTool", # For generating synthetic chest X-rays
154
+ # "MedSAM2Tool", # For advanced medical image segmentation using MedSAM2
155
  "WebBrowserTool", # For web browsing and search capabilities
156
  "MedicalRAGTool", # For retrieval-augmented generation with medical knowledge
157
  # "PythonSandboxTool", # Add the Python sandbox tool
 
167
  pinecone_index_name="medrax2", # Name for the Pinecone index
168
  chunk_size=1500,
169
  chunk_overlap=300,
170
+ retriever_k=3,
171
  local_docs_dir="rag_docs", # Change this to the path of the documents for RAG
172
  huggingface_datasets=["VictorLJZ/medrax2"], # List of HuggingFace datasets to load
173
  dataset_split="train", # Which split of the datasets to use
 
179
  agent, tools_dict = initialize_agent(
180
  prompt_file="medrax/docs/system_prompts.txt",
181
  tools_to_use=selected_tools,
182
+ model_dir="/model-weights",
183
  temp_dir="temp", # Change this to the path of the temporary directory
184
+ device="cuda:0",
185
+ model="gpt-4.1-2025-04-14", # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
186
  temperature=0.7,
187
  top_p=0.95,
188
  model_kwargs=model_kwargs,
medrax/docs/system_prompts.txt CHANGED
@@ -22,5 +22,5 @@ Solve using your own vision and reasoning and use tools (if available) to comple
22
  You can make multiple tool calls in parallel or in sequence as needed for comprehensive answers.
23
  Think critically about and criticize the tool outputs.
24
  If you need to look up some information before asking a follow up question, you are allowed to do that.
25
- When encountering a multiple-choice question, your final response should end with "Final answer: <|A|>" from list of possible choices A, B, C, D, E, F.
26
  It is extremely important that you strictly answer in the format mentioned above.
 
22
  You can make multiple tool calls in parallel or in sequence as needed for comprehensive answers.
23
  Think critically about and criticize the tool outputs.
24
  If you need to look up some information before asking a follow up question, you are allowed to do that.
25
+ When encountering a multiple-choice question, your final response should end with "Final answer: \boxed{A}" from the list of possible choices A, B, C, D, E, F.
26
  It is extremely important that you strictly answer in the format mentioned above.