Adibvafa committed on
Commit
36f77f2
·
1 Parent(s): 7f4d4c2

Change formatting to use boxed

Browse files
benchmarking/llm_providers/medrax_provider.py CHANGED
@@ -70,7 +70,7 @@ class MedRAXProvider(LLMProvider):
70
  agent, tools_dict = initialize_agent(
71
  prompt_file="medrax/docs/system_prompts.txt",
72
  tools_to_use=selected_tools,
73
- model_dir="model-weights",
74
  temp_dir="temp", # Change this to the path of the temporary directory
75
  device="cuda:0",
76
  model=self.model_name, # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
 
70
  agent, tools_dict = initialize_agent(
71
  prompt_file="medrax/docs/system_prompts.txt",
72
  tools_to_use=selected_tools,
73
+ model_dir="/model-weights",
74
  temp_dir="temp", # Change this to the path of the temporary directory
75
  device="cuda:0",
76
  model=self.model_name, # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
benchmarking/runner.py CHANGED
@@ -259,9 +259,15 @@ class BenchmarkRunner:
259
  Returns:
260
  str: The extracted answer
261
  """
262
- # First, look for the '<|A|>' format
263
- final_answer_pattern = r'\s*<\|([A-F])\|>'
264
- match = re.search(final_answer_pattern, response_text)
 
 
 
 
 
 
265
  if match:
266
  return match.group(1).upper()
267
 
 
259
  Returns:
260
  str: The extracted answer
261
  """
262
+ # Look for the '\boxed{A}' format
263
+ boxed_pattern = r'\\boxed\{([A-Fa-f])\}'
264
+ match = re.search(boxed_pattern, response_text)
265
+ if match:
266
+ return match.group(1).upper()
267
+
268
+ # Fallback: look for the '<|A|>' format (legacy code, will remove later on)
269
+ legacy_pattern = r'\s*<\|([A-F])\|>'
270
+ match = re.search(legacy_pattern, response_text)
271
  if match:
272
  return match.group(1).upper()
273
 
medrax/docs/system_prompts.txt CHANGED
@@ -22,5 +22,5 @@ Solve using your own vision and reasoning and use tools (if available) to comple
22
  You can make multiple tool calls in parallel or in sequence as needed for comprehensive answers.
23
  Think critically about and criticize the tool outputs.
24
  If you need to look up some information before asking a follow up question, you are allowed to do that.
25
- When encountering a multiple-choice question, your final response should end with "Final answer: <|A|>" from list of possible choices A, B, C, D, E, F.
26
  It is extremely important that you strictly answer in the format mentioned above.
 
22
  You can make multiple tool calls in parallel or in sequence as needed for comprehensive answers.
23
  Think critically about and criticize the tool outputs.
24
  If you need to look up some information before asking a follow up question, you are allowed to do that.
25
+ When encountering a multiple-choice question, your final response should end with "Final answer: \boxed{A}" from list of possible choices A, B, C, D, E, F.
26
  It is extremely important that you strictly answer in the format mentioned above.