Spaces:
Sleeping
Sleeping
Adibvafa
committed on
Commit
·
36f77f2
1
Parent(s):
7f4d4c2
Change formatting to use boxed
Browse files
benchmarking/llm_providers/medrax_provider.py
CHANGED
|
@@ -70,7 +70,7 @@ class MedRAXProvider(LLMProvider):
|
|
| 70 |
agent, tools_dict = initialize_agent(
|
| 71 |
prompt_file="medrax/docs/system_prompts.txt",
|
| 72 |
tools_to_use=selected_tools,
|
| 73 |
-
model_dir="model-weights",
|
| 74 |
temp_dir="temp", # Change this to the path of the temporary directory
|
| 75 |
device="cuda:0",
|
| 76 |
model=self.model_name, # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
|
|
|
|
| 70 |
agent, tools_dict = initialize_agent(
|
| 71 |
prompt_file="medrax/docs/system_prompts.txt",
|
| 72 |
tools_to_use=selected_tools,
|
| 73 |
+
model_dir="/model-weights",
|
| 74 |
temp_dir="temp", # Change this to the path of the temporary directory
|
| 75 |
device="cuda:0",
|
| 76 |
model=self.model_name, # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
|
benchmarking/runner.py
CHANGED
|
@@ -259,9 +259,15 @@ class BenchmarkRunner:
|
|
| 259 |
Returns:
|
| 260 |
str: The extracted answer
|
| 261 |
"""
|
| 262 |
-
#
|
| 263 |
-
|
| 264 |
-
match = re.search(r'\s*<\|([A-F])\|>', response_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
if match:
|
| 266 |
return match.group(1).upper()
|
| 267 |
|
|
|
|
| 259 |
Returns:
|
| 260 |
str: The extracted answer
|
| 261 |
"""
|
| 262 |
+
# Look for the '\boxed{A}' format
|
| 263 |
+
boxed_pattern = r'\\boxed\{([A-Fa-f])\}'
|
| 264 |
+
match = re.search(boxed_pattern, response_text)
|
| 265 |
+
if match:
|
| 266 |
+
return match.group(1).upper()
|
| 267 |
+
|
| 268 |
+
# Fallback: look for the '<|A|>' format (legacy code, will remove later on)
|
| 269 |
+
legacy_pattern = r'\s*<\|([A-F])\|>'
|
| 270 |
+
match = re.search(legacy_pattern, response_text)
|
| 271 |
if match:
|
| 272 |
return match.group(1).upper()
|
| 273 |
|
medrax/docs/system_prompts.txt
CHANGED
|
@@ -22,5 +22,5 @@ Solve using your own vision and reasoning and use tools (if available) to comple
|
|
| 22 |
You can make multiple tool calls in parallel or in sequence as needed for comprehensive answers.
|
| 23 |
Think critically about and criticize the tool outputs.
|
| 24 |
If you need to look up some information before asking a follow up question, you are allowed to do that.
|
| 25 |
-
When encountering a multiple-choice question, your final response should end with "Final answer: <|A|>" from list of possible choices A, B, C, D, E, F.
|
| 26 |
It is extremely important that you strictly answer in the format mentioned above.
|
|
|
|
| 22 |
You can make multiple tool calls in parallel or in sequence as needed for comprehensive answers.
|
| 23 |
Think critically about and criticize the tool outputs.
|
| 24 |
If you need to look up some information before asking a follow up question, you are allowed to do that.
|
| 25 |
+
When encountering a multiple-choice question, your final response should end with "Final answer: \boxed{A}" from list of possible choices A, B, C, D, E, F.
|
| 26 |
It is extremely important that you strictly answer in the format mentioned above.
|