KeenWoo committed on
Commit
5d71367
·
verified ·
1 Parent(s): a09a9f3

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +7 -1
evaluate.py CHANGED
@@ -341,7 +341,13 @@ def run_comprehensive_evaluation(
341
  answer_correctness_score = None
342
  if ground_truth_answer and "ERROR" not in answer_text:
343
  try:
344
- judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(ground_truth_answer=ground_truth_answer, generated_answer=answer_text)
 
 
 
 
 
 
345
  print(f" - Judge Prompt Sent:\n{judge_msg}")
346
  raw_correctness = call_llm([{"role": "user", "content": judge_msg}], temperature=0.0)
347
  print(f" - Judge Raw Response: {raw_correctness}")
 
341
  answer_correctness_score = None
342
  if ground_truth_answer and "ERROR" not in answer_text:
343
  try:
344
+ # Change this line in the answer correctness section:
345
+ judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(
346
+ ground_truth_answer=ground_truth_answer,
347
+ generated_answer=answer_text,
348
+ query_type=expected_route # <-- Add this line
349
+ )
350
+ # judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(ground_truth_answer=ground_truth_answer, generated_answer=answer_text)
351
  print(f" - Judge Prompt Sent:\n{judge_msg}")
352
  raw_correctness = call_llm([{"role": "user", "content": judge_msg}], temperature=0.0)
353
  print(f" - Judge Raw Response: {raw_correctness}")