Update evaluate.py

evaluate.py  CHANGED  (+7 -1)
@@ -341,7 +341,13 @@ def run_comprehensive_evaluation(
         answer_correctness_score = None
         if ground_truth_answer and "ERROR" not in answer_text:
             try:
-                judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(ground_truth_answer=ground_truth_answer, generated_answer=answer_text)
+                # Change this line in the answer correctness section:
+                judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(
+                    ground_truth_answer=ground_truth_answer,
+                    generated_answer=answer_text,
+                    query_type=expected_route  # <-- Add this line
+                )
+                # judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(ground_truth_answer=ground_truth_answer, generated_answer=answer_text)
                 print(f" - Judge Prompt Sent:\n{judge_msg}")
                 raw_correctness = call_llm([{"role": "user", "content": judge_msg}], temperature=0.0)
                 print(f" - Judge Raw Response: {raw_correctness}")
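For the new query_type keyword to have any effect, the ANSWER_CORRECTNESS_JUDGE_PROMPT template (defined elsewhere in this repo) needs a matching {query_type} placeholder alongside {ground_truth_answer} and {generated_answer}. The sketch below is an assumption, not the Space's actual prompt text; it only illustrates the placeholder contract that the .format(...) call in the diff relies on.

# Sketch only: the wording of this template is hypothetical. What matters is
# that it exposes the {ground_truth_answer}, {generated_answer}, and
# {query_type} fields filled in by the .format(...) call shown in the diff.
ANSWER_CORRECTNESS_JUDGE_PROMPT = """\
You are grading an answer produced by a retrieval pipeline.
Query type: {query_type}
Ground-truth answer: {ground_truth_answer}
Generated answer: {generated_answer}
Reply with a single correctness score between 0 and 1.
"""

# Hypothetical usage mirroring the new call in evaluate.py:
judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(
    ground_truth_answer="Paris",
    generated_answer="The capital of France is Paris.",
    query_type="factoid",
)
print(judge_msg)

Note that str.format silently ignores keyword arguments the template does not reference, so passing query_type=expected_route is harmless even before the prompt is updated; it only changes what the judge sees once {query_type} actually appears in the template.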