Spaces:

KeenWoo
/

AD_Multimodal_Chatbot

Sleeping

KeenWoo committed on Sep 19

Commit

96ba81c

verified ·

1 Parent(s): 8cc1d0f

Update evaluate.py

Files changed (1) hide show

evaluate.py CHANGED Viewed

@@ -564,9 +564,9 @@ def run_comprehensive_evaluation(
-        # Choose to use Hallucination instead of - **RAG: Faithfulness**: {faith_mean:.1f}%
-         --- START: CORRECTED SUMMARY LOGIC ---
         # 1. Start building the summary_text string with the common parts
         summary_text = f"""## Evaluation Summary (Mode: {'NLU-Only' if NLU_ONLY_TEST else 'Full RAG'})
 - **Routing Accuracy**: {pct:.2f}%
@@ -590,6 +590,7 @@ def run_comprehensive_evaluation(
             rag_with_sources_pct = (df["source_count"] > 0).mean() * 100 if "source_count" in df else 0
             # Calculate the mean for Faithfulness
             faith_mean = to_f(df["faithfulness"]).mean() * 100
             # halluc_mean = (1 - to_f(df["faithfulness_score"])).mean() * 100
             halluc_mean = to_f(df["hallucination_rate"]).mean() * 100

+        # --- START: CORRECTED SUMMARY LOGIC ---
         # 1. Start building the summary_text string with the common parts
         summary_text = f"""## Evaluation Summary (Mode: {'NLU-Only' if NLU_ONLY_TEST else 'Full RAG'})
 - **Routing Accuracy**: {pct:.2f}%
             rag_with_sources_pct = (df["source_count"] > 0).mean() * 100 if "source_count" in df else 0
             # Calculate the mean for Faithfulness
+            # Choose to use Hallucination instead of - **RAG: Faithfulness**: {faith_mean:.1f}%
             faith_mean = to_f(df["faithfulness"]).mean() * 100
             # halluc_mean = (1 - to_f(df["faithfulness_score"])).mean() * 100
             halluc_mean = to_f(df["hallucination_rate"]).mean() * 100