Spaces:
Sleeping
Sleeping
Update evaluate.py
Browse files- evaluate.py +3 -2
evaluate.py
CHANGED
|
@@ -564,9 +564,9 @@ def run_comprehensive_evaluation(
|
|
| 564 |
|
| 565 |
|
| 566 |
|
| 567 |
-
|
| 568 |
|
| 569 |
-
|
| 570 |
# 1. Start building the summary_text string with the common parts
|
| 571 |
summary_text = f"""## Evaluation Summary (Mode: {'NLU-Only' if NLU_ONLY_TEST else 'Full RAG'})
|
| 572 |
- **Routing Accuracy**: {pct:.2f}%
|
|
@@ -590,6 +590,7 @@ def run_comprehensive_evaluation(
|
|
| 590 |
|
| 591 |
rag_with_sources_pct = (df["source_count"] > 0).mean() * 100 if "source_count" in df else 0
|
| 592 |
# Calculate the mean for Faithfulness
|
|
|
|
| 593 |
faith_mean = to_f(df["faithfulness"]).mean() * 100
|
| 594 |
# halluc_mean = (1 - to_f(df["faithfulness_score"])).mean() * 100
|
| 595 |
halluc_mean = to_f(df["hallucination_rate"]).mean() * 100
|
|
|
|
| 564 |
|
| 565 |
|
| 566 |
|
| 567 |
+
|
| 568 |
|
| 569 |
+
# --- START: CORRECTED SUMMARY LOGIC ---
|
| 570 |
# 1. Start building the summary_text string with the common parts
|
| 571 |
summary_text = f"""## Evaluation Summary (Mode: {'NLU-Only' if NLU_ONLY_TEST else 'Full RAG'})
|
| 572 |
- **Routing Accuracy**: {pct:.2f}%
|
|
|
|
| 590 |
|
| 591 |
rag_with_sources_pct = (df["source_count"] > 0).mean() * 100 if "source_count" in df else 0
|
| 592 |
# Calculate the mean for Faithfulness
|
| 593 |
+
# Choose to use Hallucination instead of - **RAG: Faithfulness**: {faith_mean:.1f}%
|
| 594 |
faith_mean = to_f(df["faithfulness"]).mean() * 100
|
| 595 |
# halluc_mean = (1 - to_f(df["faithfulness_score"])).mean() * 100
|
| 596 |
halluc_mean = to_f(df["hallucination_rate"]).mean() * 100
|