Spaces:
Build error
Build error
Ilia Tambovtsev committed on
Commit ·
d06b625
1
Parent(s): 683b3ae
feat: implement BestChunkMatch metric
Browse files- src/eval/eval_mlflow.py +34 -1
src/eval/eval_mlflow.py
CHANGED
|
@@ -168,6 +168,34 @@ class PresentationCount(BaseMetric):
|
|
| 168 |
)
|
| 169 |
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
class LLMRelevance(BaseMetric):
|
| 172 |
"""LLM-based relevance scoring"""
|
| 173 |
|
|
@@ -267,8 +295,9 @@ class MetricsRegistry:
|
|
| 267 |
"presentationfound": PresentationFound,
|
| 268 |
"pagematch": PageMatch,
|
| 269 |
"pagefound": PageFound,
|
| 270 |
-
"llmrelevance": LLMRelevance,
|
| 271 |
"presentationcount": PresentationCount,
|
|
|
|
|
|
|
| 272 |
}
|
| 273 |
|
| 274 |
@classmethod
|
|
@@ -290,6 +319,7 @@ class MetricPresets:
|
|
| 290 |
"pagematch",
|
| 291 |
"pagefound",
|
| 292 |
"presentationcount",
|
|
|
|
| 293 |
]
|
| 294 |
|
| 295 |
LLM = ["llmrelevance"]
|
|
@@ -483,6 +513,7 @@ class RAGEvaluatorMlflow:
|
|
| 483 |
"question": row["question"],
|
| 484 |
"pres_name": row["pres_name"],
|
| 485 |
"pages": [int(x) for x in row["page"].split(",") if x],
|
|
|
|
| 486 |
}
|
| 487 |
|
| 488 |
try:
|
|
@@ -499,6 +530,7 @@ class RAGEvaluatorMlflow:
|
|
| 499 |
# Update aggregated results
|
| 500 |
result_row = {
|
| 501 |
"question": row["question"],
|
|
|
|
| 502 |
"expected_presentation": row["pres_name"],
|
| 503 |
"expected_pages": row["page"],
|
| 504 |
"retrieved_presentations": [
|
|
@@ -507,6 +539,7 @@ class RAGEvaluatorMlflow:
|
|
| 507 |
"retrieved_pages": [
|
| 508 |
",".join(map(str, p["pages"])) for p in output["contexts"]
|
| 509 |
],
|
|
|
|
| 510 |
}
|
| 511 |
|
| 512 |
for metric_name, metric_result in results.items():
|
|
|
|
| 168 |
)
|
| 169 |
|
| 170 |
|
| 171 |
+
class BestChunkMatch(BaseMetric):
    """Check whether the best chunk of the top result has the expected content type."""

    async def acalculate(self, run_output: Dict, ground_truth: Dict) -> MetricResult:
        """Compare the top result's best-chunk type with the ground-truth content type.

        Args:
            run_output: Retrieval output. Reads ``run_output["contexts"]`` and,
                from its first element, ``["best_chunk"]["chunk_type"]``.
            ground_truth: Expected values. Reads ``ground_truth["content_type"]``.

        Returns:
            A ``MetricResult`` with score 1.0 when the types match and 0.0
            otherwise, including when no contexts were retrieved.
        """
        contexts = run_output["contexts"]
        if not contexts:
            # Nothing was retrieved, so there is no best chunk to compare;
            # report a miss instead of raising IndexError.
            return MetricResult(
                name=self.name,
                score=0.0,
                explanation="No contexts retrieved, so no best chunk to compare",
            )

        true_content_type = ground_truth["content_type"]
        found_content_type = contexts[0]["best_chunk"]["chunk_type"]

        # Chunk types that count as a hit when the ground truth is "general".
        general_chunk_types = (
            "general_description",
            "conclusions_and_insights",
            "layout_and_composition",
        )

        # Substring test: the ground-truth label is expected to be contained in
        # the chunk type name (text_content and visual_content).
        matched = true_content_type in found_content_type or (
            true_content_type == "general"
            and found_content_type in general_chunk_types
        )

        # State the actual outcome; the message must not claim a match on a miss.
        verdict = "matches" if matched else "does not match"
        return MetricResult(
            name=self.name,
            score=float(matched),
            explanation=f"Found content type '{found_content_type}' {verdict} ground truth '{true_content_type}'",
        )
|
| 197 |
+
|
| 198 |
+
|
| 199 |
class LLMRelevance(BaseMetric):
|
| 200 |
"""LLM-based relevance scoring"""
|
| 201 |
|
|
|
|
| 295 |
"presentationfound": PresentationFound,
|
| 296 |
"pagematch": PageMatch,
|
| 297 |
"pagefound": PageFound,
|
|
|
|
| 298 |
"presentationcount": PresentationCount,
|
| 299 |
+
"bestchunkmatch": BestChunkMatch,
|
| 300 |
+
"llmrelevance": LLMRelevance,
|
| 301 |
}
|
| 302 |
|
| 303 |
@classmethod
|
|
|
|
| 319 |
"pagematch",
|
| 320 |
"pagefound",
|
| 321 |
"presentationcount",
|
| 322 |
+
"bestchunkmatch",
|
| 323 |
]
|
| 324 |
|
| 325 |
LLM = ["llmrelevance"]
|
|
|
|
| 513 |
"question": row["question"],
|
| 514 |
"pres_name": row["pres_name"],
|
| 515 |
"pages": [int(x) for x in row["page"].split(",") if x],
|
| 516 |
+
"content_type": row["content"],
|
| 517 |
}
|
| 518 |
|
| 519 |
try:
|
|
|
|
| 530 |
# Update aggregated results
|
| 531 |
result_row = {
|
| 532 |
"question": row["question"],
|
| 533 |
+
"expected_content_type": row["content"],
|
| 534 |
"expected_presentation": row["pres_name"],
|
| 535 |
"expected_pages": row["page"],
|
| 536 |
"retrieved_presentations": [
|
|
|
|
| 539 |
"retrieved_pages": [
|
| 540 |
",".join(map(str, p["pages"])) for p in output["contexts"]
|
| 541 |
],
|
| 542 |
+
"best_chunk_type": output["contexts"][0]["best_chunk"]["chunk_type"], # fmt: skip
|
| 543 |
}
|
| 544 |
|
| 545 |
for metric_name, metric_result in results.items():
|