Sync from GitHub: f94f60c3463cc1866c793ac6b6c79e0d27e516e8
Browse files
app.py
CHANGED
|
@@ -1286,6 +1286,20 @@ def on_interleaved_benchmark(
|
|
| 1286 |
raw_output = extraction_detail.get("raw_output", "")[:500]
|
| 1287 |
extractor_elapsed = extraction_detail.get("elapsed_s", 0.0)
|
| 1288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1289 |
results.append({
|
| 1290 |
"turn": i + 1,
|
| 1291 |
"category": category,
|
|
@@ -1298,6 +1312,7 @@ def on_interleaved_benchmark(
|
|
| 1298 |
"deposit_node_id": deposit_nid,
|
| 1299 |
"ignition_size": len(ignition_sets[i]),
|
| 1300 |
"pith_ids": list(pith_ids),
|
|
|
|
| 1301 |
"trees": trees_for_turn,
|
| 1302 |
"raw_extractor_output": raw_output,
|
| 1303 |
"extractor_elapsed_s": extractor_elapsed,
|
|
|
|
| 1286 |
raw_output = extraction_detail.get("raw_output", "")[:500]
|
| 1287 |
extractor_elapsed = extraction_detail.get("elapsed_s", 0.0)
|
| 1288 |
|
| 1289 |
+
# Qualitative content — pair each surfaced text with the
|
| 1290 |
+
# category our centroid-similarity lookup tagged it as. Lets us
|
| 1291 |
+
# eyeball "is this actually biology content for a biology query"
|
| 1292 |
+
# without running another similarity pass at heatmap-build time.
|
| 1293 |
+
# Truncate text to 200 chars (plus a "..." marker when cut) so the JSON stays readable.
|
| 1294 |
+
_surfaced_context = []
|
| 1295 |
+
for _idx, _pid in enumerate(pith_ids):
|
| 1296 |
+
_text = pith_context[_idx] if _idx < len(pith_context) else ""
|
| 1297 |
+
_surfaced_context.append({
|
| 1298 |
+
"id": _pid,
|
| 1299 |
+
"category_tagged": _categorize_node(_pid),
|
| 1300 |
+
"text": (_text[:200] + ("..." if len(_text) > 200 else "")),
|
| 1301 |
+
})
|
| 1302 |
+
|
| 1303 |
results.append({
|
| 1304 |
"turn": i + 1,
|
| 1305 |
"category": category,
|
|
|
|
| 1312 |
"deposit_node_id": deposit_nid,
|
| 1313 |
"ignition_size": len(ignition_sets[i]),
|
| 1314 |
"pith_ids": list(pith_ids),
|
| 1315 |
+
"surfaced_context": _surfaced_context,
|
| 1316 |
"trees": trees_for_turn,
|
| 1317 |
"raw_extractor_output": raw_output,
|
| 1318 |
"extractor_elapsed_s": extractor_elapsed,
|