Spaces:
Running
Running
GitHub Actions committed on
Commit ·
c1411e9
1
Parent(s): 30f4f71
Deploy 8168b12
Browse files
app/pipeline/nodes/retrieve.py
CHANGED
|
@@ -308,17 +308,22 @@ def make_retrieve_node(
|
|
| 308 |
if sibling_count >= _SIBLING_TOTAL_CAP:
|
| 309 |
break
|
| 310 |
doc_id = seed["metadata"]["doc_id"]
|
| 311 |
-
seed_idx = seed["metadata"].get("chunk_index"
|
|
|
|
|
|
|
|
|
|
| 312 |
siblings = vector_store.fetch_by_doc_id(doc_id, limit=_SIBLING_FETCH_LIMIT)
|
| 313 |
# RC-2 fix: sort by chunk_index so we can prefer adjacent chunks.
|
| 314 |
-
|
|
|
|
|
|
|
| 315 |
# If seed position is known, prefer adjacent indices (±2) first,
|
| 316 |
# then fall through to remaining siblings in document order.
|
| 317 |
if seed_idx >= 0:
|
| 318 |
adjacent = [s for s in siblings
|
| 319 |
-
if abs(s["metadata"].get("chunk_index"
|
| 320 |
rest = [s for s in siblings
|
| 321 |
-
if abs(s["metadata"].get("chunk_index"
|
| 322 |
ordered_siblings = adjacent + rest
|
| 323 |
else:
|
| 324 |
ordered_siblings = siblings
|
|
|
|
| 308 |
if sibling_count >= _SIBLING_TOTAL_CAP:
|
| 309 |
break
|
| 310 |
doc_id = seed["metadata"]["doc_id"]
|
| 311 |
+
seed_idx = seed["metadata"].get("chunk_index")
|
| 312 |
+
if seed_idx is None:
|
| 313 |
+
seed_idx = -1
|
| 314 |
+
|
| 315 |
siblings = vector_store.fetch_by_doc_id(doc_id, limit=_SIBLING_FETCH_LIMIT)
|
| 316 |
# RC-2 fix: sort by chunk_index so we can prefer adjacent chunks.
|
| 317 |
+
# Guard against chunk_index being actual None in Qdrant payloads.
|
| 318 |
+
siblings.sort(key=lambda c: c["metadata"].get("chunk_index") or 0)
|
| 319 |
+
|
| 320 |
# If seed position is known, prefer adjacent indices (±2) first,
|
| 321 |
# then fall through to remaining siblings in document order.
|
| 322 |
if seed_idx >= 0:
|
| 323 |
adjacent = [s for s in siblings
|
| 324 |
+
if abs((s["metadata"].get("chunk_index") or -999) - seed_idx) <= 2]
|
| 325 |
rest = [s for s in siblings
|
| 326 |
+
if abs((s["metadata"].get("chunk_index") or -999) - seed_idx) > 2]
|
| 327 |
ordered_siblings = adjacent + rest
|
| 328 |
else:
|
| 329 |
ordered_siblings = siblings
|