Hybrid RAG: BM25+Dense (sqlite-vec/BGE-M3) + cross-encoder reranker (bge-reranker-v2-m3)
Browse files- src/kpaa/cases/index.py +35 -0
src/kpaa/cases/index.py
CHANGED
|
@@ -263,6 +263,41 @@ def search(
|
|
| 263 |
]
|
| 264 |
|
| 265 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
def count(db_path: Path | None = None) -> int:
|
| 267 |
path = db_path or default_db_path()
|
| 268 |
if not path.exists():
|
|
|
|
| 263 |
]
|
| 264 |
|
| 265 |
|
| 266 |
+
def get_cases(
    ntt_ids: Iterable[str], *, db_path: Path | None = None
) -> dict[str, Case]:
    """Load cases by ``ntt_id`` and return them as ``{ntt_id: Case}``.

    Used to turn the dense-search hits of the hybrid retrieval
    (``chunk_id='case_<ntt_id>'``) back into ``Case`` objects.

    Args:
        ntt_ids: Identifiers to look up. Each one is converted with ``str()``;
            duplicates are queried only once.
        db_path: Database file to read. Falls back to ``default_db_path()``
            when omitted.

    Returns:
        A dict keyed by the ``ntt_id`` value stored in the ``cases`` table.
        Identifiers with no matching row are simply absent. The dict is empty
        when no identifiers are given or the database file does not exist.
    """
    # dict.fromkeys drops duplicates while keeping first-seen order, so a
    # repeated id does not consume an extra bound parameter.
    ids = list(dict.fromkeys(str(i) for i in ntt_ids))
    if not ids:
        return {}
    path = db_path or default_db_path()
    if not path.exists():
        return {}

    # SQLite caps the number of bound parameters per statement (999 by default
    # before SQLite 3.32.0). Querying in batches keeps large id lists from
    # failing with "too many SQL variables".
    batch_size = 500
    rows = []
    conn = _connect(path)
    try:
        for start in range(0, len(ids), batch_size):
            batch = ids[start:start + batch_size]
            placeholders = ",".join("?" * len(batch))
            rows.extend(
                conn.execute(
                    f"SELECT * FROM cases WHERE ntt_id IN ({placeholders})", batch
                ).fetchall()
            )
    finally:
        conn.close()

    # Every row comes from the same "SELECT *", so the column set is identical
    # across rows; check once whether the chunk_context column is present
    # instead of once per row.
    has_chunk_context = bool(rows) and "chunk_context" in rows[0].keys()

    return {
        r["ntt_id"]: Case(
            ntt_id=r["ntt_id"],
            ntt_no=r["ntt_no"],
            title=r["title"],
            summary=r["summary"],
            body=r["body"],
            type_code=r["type_code"],
            type_label=r["type_label"],
            category1=r["category1"],
            category2=r["category2"],
            category3=r["category3"],
            reg_dt=r["reg_dt"],
            case_year=r["case_year"],
            source_note=r["source_note"],
            detail_url=r["detail_url"],
            # A missing column or a NULL value both collapse to "".
            chunk_context=(r["chunk_context"] if has_chunk_context else "") or "",
        )
        for r in rows
    }
|
| 299 |
+
|
| 300 |
+
|
| 301 |
def count(db_path: Path | None = None) -> int:
|
| 302 |
path = db_path or default_db_path()
|
| 303 |
if not path.exists():
|