scvcoder committed on
Commit
ce557af
·
verified ·
1 Parent(s): 49b0a74

Hybrid RAG: BM25+Dense (sqlite-vec/BGE-M3) + cross-encoder reranker (bge-reranker-v2-m3)

Browse files
Files changed (1) hide show
  1. src/kpaa/cases/index.py +35 -0
src/kpaa/cases/index.py CHANGED
@@ -263,6 +263,41 @@ def search(
263
  ]
264
 
265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  def count(db_path: Path | None = None) -> int:
267
  path = db_path or default_db_path()
268
  if not path.exists():
 
263
  ]
264
 
265
 
266
def get_cases(
    ntt_ids: Iterable[str], *, db_path: Path | None = None
) -> dict[str, Case]:
    """Fetch cases by ``ntt_id`` and return them as ``{ntt_id: Case}``.

    Used by hybrid retrieval to turn dense-search hits (whose chunk ids have
    the form ``case_<ntt_id>``) back into ``Case`` objects.

    Args:
        ntt_ids: Identifiers to look up. Each one is coerced with ``str()``
            and duplicates are collapsed before querying.
        db_path: Database file to read. Falls back to ``default_db_path()``
            when omitted.

    Returns:
        A dict keyed by the ``ntt_id`` value of each matching row in the
        ``cases`` table. Ids without a matching row are simply absent. The
        dict is empty when no ids are given or the database file does not
        exist.
    """
    # dict.fromkeys drops duplicates while keeping first-seen order, so no
    # bound parameter is wasted on a repeated id.
    ids = list(dict.fromkeys(str(i) for i in ntt_ids))
    if not ids:
        return {}
    path = db_path or default_db_path()
    if not path.exists():
        return {}

    # SQLite limits the number of bound parameters per statement (999 by
    # default before 3.32), so large id lists are queried in batches
    # instead of one oversized IN (...) clause.
    batch_size = 500
    rows = []
    conn = _connect(path)
    try:
        for start in range(0, len(ids), batch_size):
            batch = ids[start:start + batch_size]
            placeholders = ",".join("?" * len(batch))
            rows.extend(
                conn.execute(
                    f"SELECT * FROM cases WHERE ntt_id IN ({placeholders})",
                    batch,
                ).fetchall()
            )
    finally:
        conn.close()

    return {
        r["ntt_id"]: Case(
            ntt_id=r["ntt_id"],
            ntt_no=r["ntt_no"],
            title=r["title"],
            summary=r["summary"],
            body=r["body"],
            type_code=r["type_code"],
            type_label=r["type_label"],
            category1=r["category1"],
            category2=r["category2"],
            category3=r["category3"],
            reg_dt=r["reg_dt"],
            case_year=r["case_year"],
            source_note=r["source_note"],
            detail_url=r["detail_url"],
            # The column may be missing from the row (presumably databases
            # created before it was added -- confirm); NULL and missing
            # both become "".
            chunk_context=(
                r["chunk_context"] if "chunk_context" in r.keys() else ""
            ) or "",
        )
        for r in rows
    }
299
+
300
+
301
  def count(db_path: Path | None = None) -> int:
302
  path = db_path or default_db_path()
303
  if not path.exists():