File size: 1,123 Bytes
968e24d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""Test section-aware retrieval"""
import faiss
import json
import sqlite3
import numpy as np
from sentence_transformers import SentenceTransformer

# Load
index = faiss.read_index("data/processed/faiss/faiss_index.bin")
with open("data/processed/embeddings/paragraph_ids.json") as f:
    para_ids = json.load(f)

db = sqlite3.connect("data/processed/indexed/paragraphs.db")
cursor = db.cursor()

model = SentenceTransformer("BAAI/bge-base-en-v1.5")

# Test query
query = "What were the facts of the case?"
query_vec = model.encode([query], normalize_embeddings=True)

# Search
scores, indices = index.search(query_vec, k=10)

print(f"Query: {query}\n")
print("Top results with sections:")

for i, (score, idx) in enumerate(zip(scores[0], indices[0]), 1):
    para_id = para_ids[idx]
    
    cursor.execute(
        "SELECT judgment_id, section, section_conf, text FROM paragraphs WHERE id = ?",
        (para_id,)
    )
    row = cursor.fetchone()
    
    print(f"\n[{i}] Score: {score:.3f} | Section: {row[1]} (conf={row[2]:.2f})")
    print(f"    Case: {row[0]}")
    print(f"    Text: {row[3][:100]}...")

db.close()