File size: 1,928 Bytes
ba86059
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import sys
import torch
import numpy as np

# Put the parent of this file's directory at the front of sys.path so the
# `services` package imported below can be resolved when this file is run
# directly as a script (presumably `services` lives in that directory).
backend_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, backend_dir)

from services.tq_service import tq_service
from services.ingestion_service import ingestion_service
from services.metadata_service import metadata_service

def quick_test():
    """Run a manual smoke test of the retrieval stack and print the results.

    One hard-coded Vietnamese query is pushed through two stages:

    1. Its embedding is passed straight to
       ``tq_service.system_engine.search`` and the raw ids/scores are printed.
    2. The same embedding is passed to ``tq_service.search`` and every hit is
       resolved to its stored chunk with ``metadata_service.get_chunk``.

    Returns nothing; all output goes to stdout. Exceptions raised by the
    services are not caught, so a failure surfaces with its full traceback.
    """
    print("--- TESTING RETRIEVAL SYSTEM (Multilingual E5) ---")

    # 1. Initialisation: nothing to do here (the original note says
    #    tq_service loads its index when it is constructed).
    top_k = 5  # shared by both stages so their outputs stay comparable

    def to_row(values):
        """Return *values* as a NumPy array, reduced to its first row if 2-D+."""
        if torch.is_tensor(values):
            values = values.detach().cpu().numpy()
        # np.asarray also accepts plain lists, which have no `.shape`.
        values = np.asarray(values)
        # A batch of size 1 comes back with a leading batch axis; drop it.
        return values[0] if values.ndim > 1 else values

    # 2. Try a search
    query = "Thủ đô của Việt Nam là gì?"
    print(f"QUERY: {query}")

    # Get the embedding (E5 Base)
    emb = ingestion_service.get_embeddings([query], is_query=True)

    # Search the engine directly, bypassing the service layer
    print("\n[STEP 1] Calling TurboQuant Engine directly...")
    # torch.from_numpy requires a NumPy ndarray, so `emb` is assumed to be one.
    query_t = torch.from_numpy(emb).float()
    ids, scores = tq_service.system_engine.search(query_t, top_k=top_k, n_probe=256)

    # Convert to NumPy so the loop below is safe regardless of device/type
    ids = to_row(ids)
    scores = to_row(scores)

    print("RAW RESULTS FROM ENGINE:")
    for rank, (idx, score) in enumerate(zip(ids, scores), start=1):
        print(f"  [{rank}] ID: {int(idx)}, Score: {float(score):.4f}")

    # 3. Try mapping the hits to metadata (service call)
    # NOTE(review): user_id=-1 presumably selects the system scope - confirm.
    print("\n[STEP 2] Mapping to Metadata via TQService...")
    results = tq_service.search(emb[0], user_id=-1, top_k=top_k, scope="system")

    print(f"RESULTS WITH TEXT CONTENT: {len(results)}")
    for rank, res in enumerate(results, start=1):
        payload = metadata_service.get_chunk(res['id'], user_id=-1)
        if not payload:
            # Report the gap instead of skipping silently; otherwise the
            # count printed above disagrees with the rows shown.
            print(f"  [{rank}] Score: {res['score']:.4f} | (no metadata found for id {res['id']})")
            continue
        print(f"  [{rank}] Score: {res['score']:.4f} | Content: {payload['text'][:100]}...")

# Run the smoke test only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    quick_test()