File size: 1,838 Bytes
0914e96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# FILE: ai-service/core/matcher.py (FINAL VERSION)

from sentence_transformers import SentenceTransformer, util
import torch
from typing import List, Dict, Any

_embedding_model = None

def load_embedding_model(model_path: str):
    """Initialise the shared sentence-transformer model once.

    The model is stored in the module-level ``_embedding_model`` variable.
    If a model is already stored there the call returns without doing
    anything, so ``model_path`` is only used by the first call.
    The model is placed on ``"cuda"`` when ``torch.cuda.is_available()``
    reports True, and on ``"cpu"`` otherwise.

    Args:
        model_path: Passed straight to ``SentenceTransformer`` as the model
            to load.
    """
    global _embedding_model

    # Already initialised: keep the existing instance.
    if _embedding_model is not None:
        return

    print(f"   - 🧠 Loading embedding model from: {model_path}")

    target_device = "cpu"
    if torch.cuda.is_available():
        target_device = "cuda"

    _embedding_model = SentenceTransformer(model_path, device=target_device)
    print(f"   - ✅ Embedding model loaded successfully on '{target_device}'.")

def rank_documents_by_similarity(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Ranks a list of documents based on their semantic similarity to a query.

    Each document's 'text' value is embedded together with the query, and the
    cosine similarity is stored on the document as an integer 'match_score'
    (similarity * 100, negative values clamped to 0, then rounded).

    Note: the input dictionaries are modified in place (the 'match_score' key
    is added or overwritten); the returned list is a new list holding those
    same dictionaries.

    Args:
        query: Text to compare every document against.
        documents: Dictionaries to rank. A missing or None 'text' value is
            treated as an empty string.

    Returns:
        The documents ordered by descending 'match_score'. Documents with
        equal scores keep their input order. An empty input gives an empty
        list.

    Raises:
        RuntimeError: If the module-level embedding model has not been loaded.
    """
    if _embedding_model is None:
        # This error is critical. If the model isn't loaded, nothing will work.
        # RuntimeError is a subclass of Exception, so existing handlers still catch it.
        raise RuntimeError("CRITICAL: Embedding model is not loaded. Please ensure load_embedding_model() is called on startup.")

    if not documents:
        return []

    # `or ""` also covers an explicit None value, which a .get() default
    # alone would pass through to the encoder.
    doc_texts = [doc.get("text") or "" for doc in documents]

    query_embedding = _embedding_model.encode(query, convert_to_tensor=True)
    doc_embeddings = _embedding_model.encode(doc_texts, convert_to_tensor=True)

    # Row 0 holds the similarity of the single query against every document.
    # Convert to Python floats once instead of calling .item() per element.
    similarities = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0].tolist()

    # The score must be attached to each document before sorting on it.
    for doc, similarity in zip(documents, similarities):
        doc["match_score"] = round(max(0, similarity * 100))

    return sorted(documents, key=lambda doc: doc["match_score"], reverse=True)