# FILE: ai-service/core/matcher.py (FINAL VERSION)
from typing import Any, Dict, List

import torch
from sentence_transformers import SentenceTransformer, util

_embedding_model = None


def load_embedding_model(model_path: str):
    """Loads the sentence transformer model into memory."""
    global _embedding_model
    if _embedding_model is None:
        print(f" - 🧠 Loading embedding model from: {model_path}")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        _embedding_model = SentenceTransformer(model_path, device=device)
        print(f" - ✅ Embedding model loaded successfully on '{device}'.")


def rank_documents_by_similarity(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Ranks a list of documents by their semantic similarity to a query.
    """
    global _embedding_model
    if _embedding_model is None:
        # This error is critical. If the model isn't loaded, nothing will work.
        raise RuntimeError(
            "CRITICAL: Embedding model is not loaded. "
            "Please ensure load_embedding_model() is called on startup."
        )

    if not documents:
        return []

    doc_texts = [doc.get('text', '') for doc in documents]

    # Encode the query and all document texts, then compute cosine similarity
    # between the query embedding and each document embedding.
    query_embedding = _embedding_model.encode(query, convert_to_tensor=True)
    doc_embeddings = _embedding_model.encode(doc_texts, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0]

    # ✨ THE FIX: We MUST add the 'match_score' to each document BEFORE sorting.
    # Scores are scaled to a 0-100 integer; negative similarities are clamped to 0.
    for i, doc in enumerate(documents):
        doc['match_score'] = round(max(0, cosine_scores[i].item() * 100))

    # Now, sort the documents, which already carry the 'match_score' key.
    sorted_documents = sorted(documents, key=lambda x: x.get('match_score', 0), reverse=True)

    return sorted_documents
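

# A minimal usage sketch. The model name "all-MiniLM-L6-v2" and the sample
# documents are illustrative assumptions; any sentence-transformers model
# name or local model directory should work the same way.
if __name__ == "__main__":
    load_embedding_model("all-MiniLM-L6-v2")  # assumed model name/path
    sample_docs = [
        {"id": 1, "text": "Senior Python developer with NLP experience"},
        {"id": 2, "text": "Frontend engineer focused on React and CSS"},
    ]
    ranked = rank_documents_by_similarity("machine learning engineer", sample_docs)
    for doc in ranked:
        print(doc["id"], doc["match_score"])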