amitbhatt6075's picture
Complete fresh start - FINAL UPLOAD
0914e96
# FILE: ai-service/core/matcher.py (FINAL VERSION)
from sentence_transformers import SentenceTransformer, util
import torch
from typing import List, Dict, Any
# Module-level cache for the SentenceTransformer instance. It stays None until
# load_embedding_model() assigns it; rank_documents_by_similarity() reads it.
_embedding_model = None
def load_embedding_model(model_path: str) -> None:
    """Load the sentence-transformer model into the module-level cache.

    The model is only constructed on the first call. Once a model is cached,
    later calls return immediately and leave it untouched, even when a
    different ``model_path`` is passed.

    Args:
        model_path: Passed to ``SentenceTransformer`` as the model to load.
    """
    global _embedding_model
    if _embedding_model is not None:
        # Already loaded; nothing to do.
        return

    print(f" - 🧠 Loading embedding model from: {model_path}")
    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    _embedding_model = SentenceTransformer(model_path, device=device)
    print(f" - ✅ Embedding model loaded successfully on '{device}'.")
def rank_documents_by_similarity(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Rank documents by their semantic similarity to a query.

    The query and every document's ``'text'`` value are embedded with the
    cached model and compared by cosine similarity. Each score is written
    back onto its document under ``'match_score'`` as a rounded percentage
    (similarity * 100), with negative similarities clamped to 0.

    Args:
        query: Text the documents are compared against.
        documents: Dicts whose ``'text'`` entry is embedded. A missing or
            ``None`` text is treated as an empty string. The dicts are
            updated in place with a ``'match_score'`` key.

    Returns:
        A new list containing the same dict objects, ordered from highest to
        lowest ``'match_score'``. An empty list when ``documents`` is empty.

    Raises:
        RuntimeError: If ``load_embedding_model()`` has not populated the
            model cache yet.
    """
    model = _embedding_model
    if model is None:
        # This error is critical. If the model isn't loaded, nothing will work.
        raise RuntimeError("CRITICAL: Embedding model is not loaded. Please ensure load_embedding_model() is called on startup.")

    if not documents:
        return []

    # `or ''` also covers a 'text' key that is present but set to None,
    # which `.get('text', '')` would pass straight through to encode().
    doc_texts = [doc.get('text') or '' for doc in documents]

    query_embedding = model.encode(query, convert_to_tensor=True)
    doc_embeddings = model.encode(doc_texts, convert_to_tensor=True)

    # Row 0 holds the similarities for the single query; convert it to plain
    # Python floats once instead of calling .item() per element.
    similarities = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0].tolist()

    # The score must be attached to each document before sorting on it.
    for doc, similarity in zip(documents, similarities):
        doc['match_score'] = round(max(0, similarity * 100))

    return sorted(documents, key=lambda doc: doc['match_score'], reverse=True)