|
|
|
|
|
|
|
|
from sentence_transformers import SentenceTransformer, util |
|
|
import torch |
|
|
from typing import List, Dict, Any |
|
|
|
|
|
# Module-level singleton cache for the SentenceTransformer instance.
# Populated once by load_embedding_model() and read by
# rank_documents_by_similarity(); None until loading has happened.
_embedding_model = None
|
|
|
|
|
def load_embedding_model(model_path: str):
    """Load the sentence-transformer model into the module-level cache.

    Idempotent: if a model is already cached in ``_embedding_model`` this is
    a no-op, so the (potentially expensive) model initialisation happens at
    most once per process.

    Args:
        model_path: Local path or model name accepted by SentenceTransformer.
    """
    global _embedding_model
    if _embedding_model is None:
        print(f" - 🧠 Loading embedding model from: {model_path}")
        # Prefer the GPU when one is visible; SentenceTransformer handles
        # moving the model to the chosen device.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        _embedding_model = SentenceTransformer(model_path, device=device)
        # NOTE(review): the original literal here was mojibake-garbled and
        # contained a raw line break inside the f-string (a syntax error);
        # restored as a single well-formed line.
        print(f" - ✅ Embedding model loaded successfully on '{device}'.")
|
|
|
|
|
def rank_documents_by_similarity(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Rank documents by semantic similarity to a query.

    Each document dict is mutated in place: a ``'match_score'`` key is added
    holding an integer 0-100 similarity percentage. A new list sorted by
    descending score is returned (the input list itself is not reordered).

    Args:
        query: Free-text query to compare the documents against.
        documents: Dicts whose ``'text'`` value is embedded; a missing
            ``'text'`` key is treated as the empty string.

    Returns:
        The same document dicts, sorted by ``'match_score'``, highest first.

    Raises:
        RuntimeError: If ``load_embedding_model()`` has not been called yet.
    """
    global _embedding_model
    if _embedding_model is None:
        # RuntimeError is a subclass of Exception, so any caller that
        # caught the old bare Exception still catches this.
        raise RuntimeError(
            "CRITICAL: Embedding model is not loaded. "
            "Please ensure load_embedding_model() is called on startup."
        )

    if not documents:
        return []

    # Fall back to '' for missing text so encode() never receives None.
    doc_texts = [doc.get('text', '') for doc in documents]

    # Encode the query once and all documents in a single batched call.
    query_embedding = _embedding_model.encode(query, convert_to_tensor=True)
    doc_embeddings = _embedding_model.encode(doc_texts, convert_to_tensor=True)

    # util.cos_sim is the current name for the deprecated pytorch_cos_sim
    # alias; it returns a (1, n_docs) matrix, so take the single score row.
    cosine_scores = util.cos_sim(query_embedding, doc_embeddings)[0]

    # Convert each cosine similarity to an integer 0-100 score; negative
    # similarities clamp to 0 (cosine is bounded by 1, so scores cap at 100).
    for i, doc in enumerate(documents):
        doc['match_score'] = round(max(0, cosine_scores[i].item() * 100))

    # sorted() is stable, so equal-score documents keep their input order.
    return sorted(documents, key=lambda x: x.get('match_score', 0), reverse=True)