"""
Custom handler for ANCE dense retrieval on HuggingFace Inference Endpoints.

Returns dense embeddings for queries and passages.
"""
| |
|
import math
from typing import Any, Dict, List, Union

import torch
from transformers import AutoModel, AutoTokenizer
| |
|
| |
|
class EndpointHandler:
    """Handler for ANCE dense-retrieval embedding generation.

    Wraps a HuggingFace ``AutoModel``/``AutoTokenizer`` pair and serves two
    request shapes: plain embedding extraction, and query/passage similarity
    scoring. Embeddings are the L2-normalized first-token ([CLS]) vectors.
    """

    def __init__(self, path: str = ""):
        """Load the tokenizer and model from *path*; use GPU when available."""
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModel.from_pretrained(path)
        self.model.eval()  # inference only — disable dropout etc.

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(self.device)

        print(f"ANCE loaded on {self.device}")

    def __call__(self, data: Dict[str, Any]) -> Union[List[List[float]], Dict[str, Any]]:
        """
        Process inference requests.

        Accepts:
            - {"inputs": "text"} - single text
            - {"inputs": ["text1", "text2", ...]} - batch of texts
            - {"query": "...", "passages": ["...", ...]} - query + passages

        Returns:
            - {"embeddings": [...]} - one embedding (list of floats) per input
            - {"scores": [...]} - one cosine-similarity score per passage when
              both "query" and "passages" are provided
            - {"error": "..."} - when no usable input was supplied
        """
        inputs = data.get("inputs")
        query = data.get("query")
        passages = data.get("passages")

        # Scoring mode: rank each passage against the query.
        if query is not None and passages is not None:
            # Guard: an empty passage list would crash the tokenizer.
            if not passages:
                return {"scores": []}
            query_emb = self._encode([query])[0]
            passage_embs = self._encode(passages)
            scores = [self._cosine_similarity(query_emb, p) for p in passage_embs]
            return {"scores": scores}

        if inputs is None:
            return {"error": "No inputs provided. Use 'inputs' or 'query'+'passages'."}

        if isinstance(inputs, str):
            inputs = [inputs]

        # Guard: an empty batch would crash the tokenizer.
        if not inputs:
            return {"embeddings": []}

        return {"embeddings": self._encode(inputs)}

    def _encode(self, texts: List[str], max_length: int = 512) -> List[List[float]]:
        """Tokenize *texts* and return L2-normalized embeddings as plain lists.

        Args:
            texts: Batch of input strings (must be non-empty).
            max_length: Token-truncation length.

        Returns:
            One list of floats per input text.
        """
        encoded = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=max_length,
            return_tensors="pt",
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**encoded)

        # First token ([CLS]) as the sequence representation.
        embeddings = outputs.last_hidden_state[:, 0, :]

        # L2-normalize so dot products equal cosine similarity.
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

        return embeddings.cpu().tolist()

    def _cosine_similarity(self, emb1: List[float], emb2: List[float]) -> float:
        """Cosine similarity of two vectors; 0.0 when either has zero norm."""
        dot = sum(a * b for a, b in zip(emb1, emb2))
        norm1 = math.sqrt(sum(a * a for a in emb1))
        norm2 = math.sqrt(sum(b * b for b in emb2))
        if norm1 == 0 or norm2 == 0:
            return 0.0
        return dot / (norm1 * norm2)
| |
|