""" HuggingFace Inference Endpoint Handler Custom handler for the Two-Tower recommendation model. This file is required for deploying to HuggingFace Inference Endpoints. See: https://huggingface.co/docs/inference-endpoints/guides/custom_handler Input format: { "inputs": { "user_wines": [ {"embedding": [768 floats], "rating": 4.5}, ... ], "candidate_wine": { "embedding": [768 floats], "color": "red", "type": "still", "style": "Classic", "climate_type": "continental", "climate_band": "cool", "vintage_band": "medium" } } } OR for batch scoring: { "inputs": { "user_wines": [...], "candidate_wines": [...] # Multiple candidates } } Output format: { "score": 75.5 # Single wine } OR { "scores": [75.5, 82.3, ...] # Batch } """ import torch from typing import Dict, List, Any # Categorical feature vocabularies for one-hot encoding CATEGORICAL_VOCABS = { "color": ["red", "white", "rosé", "orange", "sparkling"], "type": ["still", "sparkling", "fortified", "dessert"], "style": [ "Classic", "Natural", "Organic", "Biodynamic", "Conventional", "Pet-Nat", "Orange", "Skin-Contact", "Amphora", "Traditional", ], "climate_type": ["cool", "moderate", "warm", "hot"], "climate_band": ["cool", "moderate", "warm", "hot"], "vintage_band": ["young", "developing", "mature", "non_vintage"], } class EndpointHandler: """ Custom handler for HuggingFace Inference Endpoints. Loads the Two-Tower model and handles inference requests. """ def __init__(self, path: str = ""): """ Initialize the handler. Args: path: Path to the model directory (provided by HF Inference Endpoints) """ from model import TwoTowerModel # Load model from the checkpoint if path: self.model = TwoTowerModel.from_pretrained(path) else: self.model = TwoTowerModel.from_pretrained("swirl/two-tower-recommender") self.model.eval() # Move to GPU if available self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model.to(self.device) print(f"Two-Tower model loaded on {self.device}") def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: """ Handle inference request. Args: data: Request payload with "inputs" key Returns: Response with "score" or "scores" key """ inputs = data.get("inputs", data) # Get user wines user_wines = inputs.get("user_wines", []) if not user_wines: return {"error": "No user_wines provided"} # Check for single or batch candidate if "candidate_wine" in inputs: # Single wine scoring return self._score_single(user_wines, inputs["candidate_wine"]) elif "candidate_wines" in inputs: # Batch scoring return self._score_batch(user_wines, inputs["candidate_wines"]) else: return {"error": "No candidate_wine or candidate_wines provided"} def _score_single( self, user_wines: List[Dict[str, Any]], candidate_wine: Dict[str, Any] ) -> Dict[str, float]: """Score a single candidate wine.""" with torch.no_grad(): # Prepare user data user_embeddings, user_ratings, user_mask = self._prepare_user_data( user_wines ) # Prepare candidate data wine_embedding, wine_categorical = self._prepare_wine_data(candidate_wine) # Forward pass score = self.model( user_embeddings, user_ratings, wine_embedding, wine_categorical, user_mask, ) return {"score": float(score.item())} def _score_batch( self, user_wines: List[Dict[str, Any]], candidate_wines: List[Dict[str, Any]] ) -> Dict[str, List[float]]: """Score multiple candidate wines.""" with torch.no_grad(): # Prepare user data (same for all candidates) user_embeddings, user_ratings, user_mask = self._prepare_user_data( user_wines ) # Get user embedding once user_vector = self.model.get_user_embedding( user_embeddings, user_ratings, user_mask ) # Score each candidate scores = [] for wine in candidate_wines: wine_embedding, wine_categorical = self._prepare_wine_data(wine) wine_vector = self.model.get_wine_embedding( wine_embedding, wine_categorical ) score = self.model.score_from_embeddings(user_vector, wine_vector) scores.append(float(score.item())) return {"scores": scores} def _prepare_user_data(self, user_wines: List[Dict[str, Any]]) -> tuple: """ Prepare user wine data for model input. Returns: user_embeddings: (1, num_wines, 768) user_ratings: (1, num_wines) user_mask: (1, num_wines) """ embeddings = [] ratings = [] for wine in user_wines: embedding = wine.get("embedding", [0.0] * 768) rating = wine.get("rating", 3.0) embeddings.append(embedding) ratings.append(rating) # Convert to tensors with batch dimension user_embeddings = torch.tensor( [embeddings], dtype=torch.float32, device=self.device ) user_ratings = torch.tensor([ratings], dtype=torch.float32, device=self.device) # Create mask (all 1s since no padding) user_mask = torch.ones( 1, len(user_wines), dtype=torch.float32, device=self.device ) return user_embeddings, user_ratings, user_mask def _prepare_wine_data(self, wine: Dict[str, Any]) -> tuple: """ Prepare wine data for model input. Returns: wine_embedding: (1, 768) wine_categorical: (1, categorical_dim) """ # Get embedding embedding = wine.get("embedding", [0.0] * 768) wine_embedding = torch.tensor( [embedding], dtype=torch.float32, device=self.device ) # Build one-hot categorical encoding categorical = self._encode_categorical(wine) wine_categorical = torch.tensor( [categorical], dtype=torch.float32, device=self.device ) return wine_embedding, wine_categorical def _encode_categorical(self, wine: Dict[str, Any]) -> List[float]: """ One-hot encode categorical features. Args: wine: Wine dict with categorical features Returns: List of floats (one-hot encoded) """ encoding = [] for feature, vocab in CATEGORICAL_VOCABS.items(): value = wine.get(feature) one_hot = [0.0] * len(vocab) if value and value in vocab: idx = vocab.index(value) one_hot[idx] = 1.0 encoding.extend(one_hot) return encoding # For local testing if __name__ == "__main__": # Test the handler handler = EndpointHandler() # Mock request test_data = { "inputs": { "user_wines": [ {"embedding": [0.1] * 768, "rating": 4.5}, {"embedding": [0.2] * 768, "rating": 3.0}, ], "candidate_wine": { "embedding": [0.15] * 768, "color": "red", "type": "still", }, } } result = handler(test_data) print(f"Score: {result}")