File size: 8,045 Bytes
ea9cf67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
"""
HuggingFace Inference Endpoint Handler

Custom handler for the Two-Tower recommendation model.
This file is required for deploying to HuggingFace Inference Endpoints.

See: https://huggingface.co/docs/inference-endpoints/guides/custom_handler

Input format:
{
    "inputs": {
        "user_wines": [
            {"embedding": [768 floats], "rating": 4.5},
            ...
        ],
        "candidate_wine": {
            "embedding": [768 floats],
            "color": "red",
            "type": "still",
            "style": "Classic",
            "climate_type": "continental",
            "climate_band": "cool",
            "vintage_band": "medium"
        }
    }
}

OR for batch scoring:
{
    "inputs": {
        "user_wines": [...],
        "candidate_wines": [...]  # Multiple candidates
    }
}

Output format:
{
    "score": 75.5  # Single wine
}
OR
{
    "scores": [75.5, 82.3, ...]  # Batch
}
"""

import torch
from typing import Dict, List, Any

# Categorical feature vocabularies for one-hot encoding.
# NOTE: ordering is load-bearing — _encode_categorical iterates this dict and
# each vocab list in order to build the one-hot vector, so the entry order
# here must match whatever the model was trained with. Do not reorder.
CATEGORICAL_VOCABS = {
    "color": ["red", "white", "rosé", "orange", "sparkling"],
    "type": ["still", "sparkling", "fortified", "dessert"],
    "style": [
        "Classic",
        "Natural",
        "Organic",
        "Biodynamic",
        "Conventional",
        "Pet-Nat",
        "Orange",
        "Skin-Contact",
        "Amphora",
        "Traditional",
    ],
    # NOTE(review): climate_type and climate_band share an identical vocab —
    # presumably intentional (two distinct features, same label set); confirm
    # against the training pipeline.
    "climate_type": ["cool", "moderate", "warm", "hot"],
    "climate_band": ["cool", "moderate", "warm", "hot"],
    "vintage_band": ["young", "developing", "mature", "non_vintage"],
}


class EndpointHandler:
    """
    Custom handler for HuggingFace Inference Endpoints.

    Loads the Two-Tower recommendation model once at construction time and
    serves scoring requests for either a single candidate wine or a batch
    of candidates against a user's wine history.
    """

    # Dimensionality of the wine text embeddings the model consumes; also
    # used as the zero-vector fallback size for wines missing an embedding.
    EMBEDDING_DIM = 768
    # Rating substituted when a user wine omits one (neutral mid-scale value).
    DEFAULT_RATING = 3.0

    def __init__(self, path: str = ""):
        """
        Load the Two-Tower model and move it to the best available device.

        Args:
            path: Path to the model directory (provided by HF Inference
                Endpoints). When empty, the model is pulled from the Hub
                repo instead.
        """
        # Deferred import: the `model` module ships alongside this handler
        # in the endpoint's repository, not as an installed package.
        from model import TwoTowerModel

        # Load from the local checkpoint dir, or fall back to the Hub repo.
        if path:
            self.model = TwoTowerModel.from_pretrained(path)
        else:
            self.model = TwoTowerModel.from_pretrained("swirl/two-tower-recommender")

        self.model.eval()

        # Move to GPU if available; all input tensors are built on this device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        print(f"Two-Tower model loaded on {self.device}")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Handle an inference request.

        Args:
            data: Request payload. The "inputs" wrapper key is optional —
                a bare payload dict is accepted as well.

        Returns:
            {"score": float} for a single candidate, {"scores": [floats]}
            for a batch, or {"error": str} for a malformed request.
        """
        inputs = data.get("inputs", data)

        user_wines = inputs.get("user_wines", [])
        if not user_wines:
            return {"error": "No user_wines provided"}

        # Single vs. batch scoring is distinguished by the key name.
        if "candidate_wine" in inputs:
            return self._score_single(user_wines, inputs["candidate_wine"])
        if "candidate_wines" in inputs:
            return self._score_batch(user_wines, inputs["candidate_wines"])
        return {"error": "No candidate_wine or candidate_wines provided"}

    def _score_single(
        self, user_wines: List[Dict[str, Any]], candidate_wine: Dict[str, Any]
    ) -> Dict[str, float]:
        """Score a single candidate wine against the user's history."""
        with torch.no_grad():
            user_embeddings, user_ratings, user_mask = self._prepare_user_data(
                user_wines
            )
            wine_embedding, wine_categorical = self._prepare_wine_data(candidate_wine)

            # Full forward pass through both towers plus the scoring head.
            score = self.model(
                user_embeddings,
                user_ratings,
                wine_embedding,
                wine_categorical,
                user_mask,
            )

            return {"score": float(score.item())}

    def _score_batch(
        self, user_wines: List[Dict[str, Any]], candidate_wines: List[Dict[str, Any]]
    ) -> Dict[str, List[float]]:
        """Score multiple candidate wines against one user's history."""
        with torch.no_grad():
            user_embeddings, user_ratings, user_mask = self._prepare_user_data(
                user_wines
            )

            # Compute the user-tower embedding once and reuse it — the user
            # side is identical for every candidate.
            user_vector = self.model.get_user_embedding(
                user_embeddings, user_ratings, user_mask
            )

            # Candidates are scored one at a time; batching them into a
            # single wine-tower call would need known model input shapes.
            scores = []
            for wine in candidate_wines:
                wine_embedding, wine_categorical = self._prepare_wine_data(wine)
                wine_vector = self.model.get_wine_embedding(
                    wine_embedding, wine_categorical
                )
                score = self.model.score_from_embeddings(user_vector, wine_vector)
                scores.append(float(score.item()))

            return {"scores": scores}

    def _prepare_user_data(self, user_wines: List[Dict[str, Any]]) -> tuple:
        """
        Convert the user's wine history into model-input tensors.

        Wines missing an "embedding" fall back to a zero vector; wines
        missing a "rating" fall back to DEFAULT_RATING.

        Args:
            user_wines: List of {"embedding": [...], "rating": float} dicts.

        Returns:
            user_embeddings: (1, num_wines, EMBEDDING_DIM) float32 tensor
            user_ratings: (1, num_wines) float32 tensor
            user_mask: (1, num_wines) float32 tensor of ones (no padding,
                since a single request is never padded)
        """
        embeddings = [
            wine.get("embedding", [0.0] * self.EMBEDDING_DIM) for wine in user_wines
        ]
        ratings = [wine.get("rating", self.DEFAULT_RATING) for wine in user_wines]

        # Wrap in a list to add the batch dimension of 1.
        user_embeddings = torch.tensor(
            [embeddings], dtype=torch.float32, device=self.device
        )
        user_ratings = torch.tensor([ratings], dtype=torch.float32, device=self.device)
        user_mask = torch.ones(
            1, len(user_wines), dtype=torch.float32, device=self.device
        )

        return user_embeddings, user_ratings, user_mask

    def _prepare_wine_data(self, wine: Dict[str, Any]) -> tuple:
        """
        Convert one candidate wine into model-input tensors.

        Args:
            wine: Wine dict with "embedding" plus categorical feature keys.

        Returns:
            wine_embedding: (1, EMBEDDING_DIM) float32 tensor
            wine_categorical: (1, categorical_dim) float32 one-hot tensor
        """
        embedding = wine.get("embedding", [0.0] * self.EMBEDDING_DIM)
        wine_embedding = torch.tensor(
            [embedding], dtype=torch.float32, device=self.device
        )

        categorical = self._encode_categorical(wine)
        wine_categorical = torch.tensor(
            [categorical], dtype=torch.float32, device=self.device
        )

        return wine_embedding, wine_categorical

    def _encode_categorical(self, wine: Dict[str, Any]) -> List[float]:
        """
        One-hot encode the wine's categorical features.

        Features are concatenated in CATEGORICAL_VOCABS iteration order.
        A missing or unknown value encodes as an all-zeros slot for that
        feature, so inference never fails on unseen categories.

        Args:
            wine: Wine dict with categorical feature keys.

        Returns:
            Flat list of 0.0/1.0 floats, one slot per vocabulary entry.
        """
        encoding: List[float] = []

        for feature, vocab in CATEGORICAL_VOCABS.items():
            one_hot = [0.0] * len(vocab)
            value = wine.get(feature)
            # `None` is simply never found in a vocab, so the extra
            # truthiness guard the original carried is unnecessary.
            if value in vocab:
                one_hot[vocab.index(value)] = 1.0
            encoding.extend(one_hot)

        return encoding


# For local testing
if __name__ == "__main__":
    # Build the handler exactly as the endpoint would (no local path given,
    # so the model is fetched from the Hub).
    handler = EndpointHandler()

    # A minimal request: two rated user wines and one candidate to score.
    user_history = [
        {"embedding": [0.1] * 768, "rating": 4.5},
        {"embedding": [0.2] * 768, "rating": 3.0},
    ]
    candidate = {
        "embedding": [0.15] * 768,
        "color": "red",
        "type": "still",
    }
    request = {
        "inputs": {
            "user_wines": user_history,
            "candidate_wine": candidate,
        }
    }

    result = handler(request)
    print(f"Score: {result}")