|
|
""" |
|
|
Two-Tower Model |
|
|
|
|
|
Combined model with User Tower (Isengard) and Wine Tower (Mordor). |
|
|
Computes match score via dot product of normalized embeddings. |
|
|
|
|
|
Integrates with HuggingFace Hub for model upload/download via PyTorchModelHubMixin. |
|
|
""" |
|
|
|
|
|
import torch |
|
|
import torch.nn as nn |
|
|
from typing import Optional |
|
|
import io |
|
|
|
|
|
# huggingface_hub is an optional dependency: when it is missing the model must
# still be importable and usable locally (save/load/to_bytes/from_bytes).
try:
    from huggingface_hub import PyTorchModelHubMixin

    HAS_HF_HUB = True
except ImportError:

    class PyTorchModelHubMixin:
        """Stand-in base class used when ``huggingface_hub`` is not installed.

        Classes in this module inherit from the mixin with class keywords
        such as ``library_name=...`` and ``tags=...``.  Plain ``object``
        rejects class keywords (``object.__init_subclass__`` takes no keyword
        arguments), so falling back to ``object`` makes the class statement
        itself raise ``TypeError``.  This stub accepts and discards them.
        It provides no Hub functionality (no ``push_to_hub`` /
        ``from_pretrained``); check ``HAS_HF_HUB`` before relying on those.
        """

        def __init_subclass__(cls, **kwargs):
            # Hub-only metadata is meaningless without huggingface_hub; drop
            # it instead of forwarding it to object.__init_subclass__.
            super().__init_subclass__()

    HAS_HF_HUB = False
|
|
|
|
|
from .user_tower import UserTower |
|
|
from .wine_tower import WineTower |
|
|
from .config import ( |
|
|
EMBEDDING_DIM, |
|
|
USER_VECTOR_DIM, |
|
|
WINE_VECTOR_DIM, |
|
|
HIDDEN_DIM, |
|
|
CATEGORICAL_ENCODING_DIM, |
|
|
) |
|
|
|
|
|
|
|
|
class TwoTowerModel(
    nn.Module,
    PyTorchModelHubMixin,
    library_name="swirl-wine-recommendations",
    tags=["recommendation", "two-tower", "wine"],
):
    """
    Two-Tower Recommendation Model

    Isengard (User Tower): Encodes user preferences from reviewed wines
    Mordor (Wine Tower): Encodes wine characteristics

    Score = (dot_product(user_vector, wine_vector) + 1) * 50

    The towers are expected to return L2-normalized vectors (see their own
    modules), in which case the dot product is in [-1, 1] and the shift and
    scale above maps it to a match percentage in [0, 100].

    HuggingFace Integration:
        # Upload to Hub
        model.push_to_hub("swirl/two-tower-recommender")

        # Load from Hub
        model = TwoTowerModel.from_pretrained("swirl/two-tower-recommender")
    """

    def __init__(
        self,
        embedding_dim: int = EMBEDDING_DIM,
        hidden_dim: int = HIDDEN_DIM,
        output_dim: int = USER_VECTOR_DIM,
        categorical_dim: int = CATEGORICAL_ENCODING_DIM,
    ):
        """
        Build both towers with a shared output dimension.

        Args:
            embedding_dim: Size of the input wine embeddings fed to both towers.
            hidden_dim: Hidden layer size passed to both towers.
            output_dim: Size of the vectors produced by both towers.
            categorical_dim: Size of the categorical feature vector fed to
                the wine tower.

        Raises:
            AssertionError: If the module-level USER_VECTOR_DIM and
                WINE_VECTOR_DIM constants disagree.
        """
        super().__init__()

        # Sanity check on the package configuration: the dot product below
        # only makes sense when both towers live in the same vector space.
        assert USER_VECTOR_DIM == WINE_VECTOR_DIM, "Tower output dims must match"

        # Constructor arguments are kept so a checkpoint can rebuild an
        # identically shaped model (see save/load and to_bytes/from_bytes).
        self.config = {
            "embedding_dim": embedding_dim,
            "hidden_dim": hidden_dim,
            "output_dim": output_dim,
            "categorical_dim": categorical_dim,
        }

        self.user_tower = UserTower(
            embedding_dim=embedding_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
        )

        self.wine_tower = WineTower(
            embedding_dim=embedding_dim,
            categorical_dim=categorical_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
        )

    def forward(
        self,
        user_wine_embeddings: torch.Tensor,
        user_ratings: torch.Tensor,
        candidate_wine_embedding: torch.Tensor,
        candidate_categorical: torch.Tensor,
        user_mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Forward pass computing match scores.

        Args:
            user_wine_embeddings: (batch, num_wines, embedding_dim)
            user_ratings: (batch, num_wines)
            candidate_wine_embedding: (batch, embedding_dim)
            candidate_categorical: (batch, categorical_dim)
            user_mask: (batch, num_wines) optional padding mask

        Returns:
            scores: (batch,) match scores; in [0, 100] when the tower
                outputs are L2-normalized.
        """
        user_vector = self.user_tower(user_wine_embeddings, user_ratings, user_mask)
        wine_vector = self.wine_tower(candidate_wine_embedding, candidate_categorical)

        # Single source of truth for the scoring formula, shared with callers
        # that score cached embeddings.
        return self.score_from_embeddings(user_vector, wine_vector)

    def get_user_embedding(
        self,
        wine_embeddings: torch.Tensor,
        ratings: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Get user embedding for caching/batch scoring."""
        return self.user_tower(wine_embeddings, ratings, mask)

    def get_wine_embedding(
        self,
        wine_embedding: torch.Tensor,
        categorical_features: torch.Tensor,
    ) -> torch.Tensor:
        """Get wine embedding for caching/batch scoring."""
        return self.wine_tower(wine_embedding, categorical_features)

    def score_from_embeddings(
        self,
        user_vector: torch.Tensor,
        wine_vector: torch.Tensor,
    ) -> torch.Tensor:
        """
        Score from pre-computed tower embeddings.

        Takes the dot product over the last dimension and maps it from
        [-1, 1] to [0, 100] via ``(dot + 1) * 50``.
        """
        dot_product = (user_vector * wine_vector).sum(dim=-1)
        return (dot_product + 1) * 50

    def _export_checkpoint(self) -> dict:
        """Bundle the weights and constructor config into one checkpoint dict."""
        return {
            "state_dict": self.state_dict(),
            "config": self.config,
        }

    @classmethod
    def _restore_checkpoint(cls, checkpoint: dict) -> "TwoTowerModel":
        """Rebuild a model from a checkpoint dict and put it in eval mode."""
        model = cls(**checkpoint["config"])
        model.load_state_dict(checkpoint["state_dict"])
        model.eval()
        return model

    def save(self, path: str) -> None:
        """Save model state dict and config to file."""
        torch.save(self._export_checkpoint(), path)

    @classmethod
    def load(cls, path: str) -> "TwoTowerModel":
        """
        Load model from file.

        The model is loaded onto the CPU and returned in eval mode.
        """
        # SECURITY: torch.load is pickle-based. The checkpoint only contains
        # tensors and a dict of plain values, so restrict the unpickler with
        # weights_only=True instead of allowing arbitrary objects.
        checkpoint = torch.load(path, map_location="cpu", weights_only=True)
        return cls._restore_checkpoint(checkpoint)

    def to_bytes(self) -> bytes:
        """Serialize model to bytes for storage."""
        buffer = io.BytesIO()
        torch.save(self._export_checkpoint(), buffer)
        return buffer.getvalue()

    @classmethod
    def from_bytes(cls, data: bytes) -> "TwoTowerModel":
        """
        Load model from bytes.

        The model is loaded onto the CPU and returned in eval mode.
        """
        # SECURITY: same restriction as load(); stored bytes may not be
        # trustworthy, so never unpickle arbitrary objects from them.
        checkpoint = torch.load(io.BytesIO(data), map_location="cpu", weights_only=True)
        return cls._restore_checkpoint(checkpoint)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_training_script() -> str:
    """
    Return the source code of a training script for HuggingFace Spaces AutoTrain.

    The returned text is a complete Python script that can be uploaded to a
    HF Space for remote GPU training. It defines a triplet dataset wrapper
    and a triplet-loss training loop, then pushes the model to the Hub.

    Usage:
        autotrain spacerunner --project-name two-tower-training \\
            --script-path script.py \\
            --username swirl \\
            --token $HF_TOKEN \\
            --backend spaces-a10g-large

    Returns:
        The script source as a single string.
    """
    # The script is kept as one literal so it can be written to disk verbatim.
    return '''
"""
Two-Tower Model Training Script for HuggingFace Spaces

Run with: autotrain spacerunner --script-path script.py
"""

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from huggingface_hub import login
import os

# Login to HF
login(token=os.environ.get("HF_TOKEN"))

from two_tower.model import TwoTowerModel
from two_tower.config import TRIPLET_MARGIN, LEARNING_RATE, BATCH_SIZE

class WineRecommendationDataset(Dataset):
    """Dataset of (user_wines, positive_wine, negative_wine) triplets."""

    def __init__(self, triplets):
        self.triplets = triplets

    def __len__(self):
        return len(self.triplets)

    def __getitem__(self, idx):
        return self.triplets[idx]


def train_model(
    model: TwoTowerModel,
    train_loader: DataLoader,
    epochs: int = 10,
    lr: float = LEARNING_RATE,
):
    """Train the two-tower model using triplet loss."""
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    triplet_loss = nn.TripletMarginLoss(margin=TRIPLET_MARGIN)

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()

            # Get embeddings
            anchor = model.get_user_embedding(batch["user_wines"], batch["ratings"])
            positive = model.get_wine_embedding(batch["positive_wine"], batch["positive_cat"])
            negative = model.get_wine_embedding(batch["negative_wine"], batch["negative_cat"])

            # Compute triplet loss
            loss = triplet_loss(anchor, positive, negative)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

    return model


if __name__ == "__main__":
    # Load training data (would be fetched from your database)
    # triplets = load_training_triplets()

    # Create model
    model = TwoTowerModel()

    # Train
    # train_loader = DataLoader(WineRecommendationDataset(triplets), batch_size=BATCH_SIZE)
    # model = train_model(model, train_loader, epochs=10)

    # Push to Hub
    model.push_to_hub("swirl/two-tower-recommender")
    print("Model uploaded to HuggingFace Hub!")
'''
|
|
|