# Uploaded by swirl: "Upload model.py with huggingface_hub" (commit 8b733d3, verified)
"""
Two-Tower Model
Combined model with User Tower (Isengard) and Wine Tower (Mordor).
Computes match score via dot product of normalized embeddings.
Integrates with HuggingFace Hub for model upload/download via PyTorchModelHubMixin.
"""
import torch
import torch.nn as nn
from typing import Optional
import io
try:
    from huggingface_hub import PyTorchModelHubMixin

    HAS_HF_HUB = True
except ImportError:
    # Fallback for environments without huggingface_hub.
    #
    # A bare ``object`` cannot stand in for the mixin: TwoTowerModel is declared
    # with class keywords (library_name=..., tags=...), and
    # ``object.__init_subclass__`` raises TypeError when given any keyword.
    # The stub below accepts and discards them so the module still imports.
    class PyTorchModelHubMixin:
        """No-op stand-in used when huggingface_hub is not installed.

        Provides none of the Hub methods (push_to_hub / from_pretrained);
        use the legacy save/load helpers on the model instead.
        """

        def __init_subclass__(cls, **kwargs) -> None:
            # Hub-only class keywords are meaningless here; drop them.
            super().__init_subclass__()

    HAS_HF_HUB = False
from .user_tower import UserTower
from .wine_tower import WineTower
from .config import (
EMBEDDING_DIM,
USER_VECTOR_DIM,
WINE_VECTOR_DIM,
HIDDEN_DIM,
CATEGORICAL_ENCODING_DIM,
)
class TwoTowerModel(
    nn.Module,
    PyTorchModelHubMixin,
    library_name="swirl-wine-recommendations",
    tags=["recommendation", "two-tower", "wine"],
):
    """
    Two-Tower Recommendation Model

    Isengard (User Tower): Encodes user preferences from reviewed wines
    Mordor (Wine Tower): Encodes wine characteristics

    Score = (dot_product(user_vector, wine_vector) + 1) * 50

    When both vectors are L2 normalized (assumed to happen inside the
    towers -- TODO confirm against UserTower / WineTower), the dot product
    is in [-1, 1], and the formula above maps it to [0, 100] for match
    percentage.

    HuggingFace Integration:
        # Upload to Hub
        model.push_to_hub("swirl/two-tower-recommender")

        # Load from Hub
        model = TwoTowerModel.from_pretrained("swirl/two-tower-recommender")
    """

    def __init__(
        self,
        embedding_dim: int = EMBEDDING_DIM,
        hidden_dim: int = HIDDEN_DIM,
        output_dim: int = USER_VECTOR_DIM,
        categorical_dim: int = CATEGORICAL_ENCODING_DIM,
    ):
        """Build the user and wine towers with a shared output dimension.

        Args:
            embedding_dim: passed to both towers as ``embedding_dim``.
            hidden_dim: passed to both towers as ``hidden_dim``.
            output_dim: passed to both towers as ``output_dim`` so their
                outputs can be combined with a dot product.
            categorical_dim: passed to the wine tower only.
        """
        super().__init__()
        # Sanity check on the package-level constants, not on the arguments.
        assert USER_VECTOR_DIM == WINE_VECTOR_DIM, "Tower output dims must match"

        # Constructor arguments, kept so that a saved checkpoint can rebuild
        # an identically-shaped model (see save / load / to_bytes / from_bytes).
        self.config = {
            "embedding_dim": embedding_dim,
            "hidden_dim": hidden_dim,
            "output_dim": output_dim,
            "categorical_dim": categorical_dim,
        }

        self.user_tower = UserTower(
            embedding_dim=embedding_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
        )
        self.wine_tower = WineTower(
            embedding_dim=embedding_dim,
            categorical_dim=categorical_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
        )

    def forward(
        self,
        user_wine_embeddings: torch.Tensor,
        user_ratings: torch.Tensor,
        candidate_wine_embedding: torch.Tensor,
        candidate_categorical: torch.Tensor,
        user_mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Forward pass computing match scores.

        Shapes below are taken from the original documentation and are not
        enforced here -- TODO confirm against the tower implementations.

        Args:
            user_wine_embeddings: (batch, num_wines, 768)
            user_ratings: (batch, num_wines)
            candidate_wine_embedding: (batch, 768)
            candidate_categorical: (batch, categorical_dim)
            user_mask: (batch, num_wines) optional padding mask

        Returns:
            scores: (batch,) match scores; in [0, 100] provided both towers
            return unit-length vectors.
        """
        # Get user embedding from reviewed wines
        user_vector = self.user_tower(user_wine_embeddings, user_ratings, user_mask)
        # Get wine embedding
        wine_vector = self.wine_tower(candidate_wine_embedding, candidate_categorical)
        # Single source of truth for the scoring formula.
        return self.score_from_embeddings(user_vector, wine_vector)

    def get_user_embedding(
        self,
        wine_embeddings: torch.Tensor,
        ratings: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Get user embedding for caching/batch scoring."""
        return self.user_tower(wine_embeddings, ratings, mask)

    def get_wine_embedding(
        self,
        wine_embedding: torch.Tensor,
        categorical_features: torch.Tensor,
    ) -> torch.Tensor:
        """Get wine embedding for caching/batch scoring."""
        return self.wine_tower(wine_embedding, categorical_features)

    def score_from_embeddings(
        self,
        user_vector: torch.Tensor,
        wine_vector: torch.Tensor,
    ) -> torch.Tensor:
        """Score from pre-computed tower embeddings.

        Computes the dot product over the last dimension and maps it with
        ``(dot + 1) * 50``. The result is in [0, 100] only when both inputs
        are unit-length; no normalization is applied here.
        """
        dot_product = (user_vector * wine_vector).sum(dim=-1)
        return (dot_product + 1) * 50

    # =========================================================================
    # LEGACY SERIALIZATION (fallback when huggingface_hub not available)
    # =========================================================================

    def _checkpoint(self) -> dict:
        """Return the payload written by ``save`` and ``to_bytes``."""
        return {
            "state_dict": self.state_dict(),
            "config": self.config,
        }

    @classmethod
    def _from_checkpoint(cls, source) -> "TwoTowerModel":
        """Rebuild a model from a checkpoint path or file-like object.

        The model is constructed from the stored ``config``, its weights are
        loaded onto CPU, and it is switched to eval mode before returning.
        """
        # NOTE(review): torch.load unpickles its input. Only feed it
        # checkpoints from a trusted source, or consider weights_only=True on
        # PyTorch versions that support it -- this payload holds only tensors
        # and a dict of ints.
        checkpoint = torch.load(source, map_location="cpu")
        model = cls(**checkpoint["config"])
        model.load_state_dict(checkpoint["state_dict"])
        model.eval()
        return model

    def save(self, path: str) -> None:
        """Save model state dict and constructor config to file."""
        torch.save(self._checkpoint(), path)

    @classmethod
    def load(cls, path: str) -> "TwoTowerModel":
        """Load model from file (CPU, eval mode)."""
        return cls._from_checkpoint(path)

    def to_bytes(self) -> bytes:
        """Serialize model to bytes for storage."""
        buffer = io.BytesIO()
        torch.save(self._checkpoint(), buffer)
        return buffer.getvalue()

    @classmethod
    def from_bytes(cls, data: bytes) -> "TwoTowerModel":
        """Load model from bytes produced by ``to_bytes`` (CPU, eval mode)."""
        return cls._from_checkpoint(io.BytesIO(data))
# =============================================================================
# TRAINING UTILITIES (for use with HuggingFace Spaces / AutoTrain)
# =============================================================================
def create_training_script() -> str:
    """
    Generate a training script for HuggingFace Spaces AutoTrain.

    The returned text is a standalone Python script; this function only
    builds the string and does not write it anywhere or run it.

    This script can be uploaded to a HF Space for remote GPU training.

    Usage:
        autotrain spacerunner --project-name two-tower-training \\
            --script-path script.py \\
            --username swirl \\
            --token $HF_TOKEN \\
            --backend spaces-a10g-large

    Returns:
        The script source as a string.
    """
    # The embedded script must be valid Python on its own, so its block
    # structure is spelled out with explicit indentation inside the literal.
    script = '''
"""
Two-Tower Model Training Script for HuggingFace Spaces
Run with: autotrain spacerunner --script-path script.py
"""
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from huggingface_hub import login
import os
# Login to HF
login(token=os.environ.get("HF_TOKEN"))
from two_tower.model import TwoTowerModel
from two_tower.config import TRIPLET_MARGIN, LEARNING_RATE, BATCH_SIZE
class WineRecommendationDataset(Dataset):
    """Dataset of (user_wines, positive_wine, negative_wine) triplets."""
    def __init__(self, triplets):
        self.triplets = triplets
    def __len__(self):
        return len(self.triplets)
    def __getitem__(self, idx):
        return self.triplets[idx]
def train_model(
    model: TwoTowerModel,
    train_loader: DataLoader,
    epochs: int = 10,
    lr: float = LEARNING_RATE,
):
    """Train the two-tower model using triplet loss."""
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    triplet_loss = nn.TripletMarginLoss(margin=TRIPLET_MARGIN)
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            # Get embeddings
            anchor = model.get_user_embedding(batch["user_wines"], batch["ratings"])
            positive = model.get_wine_embedding(batch["positive_wine"], batch["positive_cat"])
            negative = model.get_wine_embedding(batch["negative_wine"], batch["negative_cat"])
            # Compute triplet loss
            loss = triplet_loss(anchor, positive, negative)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")
    return model
if __name__ == "__main__":
    # Load training data (would be fetched from your database)
    # triplets = load_training_triplets()
    # Create model
    model = TwoTowerModel()
    # Train
    # train_loader = DataLoader(WineRecommendationDataset(triplets), batch_size=BATCH_SIZE)
    # model = train_model(model, train_loader, epochs=10)
    # Push to Hub
    model.push_to_hub("swirl/two-tower-recommender")
    print("Model uploaded to HuggingFace Hub!")
'''
    return script