""" Two-Tower Model Combined model with User Tower (Isengard) and Wine Tower (Mordor). Computes match score via dot product of normalized embeddings. Integrates with HuggingFace Hub for model upload/download via PyTorchModelHubMixin. """ import torch import torch.nn as nn from typing import Optional import io try: from huggingface_hub import PyTorchModelHubMixin HAS_HF_HUB = True except ImportError: # Fallback for environments without huggingface_hub PyTorchModelHubMixin = object HAS_HF_HUB = False from .user_tower import UserTower from .wine_tower import WineTower from .config import ( EMBEDDING_DIM, USER_VECTOR_DIM, WINE_VECTOR_DIM, HIDDEN_DIM, CATEGORICAL_ENCODING_DIM, ) class TwoTowerModel( nn.Module, PyTorchModelHubMixin, library_name="swirl-wine-recommendations", tags=["recommendation", "two-tower", "wine"], ): """ Two-Tower Recommendation Model Isengard (User Tower): Encodes user preferences from reviewed wines Mordor (Wine Tower): Encodes wine characteristics Score = dot_product(user_vector, wine_vector) * 100 Since both vectors are L2 normalized, the dot product is in [-1, 1], which we scale to [0, 100] for match percentage. HuggingFace Integration: # Upload to Hub model.push_to_hub("swirl/two-tower-recommender") # Load from Hub model = TwoTowerModel.from_pretrained("swirl/two-tower-recommender") """ def __init__( self, embedding_dim: int = EMBEDDING_DIM, hidden_dim: int = HIDDEN_DIM, output_dim: int = USER_VECTOR_DIM, categorical_dim: int = CATEGORICAL_ENCODING_DIM, ): super().__init__() assert USER_VECTOR_DIM == WINE_VECTOR_DIM, "Tower output dims must match" # Store config for serialization (required by PyTorchModelHubMixin) self.config = { "embedding_dim": embedding_dim, "hidden_dim": hidden_dim, "output_dim": output_dim, "categorical_dim": categorical_dim, } self.user_tower = UserTower( embedding_dim=embedding_dim, hidden_dim=hidden_dim, output_dim=output_dim, ) self.wine_tower = WineTower( embedding_dim=embedding_dim, categorical_dim=categorical_dim, hidden_dim=hidden_dim, output_dim=output_dim, ) def forward( self, user_wine_embeddings: torch.Tensor, user_ratings: torch.Tensor, candidate_wine_embedding: torch.Tensor, candidate_categorical: torch.Tensor, user_mask: Optional[torch.Tensor] = None, ) -> torch.Tensor: """ Forward pass computing match scores. Args: user_wine_embeddings: (batch, num_wines, 768) user_ratings: (batch, num_wines) candidate_wine_embedding: (batch, 768) candidate_categorical: (batch, categorical_dim) user_mask: (batch, num_wines) optional padding mask Returns: scores: (batch,) match scores in [0, 100] """ # Get user embedding from reviewed wines user_vector = self.user_tower(user_wine_embeddings, user_ratings, user_mask) # Get wine embedding wine_vector = self.wine_tower(candidate_wine_embedding, candidate_categorical) # Dot product (batch dot product) # Both vectors are normalized, so dot product is in [-1, 1] dot_product = (user_vector * wine_vector).sum(dim=-1) # Scale to [0, 100] scores = (dot_product + 1) * 50 return scores def get_user_embedding( self, wine_embeddings: torch.Tensor, ratings: torch.Tensor, mask: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Get user embedding for caching/batch scoring.""" return self.user_tower(wine_embeddings, ratings, mask) def get_wine_embedding( self, wine_embedding: torch.Tensor, categorical_features: torch.Tensor, ) -> torch.Tensor: """Get wine embedding for caching/batch scoring.""" return self.wine_tower(wine_embedding, categorical_features) def score_from_embeddings( self, user_vector: torch.Tensor, wine_vector: torch.Tensor, ) -> torch.Tensor: """Score from pre-computed tower embeddings.""" dot_product = (user_vector * wine_vector).sum(dim=-1) return (dot_product + 1) * 50 # ========================================================================= # LEGACY SERIALIZATION (fallback when huggingface_hub not available) # ========================================================================= def save(self, path: str) -> None: """Save model state dict to file.""" torch.save( { "state_dict": self.state_dict(), "config": self.config, }, path, ) @classmethod def load(cls, path: str) -> "TwoTowerModel": """Load model from file.""" checkpoint = torch.load(path, map_location="cpu") model = cls(**checkpoint["config"]) model.load_state_dict(checkpoint["state_dict"]) model.eval() return model def to_bytes(self) -> bytes: """Serialize model to bytes for storage.""" buffer = io.BytesIO() torch.save( { "state_dict": self.state_dict(), "config": self.config, }, buffer, ) return buffer.getvalue() @classmethod def from_bytes(cls, data: bytes) -> "TwoTowerModel": """Load model from bytes.""" buffer = io.BytesIO(data) checkpoint = torch.load(buffer, map_location="cpu") model = cls(**checkpoint["config"]) model.load_state_dict(checkpoint["state_dict"]) model.eval() return model # ============================================================================= # TRAINING UTILITIES (for use with HuggingFace Spaces / AutoTrain) # ============================================================================= def create_training_script() -> str: """ Generate a training script for HuggingFace Spaces AutoTrain. This script can be uploaded to a HF Space for remote GPU training. Usage: autotrain spacerunner --project-name two-tower-training \\ --script-path script.py \\ --username swirl \\ --token $HF_TOKEN \\ --backend spaces-a10g-large """ script = ''' """ Two-Tower Model Training Script for HuggingFace Spaces Run with: autotrain spacerunner --script-path script.py """ import torch import torch.nn as nn from torch.utils.data import DataLoader, Dataset from huggingface_hub import login import os # Login to HF login(token=os.environ.get("HF_TOKEN")) from two_tower.model import TwoTowerModel from two_tower.config import TRIPLET_MARGIN, LEARNING_RATE, BATCH_SIZE class WineRecommendationDataset(Dataset): """Dataset of (user_wines, positive_wine, negative_wine) triplets.""" def __init__(self, triplets): self.triplets = triplets def __len__(self): return len(self.triplets) def __getitem__(self, idx): return self.triplets[idx] def train_model( model: TwoTowerModel, train_loader: DataLoader, epochs: int = 10, lr: float = LEARNING_RATE, ): """Train the two-tower model using triplet loss.""" optimizer = torch.optim.Adam(model.parameters(), lr=lr) triplet_loss = nn.TripletMarginLoss(margin=TRIPLET_MARGIN) model.train() for epoch in range(epochs): total_loss = 0 for batch in train_loader: optimizer.zero_grad() # Get embeddings anchor = model.get_user_embedding(batch["user_wines"], batch["ratings"]) positive = model.get_wine_embedding(batch["positive_wine"], batch["positive_cat"]) negative = model.get_wine_embedding(batch["negative_wine"], batch["negative_cat"]) # Compute triplet loss loss = triplet_loss(anchor, positive, negative) loss.backward() optimizer.step() total_loss += loss.item() print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}") return model if __name__ == "__main__": # Load training data (would be fetched from your database) # triplets = load_training_triplets() # Create model model = TwoTowerModel() # Train # train_loader = DataLoader(WineRecommendationDataset(triplets), batch_size=BATCH_SIZE) # model = train_model(model, train_loader, epochs=10) # Push to Hub model.push_to_hub("swirl/two-tower-recommender") print("Model uploaded to HuggingFace Hub!") ''' return script