"""
CTR / CVR prediction model.

Given a user state vector and an ad id (embedded internally), predicts:
    P(click | user, ad)      → CTR estimate
    P(convert | click, ad)   → CVR estimate

Used at inference time to annotate recommended ads with human-interpretable
click and conversion probability estimates. The DQN Q-values handle the
*long-term* reward signal; the CTR model provides *immediate* interpretability.
"""

import torch
import torch.nn as nn


class CTRModel(nn.Module):
    """Small MLP mapping (user state, ad id) to click / conversion probabilities.

    The ad id is looked up in a learned embedding table, concatenated with the
    state vector, and passed through a three-layer MLP whose two output logits
    are squashed with a sigmoid.
    """

    def __init__(self, state_dim: int, n_ads: int, ad_embed_dim: int = 16):
        """Build the embedding table and the MLP head.

        Args:
            state_dim: Width of the user state vector.
            n_ads: Number of distinct ad ids (rows in the embedding table).
            ad_embed_dim: Width of each learned ad embedding.
        """
        super().__init__()
        self.ad_emb = nn.Embedding(n_ads, ad_embed_dim)
        in_dim = state_dim + ad_embed_dim
        self.net = nn.Sequential(
            nn.Linear(in_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),  # [logit_ctr, logit_cvr]
        )

    def forward(self, state: torch.Tensor, ad_ids: torch.Tensor) -> torch.Tensor:
        """Score every (state, ad) pair.

        Args:
            state: ``(batch, state_dim)``, ``(1, state_dim)`` or a bare
                ``(state_dim,)`` vector. A single state is broadcast across
                all ``ad_ids``.
            ad_ids: ``(batch,)`` long tensor of ad indices, or ``(1,)`` (or a
                0-d scalar) to broadcast one ad across every state row.

        Returns:
            ``(batch, 2)`` tensor of ``[p_click, p_convert]`` per row.

        Raises:
            RuntimeError: If the leading sizes of ``state`` and ``ad_ids``
                differ and neither of them is 1.
        """
        if state.dim() == 1:
            state = state.unsqueeze(0)
        if ad_ids.dim() == 0:
            ad_ids = ad_ids.unsqueeze(0)

        # Broadcast whichever side has a singleton leading dimension. The
        # row count follows ad_ids unless ad_ids is the singleton side, so a
        # single state fans out over many ads and a single ad fans out over
        # many states.
        n_rows = state.shape[0] if ad_ids.shape[0] == 1 else ad_ids.shape[0]
        state = state.expand(n_rows, -1)
        ad_ids = ad_ids.expand(n_rows)

        ad_feat = self.ad_emb(ad_ids)
        x = torch.cat([state, ad_feat], dim=-1)
        return torch.sigmoid(self.net(x))

    @torch.no_grad()
    def predict(self, state: torch.Tensor, ad_ids: list) -> list:
        """Return a list of ``(p_click, p_convert)`` tuples, one per ad id.

        Runs without gradient tracking. ``state`` follows the same shape rules
        as in :meth:`forward`; ``ad_ids`` is a sequence of integer ad indices.
        An empty ``ad_ids`` yields an empty list.
        """
        if len(ad_ids) == 0:
            return []

        # Build the index tensor on the model's device so the embedding lookup
        # does not hit a CPU/accelerator mismatch after ``model.to(device)``.
        device = self.ad_emb.weight.device
        ids = torch.as_tensor(ad_ids, dtype=torch.long, device=device)
        out = self.forward(state.to(device), ids)  # (n_ads, 2)

        # One bulk conversion to Python floats instead of per-element float().
        return [(p_click, p_convert) for p_click, p_convert in out.tolist()]