| """ |
| CTR / CVR prediction model. |
| |
| Given a user state vector and an ad embedding, predicts: |
| P(click | user, ad) → CTR estimate |
| P(convert | click, user, ad) → CVR estimate |
| |
| Used at inference time to annotate recommended ads with human-interpretable |
| click and conversion probability estimates. The DQN Q-values handle the |
| *long-term* reward signal; the CTR model provides *immediate* interpretability. |
| """ |
| import torch |
| import torch.nn as nn |
|
|
|
|
class CTRModel(nn.Module):
    """Small MLP that scores (user state, ad) pairs.

    Each ad id is looked up in a learned embedding table, concatenated with
    the user state vector and passed through a three-layer MLP
    (``in_dim -> 64 -> 32 -> 2``).  The two outputs are squashed with a
    sigmoid and read as ``[p_click, p_convert]``.

    Args:
        state_dim: Width of the user state vector.
        n_ads: Number of distinct ad ids (rows in the embedding table).
        ad_embed_dim: Width of each ad embedding.
    """

    def __init__(self, state_dim: int, n_ads: int, ad_embed_dim: int = 16):
        super().__init__()
        self.ad_emb = nn.Embedding(n_ads, ad_embed_dim)

        # The MLP consumes the state and the ad embedding side by side.
        in_dim = state_dim + ad_embed_dim
        self.net = nn.Sequential(
            nn.Linear(in_dim, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
            nn.Linear(32, 2),
        )

    def forward(self, state: torch.Tensor, ad_ids: torch.Tensor) -> torch.Tensor:
        """Score every (state, ad) row.

        Broadcasting works in both directions:

        * one state (``(state_dim,)`` or ``(1, state_dim)``) against many
          ads -> the state is repeated for every ad;
        * many states ``(batch, state_dim)`` against a single ad ``(1,)``
          -> the ad embedding is repeated for every state.

        Args:
            state: ``(batch, state_dim)``, ``(1, state_dim)`` or
                ``(state_dim,)``.
            ad_ids: ``(batch,)`` or ``(1,)`` integer tensor of ad ids.

        Returns:
            ``(rows, 2)`` tensor of ``[p_click, p_convert]``, each in [0, 1].
        """
        ad_feat = self.ad_emb(ad_ids)
        n_ids = len(ad_ids)

        if n_ids == 1 and state.dim() == 2 and state.shape[0] > 1:
            # Single ad scored against a batch of states: repeat the ad
            # embedding.  Expanding the state to one row would raise here.
            ad_feat = ad_feat.expand(state.shape[0], -1)
        else:
            # One state (or an already matching batch) against n_ids ads.
            # expand() is a no-op view when the sizes already agree and
            # adds the leading batch dim to a 1-D state.
            state = state.expand(n_ids, -1)

        x = torch.cat([state, ad_feat], dim=-1)
        return torch.sigmoid(self.net(x))

    @torch.no_grad()
    def predict(self, state: torch.Tensor, ad_ids: list) -> list:
        """Return a list of ``(p_click, p_convert)`` float tuples.

        One tuple is produced per ad id.  If a single ad id is given together
        with a batch of states, one tuple is produced per state instead (see
        ``forward`` for the broadcasting rules).  An empty ``ad_ids`` yields
        an empty list.

        Runs under ``torch.no_grad()``.  The id tensor is created on the
        same device as the embedding table; ``state`` is used as given, so
        it must already be on that device.

        Args:
            state: User state tensor, as accepted by ``forward``.
            ad_ids: Sequence of integer ad ids.

        Returns:
            List of ``(p_click, p_convert)`` tuples of Python floats.
        """
        if len(ad_ids) == 0:
            return []

        # Build the ids where the embedding weights live so the lookup
        # does not hit a device mismatch after `model.to(device)`.
        ids = torch.tensor(
            ad_ids, dtype=torch.long, device=self.ad_emb.weight.device
        )
        # Call the module (not .forward) so registered hooks still run.
        out = self(state, ids)
        # One bulk conversion to Python floats instead of a sync per element.
        return [(p_click, p_convert) for p_click, p_convert in out.tolist()]
|
|