# DSP_Bidder_4_rules / p440_bidder_inference.py
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from p410_environment import DspCampaign100Env
from p450_functions import _normalize_vector
from p420_bid_requests import generate_bid_requests
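# Inference script: load a previously trained DQN bidder from a checkpoint and
# let it bid greedily on freshly generated bid requests, then report how much
# budget was spent and how the realized distributions compare to the desired ones.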
# Campaign constants passed to the environment
budget = 10
impression_max = 11.888
price_max = 0.118

# Path to the trained DQN checkpoint
MODEL_PATH = "d:\\proj\\theneuron\\tasks\\CS_155_ml_spotzi\\200_bidder_dqn_model_042_50_4.pt"
class DQN(nn.Module):
    def __init__(self, n_observations, n_actions):
        super(DQN, self).__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    # Called with either a single state to determine the next action, or a batch
    # during optimization. Returns a tensor of Q-values with one row per state
    # and one column per action.
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)
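# The class definition must match the training-time architecture exactly so that
# the checkpoint's state_dict keys line up with the layers created here.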
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load checkpoint (contains the layer sizes as well as the trained weights)
checkpoint = torch.load(MODEL_PATH, map_location=device, weights_only=False)

# Recreate the model with the same input/output sizes used during training
policy_net = DQN(
    checkpoint["n_observations"],
    checkpoint["n_actions"]
).to(device)
print("Model architecture loaded successfully")

policy_net.load_state_dict(checkpoint["model_state_dict"])
print("Model weights loaded successfully")

policy_net.eval()  # VERY IMPORTANT (turns off dropout/batchnorm if any)
print("Model loaded successfully")
def choose_action(model, observation):
    # Greedy action selection: pick the action with the highest predicted Q-value.
    with torch.no_grad():
        state = torch.tensor(
            observation,
            dtype=torch.float32,
            device=device
        ).unsqueeze(0)
        q_values = model(state)
        action = q_values.argmax(dim=1).item()
    return action
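# Unlike training, no epsilon-greedy exploration is used here: inference always
# exploits the learned Q-values.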
# Desired audience/placement distributions (each vector is normalized to sum to 1).
# desired_household_vector = _normalize_vector([random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)])
desired_household_vector = _normalize_vector([0.5, 0.3, 0.2])
# desired_income_vector = _normalize_vector([random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)])
# desired_publisher_vector = _normalize_vector([random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)])
desired_publisher_vector = _normalize_vector([0.1, 0.2, 0.7])
# desired_venue_type_vector = _normalize_vector([random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)])
desired_venue_type_vector = _normalize_vector([0.5, 0.3, 0.2])
env = DspCampaign100Env(generate_bid_requests(3),
                        # desired_distributions={"household": desired_household_vector, "income": desired_income_vector},
                        # desired_distributions={"publisher": desired_publisher_vector, "venue_type": desired_venue_type_vector},
                        desired_distributions={"publisher": desired_publisher_vector,
                                               "venue_type": desired_venue_type_vector,
                                               "household": desired_household_vector},
                        # desired_distributions={"publisher": desired_publisher_vector},
                        budget=budget, impression_max=impression_max, price_max=price_max)
state, _ = env.reset()
sum_reward = 0.0

# Greedy inference loop: act until the episode terminates or is truncated.
while True:
    action = choose_action(policy_net, state)
    state, reward, terminated, truncated, _ = env.step(action)
    if not math.isnan(reward):
        sum_reward = sum_reward + reward
    if terminated or truncated:
        print("############# Budget used:", 1 - env.budget_left / env.initial_budget)
        print("############# sum_reward:", sum_reward)
        print("############# Desired distributions:", env.desired_distributions, " - probabilities")
        print("############# Real distributions:", env.current_distributions, " - amount in playouts")
        break