| import math
|
|
|
| import torch
|
| import torch.nn as nn
|
| import torch.nn.functional as F
|
|
|
| from p410_environment import DspCampaign100Env
|
| from p450_functions import _normalize_vector
|
| from p420_bid_requests import generate_bid_requests
|
|
|
# Campaign configuration: total spend allowed for one evaluation episode.
budget = 10

# Normalization caps for the environment's observation scaling.
# NOTE(review): values presumably carried over from training — confirm they
# match the caps the checkpointed model was trained with.
impression_max=11.888

price_max=0.118



# Absolute path to the trained DQN checkpoint (torch.save format with
# "n_observations", "n_actions" and "model_state_dict" keys — see load below).
MODEL_PATH = "d:\\proj\\theneuron\\tasks\\CS_155_ml_spotzi\\200_bidder_dqn_model_042_50_4.pt"
|
|
|
class DQN(nn.Module):
    """Fully connected Q-network: observation vector -> one Q-value per action.

    Architecture: two hidden layers of width 128 with ReLU activations and a
    linear output head. Attribute names (layer1/layer2/layer3) are part of the
    checkpoint's state-dict keys and must not change.
    """

    def __init__(self, n_observations, n_actions):
        super().__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    def forward(self, x):
        # Hidden layers use ReLU; the head emits raw (unnormalized) Q-values.
        hidden = F.relu(self.layer1(x))
        hidden = F.relu(self.layer2(hidden))
        return self.layer3(hidden)
|
|
|
# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



# NOTE(review): weights_only=False deserializes arbitrary pickled objects —
# acceptable only because MODEL_PATH is a local, trusted checkpoint; never use
# this flag on files from untrusted sources.
checkpoint = torch.load(MODEL_PATH, map_location=device, weights_only=False)



# Rebuild the network with the layer sizes recorded in the checkpoint, then
# restore the trained weights.
policy_net = DQN(

    checkpoint["n_observations"],

    checkpoint["n_actions"]

).to(device)



policy_net.load_state_dict(checkpoint["model_state_dict"])

print("Model architecture loaded successfully")

# eval() switches off training-time behavior (dropout/batch-norm); this MLP
# has neither, but calling it before inference is the conventional safeguard.
policy_net.eval()

print("Model weights loaded successfully")



print("Model loaded successfully")
|
|
|
def choose_action(model, observation):
    """Select the greedy action for *observation* under *model*.

    Parameters
    ----------
    model : nn.Module
        Trained Q-network mapping a (1, n_observations) float32 tensor to a
        (1, n_actions) tensor of Q-values.
    observation : sequence of float
        Raw environment observation.

    Returns
    -------
    int
        Index of the action with the highest predicted Q-value.
    """
    with torch.no_grad():
        # Fix: place the input on the same device as the model's own weights
        # instead of relying on the module-level `device` global; falls back
        # to that global for parameter-less models (backward compatible).
        try:
            target_device = next(model.parameters()).device
        except StopIteration:
            target_device = device

        state = torch.tensor(
            observation,
            dtype=torch.float32,
            device=target_device
        ).unsqueeze(0)

        q_values = model(state)
        # argmax over the action dimension; .item() unwraps to a Python int.
        action = q_values.argmax(dim=1).item()

    return action
|
|
|
|
|
# Campaign targets: desired probability distributions over three categorical
# dimensions, each normalized by the project helper _normalize_vector.
desired_household_vector = _normalize_vector([0.5, 0.3, 0.2])




# NOTE(review): "publiser" is a typo for "publisher"; kept as-is because the
# name is referenced in the env construction below.
desired_publiser_vector = _normalize_vector([0.1, 0.2, 0.7])



desired_venue_type_vector = _normalize_vector([0.5, 0.3, 0.2])

# Environment seeded with 3 generated bid requests, the target distributions,
# and the campaign caps defined at the top of the file.
env = DspCampaign100Env(generate_bid_requests(3),




                        desired_distributions={"publisher": desired_publiser_vector,

                                               "venue_type": desired_venue_type_vector,

                                               "household": desired_household_vector},



                        budget=budget, impression_max=impression_max, price_max=price_max)



# Gymnasium-style reset: returns (initial observation, info dict).
state, _ = env.reset()
|
|
|
sum_reward = 0.0
terminated = truncated = False

# Roll out a single episode, acting greedily with the trained policy.
while not (terminated or truncated):
    action = choose_action(policy_net, state)

    state, reward, terminated, truncated, _ = env.step(action)

    # The environment may report NaN rewards; ignore those in the total.
    if not math.isnan(reward):
        sum_reward += reward

# Episode summary.
print("############# Budget used:", 1 - env.budget_left / env.initial_budget)
print("############# sum_reward:", sum_reward)
print("############# Desire distributions:", env.desired_distributions, " - probabilities")
print("############# Real distributions:", env.current_distributions, " - amount in playouts")
|
|
|
|
|
|
|
|
|
|
|