# -*- coding: utf-8 -*-
"""
Created on Mon May 1 07:55:45 2023

@author: Bernd Ebenhoch
"""

import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import streamlit as st

plt.style.use('mystyle.mplstyle')
# Defining the neural network as the agent to choose ad scheme A (0) or B (1)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(1,)))
model.summary()
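
# Note: this model has just two trainable parameters, a 1x1 kernel (weight)
# and a bias. Because the input used below is always 0, the output reduces
# to sigmoid(bias); the kernel's gradient is therefore zero at every step
# and only the bias is effectively trained.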

def action_selection(model):
    # Using GradientTape to automatically build gradients with TensorFlow
    with tf.GradientTape() as tape:

        # As we have no information about the user viewing the ad,
        # the input to the neural network is always the same: 0
        output = model(np.array([[0.0]]))  # [0 ... 1]

        # The output of the neural network is treated as the probability of
        # taking action A (0) or B (1).
        # We compare the output with a uniform random variable.
        # For example, if the output is 0.8, there is an 80% chance that the
        # random variable is smaller, taking action B (1), and a 20% chance
        # that it is larger, taking action A (0)
        action = (tf.random.uniform((1, 1)) < output)  # [0 or 1]

        # The loss value measures the difference between the output and the action
        loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(action, output))

    # We are creating the gradients [dloss/dw, dloss/db]
    grads = tape.gradient(loss, model.trainable_variables)
    return output, action, loss, grads
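
# Quick numeric check of the loss above (example values only, not used by
# the app): with output = 0.8, sampling action 1 gives loss = -log(0.8)
# ≈ 0.22 (the action agrees with the output), while sampling action 0 gives
# loss = -log(1 - 0.8) ≈ 1.61 (the action contradicts the output).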

st.markdown(
    'Simulate A/B optimization with policy gradient reinforcement learning')

lr = float(st.text_input('Learning rate', value='0.5'))
prob_A = float(st.text_input('Click probability of ad A', value='0.3'))
prob_B = float(st.text_input('Click probability of ad B', value='0.4'))
steps = int(st.text_input('Number of ad impressions (steps)', value='1000'))

information_for_plotting = np.zeros((steps, 10))
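# Column layout of information_for_plotting:
# 0 output, 1 action, 2 loss, 3 weight gradient, 4 bias gradient,
# 5 reward, 6 adjusted weight gradient, 7 adjusted bias gradient,
# 8 weight, 9 bias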

if st.button('Run the ad campaign and display the results'):
    with st.spinner('Simulating the ad campaign may take a few seconds ...'):
        for step in range(steps):

            # The neural network is used to choose the action.
            # To display the learning progress, we also record the
            # model output, loss and gradients
            output, action, loss, grads = action_selection(model)

            # Next we apply the action by displaying ad A or B.
            # As we do not want to wait for a real user to click the ad,
            # we simulate the click behavior instead.
            # With the default settings, ad A has a lower click rate (30%)
            # than ad B (40%).
            # We treat a simulated click as the reward for training
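            # np.random.random() < prob_A (or prob_B) is a Bernoulli draw
            # that is True with probability prob_A, so each step simulates
            # one ad impression and a possible click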
            if action == False:  # Action A
                reward = float(np.random.random() < prob_A)
            else:  # Action B
                reward = float(np.random.random() < prob_B)

            # The gradients obtained above are multiplied by the acquired reward.
            # Gradients of actions that led to a click are kept unchanged,
            # whereas gradients of actions that did not lead to a click are reversed
            grads_adjusted = []
            for var_index in range(len(model.trainable_variables)):
                grads_adjusted.append((reward-0.5)*2 * grads[var_index])
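            # The factor (reward - 0.5) * 2 maps reward 1 to +1 (keep the
            # gradient: the update makes the sampled action more likely) and
            # reward 0 to -1 (flip the gradient: the update makes it less likely)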

            # We apply the adjusted gradients to the model parameters with a
            # plain gradient-descent update
            model.trainable_variables[0].assign(
                model.trainable_variables[0] - lr*grads_adjusted[0])
            model.trainable_variables[1].assign(
                model.trainable_variables[1] - lr*grads_adjusted[1])

            # Record everything for plotting, converting tensors to plain floats
            information_for_plotting[step, 0] = float(output.numpy()[0, 0])
            information_for_plotting[step, 1] = int(action.numpy()[0, 0])
            information_for_plotting[step, 2] = float(loss)
            information_for_plotting[step, 3] = float(grads[0].numpy()[0, 0])
            information_for_plotting[step, 4] = float(grads[1].numpy()[0])
            information_for_plotting[step, 5] = reward
            information_for_plotting[step, 6] = float(grads_adjusted[0].numpy()[0, 0])
            information_for_plotting[step, 7] = float(grads_adjusted[1].numpy()[0])
            information_for_plotting[step, 8] = float(model.trainable_variables[0].numpy()[0, 0])
            information_for_plotting[step, 9] = float(model.trainable_variables[1].numpy()[0])

    # Plot the results
    titles = ['Model Output', 'Action', 'Loss', 'Gradients', 'Rewards',
              'Adjusted Gradients', 'Model Parameters']
    plus = [0, 0, 0, 0, 1, 1, 2]
    fig = plt.figure(figsize=(12, 26))
    fig.subplots(7, 1, sharex=True)
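    # plus[i] shifts panel index i to the matching column(s) of
    # information_for_plotting: panels 0, 1, 2 and 4 (output, action, loss,
    # reward) plot a single column, while the gradient and parameter panels
    # plot the weight and bias columns together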
    for i in range(7):
        plt.subplot(7, 1, i+1)
        plt.subplots_adjust(hspace=.0)
        if i in [0, 1, 2, 4]:
            plt.plot(information_for_plotting[:, i+plus[i]])
        else:
            plt.plot(information_for_plotting[:, i+1+plus[i]], label='Bias')
            plt.plot(information_for_plotting[:, i+plus[i]], label='Weight')
            plt.legend(loc="upper left")
        plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
        plt.ylabel(titles[i])
    plt.xlabel('Step')

    # Total number of clicks obtained
    st.markdown('Your ad campaign received **' +
                str(int(information_for_plotting[:, 5].sum())) + '** clicks in total.')
    st.pyplot(fig)