Merge pull request #3 from app2scale/testing
- agent/backend/load.py +15 -0
- agent/backend/policy.py +36 -0
- agent/backend/reward.py +28 -0
- agent/dashboard/inference.py +180 -0
agent/backend/load.py
ADDED
@@ -0,0 +1,15 @@
+class ConstantLoad():
+    '''Constant load profile: yields the same load value at every step.'''
+
+    def __init__(self, load):
+        self.label = f"""Constant load: {load}"""
+        self.load = load
+
+    def __iter__(self):
+        self.step = 0
+        return self
+
+    def __next__(self):
+        self.step += 1
+        return self.load
+
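Note that ConstantLoad implements the iterator protocol but never raises StopIteration, so the iterator is infinite and callers must bound it themselves (the inference loop in inference.py below does this with nsteps). A minimal usage sketch, assuming the package is importable as agent.backend.load:

```python
from itertools import islice

from agent.backend.load import ConstantLoad

profile = ConstantLoad(24)
print(profile.label)            # Constant load: 24

# The iterator never stops on its own; islice caps it at 5 steps here.
for load in islice(profile, 5):
    print(load)                 # prints 24 five times
```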
agent/backend/policy.py
ADDED
@@ -0,0 +1,36 @@
+import pandas as pd
+from .utils import predict_dict
+
+class Policy1():
+    '''Choose the input for which the maximum reward is obtained. Pseudocode:
+
+    * Determine the input features of the model, a.k.a. the state variables
+    * Enumerate all possible state combinations
+    * For each state, run the model, obtain the output, and calculate the reward
+    * Return the state giving the maximum reward
+
+    #### Formula
+
+    $$ \max_{state} reward(model(state)) $$
+
+    '''
+
+    def __init__(self):
+        self.label = """PolicyArgMax: finds the best state giving max reward"""
+
+    def choose(self, model, ds, inputs, reward_fn):
+        input_df, output_df = predict_dict(model, ds, inputs)
+
+        print('Policy choose')
+        print(input_df)
+        print(output_df)
+
+        io_df = pd.concat([input_df, output_df], axis=1)
+
+        io_df['reward'] = io_df.apply(lambda row: reward_fn.calculate(row), axis=1)
+        print(io_df)
+        print(io_df.columns)
+        max_reward_index = io_df['reward'].argmax()
+        best_state = io_df.iloc[max_reward_index].to_dict()  # argmax returns a position, so index with iloc
+        print('best state', best_state)
+        return best_state
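The heavy lifting of enumerating states and batch-predicting lives in predict_dict, which is not part of this diff. For reference, here is a self-contained sketch of the same argmax-over-states idea, with a hypothetical stand-in model and the enumeration done explicitly via itertools.product (every name below is illustrative, not from the PR):

```python
from itertools import product

import pandas as pd

def toy_model(state):
    # Hypothetical model: predicted cpu usage falls as replicas and cpu grow.
    return {'cpu_usage': state['expected_tps'] / (state['replica'] * state['cpu'] * 100)}

inputs = {'replica': [1, 2, 3], 'cpu': [1, 2], 'expected_tps': [24]}

# Enumerate every state combination, run the model, and score each row.
rows = []
for combo in product(*inputs.values()):
    state = dict(zip(inputs.keys(), combo))
    state.update(toy_model(state))
    rows.append(state)

io_df = pd.DataFrame(rows)
io_df['reward'] = -io_df['cpu_usage']       # Reward2-style: prefer low cpu usage
best_state = io_df.iloc[io_df['reward'].argmax()].to_dict()
print(best_state)                           # replica=3, cpu=2 wins under this reward
```

Because every state combination is scored, the approach is exhaustive: fine for small discrete grids, but combinatorial in the number of state variables.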
agent/backend/reward.py
ADDED
@@ -0,0 +1,28 @@
+class Reward1():
+    '''Reward is simply CPU utilization, nothing more: the higher the CPU utilization, the higher the reward.
+
+    #### Formula
+    $$ r = c $$
+    where $r$ and $c$ are the reward and the CPU utilization, respectively.
+    '''
+
+    def __init__(self):
+        self.label = """Reward1: prefer high cpu utilization"""
+
+    def calculate(self, state):
+        return state["cpu_usage"]
+
+
+class Reward2():
+    '''Reward is the negative of CPU utilization: the lower the CPU utilization, the higher the reward.
+
+    #### Formula
+    $$ r = -c $$
+    where $r$ and $c$ are the reward and the CPU utilization, respectively.
+    '''
+
+    def __init__(self):
+        self.label = """Reward2: prefer low cpu utilization"""
+
+    def calculate(self, state):
+        return -state["cpu_usage"]
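Both classes expose the same two-member interface (label, calculate) that Policy1 relies on, and calculate only needs a mapping with a cpu_usage key: in the policy it receives a DataFrame row, but a plain dict works just as well. A quick sketch, assuming the import path agent.backend.reward:

```python
from agent.backend.reward import Reward1, Reward2

state = {'cpu_usage': 0.85, 'replica': 2}   # stand-in for a predicted state row

print(Reward1().calculate(state))   #  0.85 -> rewards saturating the CPU
print(Reward2().calculate(state))   # -0.85 -> rewards keeping the CPU idle
```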
agent/dashboard/inference.py
ADDED
@@ -0,0 +1,180 @@
+import solara
+import pandas as pd
+from ..backend.reward import Reward1, Reward2
+from ..backend.policy import Policy1
+from ..backend.load import ConstantLoad
+from .training import local_state as training_state
+from .data import state as data_state
+from ..backend.utils import predict_dict, Plot1D
+
+local_state = solara.reactive(
+    {
+        'render_count': solara.reactive(0),
+        'inference_plot_data': solara.reactive({}),
+
+    }
+)
+
+reward_objects = [Reward1(), Reward2()]
+reward_labels = [r.label for r in reward_objects]
+selected_reward_label = solara.reactive(reward_labels[0])
+
+policy_objects = [Policy1()]
+policy_labels = [p.label for p in policy_objects]
+selected_policy_label = solara.reactive(policy_labels[0])
+
+load_objects = [ConstantLoad(24), ConstantLoad(72), ConstantLoad(168)]
+load_labels = [p.label for p in load_objects]
+selected_load_label = solara.reactive(load_labels[0])
+
+nsteps = solara.reactive(10)
+
+inference_history = solara.reactive({})
+
+def force_render():
+    local_state.value['render_count'].set(1 + local_state.value['render_count'].value)
+
+
+
+
+@solara.component
+def InferencePlots(render_count):
+
+
+
+    def execute():
+        #print(selected_policy_label, selected_reward_label, selected_load_label)
+
+        model = training_state.value['model'].value
+        input_cols = training_state.value['input_cols'].value
+        output_cols = training_state.value['output_cols'].value
+        chosen_load_index = load_labels.index(selected_load_label.value)
+        chosen_load = load_objects[chosen_load_index]
+        chosen_policy_index = policy_labels.index(selected_policy_label.value)
+        chosen_policy = policy_objects[chosen_policy_index]
+        chosen_reward_index = reward_labels.index(selected_reward_label.value)
+        chosen_reward = reward_objects[chosen_reward_index]
+        ds = training_state.value['ds'].value
+
+
+        df = ds.df
+        # get all possible values for the inputs
+        input = {col: list(pd.unique(df[col])) for col in input_cols}
+
+        # step through the load profile
+        load_profile = chosen_load
+        step = 0
+        cur_hist = {}
+        for load in load_profile:
+            if step > nsteps.value:
+                break
+            # the model uses the load as an input, so supply it
+            if 'expected_tps' in input.keys():
+                input['expected_tps'] = [load]
+
+
+            best_state = chosen_policy.choose(model, ds, input, chosen_reward)
+            for state, value in best_state.items():
+                if state in cur_hist.keys():
+                    cur_hist[state]['y'].append(value)
+                    cur_hist[state]['x'].append(step)
+                    cur_hist[state]['title'] = state
+                    cur_hist[state]['xlabel'] = 'step'
+                    cur_hist[state]['ylabel'] = state
+                else:
+                    cur_hist[state] = {}
+                    cur_hist[state]['y'] = [value]
+                    cur_hist[state]['x'] = [step]
+                    cur_hist[state]['title'] = state
+                    cur_hist[state]['xlabel'] = 'step'
+                    cur_hist[state]['ylabel'] = state
+            #print(cur_hist)
+
+            local_state.value['inference_plot_data'].set(cur_hist)
+            force_render()
+
+            step += 1
+        #print(local_state.value['inference_plot_data'].value)
+
+
+    solara.InputInt(label='Number of steps', value=nsteps.value, on_value=nsteps.set)
+    model = training_state.value['model'].value
+    if model is None:
+        solara.Warning("Model is not ready yet!")
+
+    solara.Button(label="Execute", on_click=execute, disabled=model is None)
+
+
+    #print('Inference plots')
+    for col, content in local_state.value['inference_plot_data'].value.items():
+        options = {
+            'title': {
+                'text': content['title'],
+                'left': 'center'},
+            'tooltip': {
+                'trigger': 'axis',
+                'axisPointer': {
+                    'type': 'cross'
+                }
+            },
+            'xAxis': {
+                'axisTick': {
+                    'alignWithLabel': True
+                },
+                'data': content['x'],
+                'name': content['xlabel'],
+                'nameLocation': 'middle',
+                'nameTextStyle': {'verticalAlign': 'top', 'padding': [10, 0, 0, 0]}
+            },
+            'yAxis': [
+                {
+                    'type': 'value',
+                    'name': content['ylabel'],
+                    'position': 'left',
+                    'alignTicks': True,
+                    'axisLine': {
+                        'show': True,
+                        'lineStyle': {'color': 'green'}}
+                },
+            ],
+            'series': [
+                {
+                    'name': content['ylabel'],
+                    'data': content['y'],
+                    'type': 'line',
+                    'yAxisIndex': 0
+                },
+            ],
+        }
+        solara.FigureEcharts(option=options)
+
+
+@solara.component
+def Page():
+    solara.Title("Inference")
+    with solara.Sidebar():
+        with solara.lab.Tabs():
+            with solara.lab.Tab("REWARD"):
+                with solara.Card(title="Reward Selection", subtitle="Choose an appropriate reward from the list."):
+                    solara.Select(label="choose reward", value=selected_reward_label.value, values=reward_labels,
+                                  on_value=selected_reward_label.set)
+                    chosen_reward_index = reward_labels.index(selected_reward_label.value)
+                    chosen_reward = reward_objects[chosen_reward_index]
+                    solara.Markdown(md_text=chosen_reward.__doc__)
+
+            with solara.lab.Tab("POLICY"):
+                with solara.Card(title="Policy Selection", subtitle="Choose an appropriate policy from the list."):
+                    solara.Select(label="choose policy", value=selected_policy_label.value, values=policy_labels,
+                                  on_value=selected_policy_label.set)
+                    chosen_policy_index = policy_labels.index(selected_policy_label.value)
+                    chosen_policy = policy_objects[chosen_policy_index]
+                    solara.Markdown(md_text=chosen_policy.__doc__)
+
+            with solara.lab.Tab("LOAD"):
+                with solara.Card(title="Load Profile Selection", subtitle="Choose an appropriate load profile from the list."):
+                    solara.Select(label="choose load profile", value=selected_load_label.value, values=load_labels,
+                                  on_value=selected_load_label.set)
+                    chosen_load_index = load_labels.index(selected_load_label.value)
+                    chosen_load = load_objects[chosen_load_index]
+                    solara.Markdown(md_text=chosen_load.__doc__)
+    InferencePlots(local_state.value['render_count'].value)
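A note on the render_count plumbing in inference.py: inference_plot_data is set to the same cur_hist dict object on every step, so the set() call presumably sees an equal value and may skip notifying subscribers; force_render() instead bumps a counter that Page reads and passes into InferencePlots as a prop, forcing a redraw after each step of the load profile. A stripped-down sketch of that pattern (component and variable names here are illustrative, not from the PR):

```python
import solara

render_count = solara.reactive(0)
history = solara.reactive({})

def force_render():
    # Bumping the counter changes a value Page reads, invalidating it.
    render_count.set(render_count.value + 1)

@solara.component
def HistoryView(render_count):
    def run():
        data = history.value
        for step in range(5):
            data.setdefault('y', []).append(step)  # mutate the same dict in place
            history.set(data)                      # same object: may not notify
            force_render()                         # guarantee a redraw per step

    solara.Button(label="Run", on_click=run)
    solara.Text(f"history: {history.value}")

@solara.component
def Page():
    # Reading render_count.value subscribes Page; each bump re-renders HistoryView.
    HistoryView(render_count.value)
```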