hkayabilisim committed
Commit 7e9ee6e · Parent: 9d0ef93

features: inference mechanism

agent/assets/custom.css ADDED
@@ -0,0 +1,5 @@
+header {
+    background-image: url('/static/public/app2scale-logo.png'); /* Logo shown in the dashboard header */
+    background-position: center; /* Center the background image */
+    background-repeat: no-repeat; /* Prevent the image from repeating */
+}
agent/assets/theme2.js ADDED
@@ -0,0 +1,5 @@
+vuetifyThemes = {
+    light: {
+        primary: '#1c4220',
+    }
+}
agent/backend/load.py CHANGED
@@ -1,3 +1,5 @@
+import numpy as np
+
 class ConstantLoad():
     '''Constant load profile'''
 
@@ -13,3 +15,18 @@ class ConstantLoad():
         self.step += 1
         return self.load
 
+class SinusLoad():
+    '''Periodic load profile'''
+
+    def __init__(self, amplitude, period):
+        self.label = """Sinusoidal load"""
+        self.amplitude = amplitude
+        self.period = period
+
+    def __iter__(self):
+        self.step = 0
+        return self
+
+    def __next__(self):
+        self.step += 1
+        return max(0, self.amplitude + self.amplitude * np.sin(2 * np.pi * (self.step % self.period) / self.period))
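For context, a minimal sketch of how these iterator-style load profiles are consumed (assuming the agent package is importable from the repo root); the dashboard's inference loop does the same thing with an nsteps cap:

    import itertools
    from agent.backend.load import SinusLoad  # added in this commit

    # Load profiles are infinite iterators; take a fixed number of steps.
    profile = SinusLoad(amplitude=180, period=100)
    for load in itertools.islice(profile, 5):
        # Values oscillate between 0 and 2 * amplitude, clamped at zero.
        print(f"{load:.1f}")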
agent/backend/policy.py CHANGED
@@ -1,7 +1,7 @@
 import pandas as pd
 from .utils import predict_dict
 
-class Policy1():
+class PolicyArgMax():
     '''Choose input for which max reward is obtained. Pseudocode:
 
     * Determine the input features of the models a.k.a. state variables
@@ -16,21 +16,40 @@ $$ \max_{state} reward(model(state)) $$
     '''
 
     def __init__(self):
-        self.label = """PolicyArgMax: finds the best state giving max reward """
+        self.label = """PolicyArgMax: pursue max reward"""
 
-    def choose(self, model, ds, inputs, reward_fn):
-        input_df, output_df = predict_dict(model, ds, inputs)
-
-        print('Policy choose')
-        print(input_df)
-        print(output_df)
+    def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
+        input_df, output_df = predict_dict(model, ds, input_ranges)
 
         io_df = pd.concat([input_df, output_df], axis=1)
 
         io_df['reward'] = io_df.apply(lambda row: reward_fn.calculate(row), axis=1)
-        print(io_df)
-        print(io_df.columns)
         max_reward_index = io_df['reward'].argmax()
-        best_state = io_df.loc[max_reward_index].to_dict()
-        print('best state', best_state)
-        return best_state
+        next_state = io_df.loc[max_reward_index].to_dict()
+        return next_state
+
+
+class PolicyHPA():
+    '''Increase/decrease the replica count when CPU usage is above/below a threshold.
+
+    **Remarks**
+
+    * Reward functions are not used.
+    '''
+
+    def __init__(self, threshold=0.4):
+        self.label = f"""Kubernetes HPA (cpu threshold={threshold})"""
+        self.threshold = threshold
+
+    def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
+        next_state = cur_state.copy()
+
+        if cur_metrics['cpu_usage'] > self.threshold:
+            next_state['replica'] = min(cur_state['replica'] + 1, max(input_ranges['replica']))
+        elif cur_metrics['cpu_usage'] < self.threshold:
+            next_state['replica'] = max(cur_state['replica'] - 1, min(input_ranges['replica']))
+
+        return next_state
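A quick, illustrative check of PolicyHPA's scaling rule; the model, ds, and reward_fn arguments are unused by this policy, so None is passed, and the numbers are made up:

    from agent.backend.policy import PolicyHPA  # added in this commit

    policy = PolicyHPA(threshold=0.4)
    input_ranges = {'replica': [1, 2, 3, 4]}  # replica bounds come from the explored values
    cur_state = {'replica': 2, 'cpu': 4, 'expected_tps': 100}

    # CPU usage above the threshold: scale up by one replica, capped at max(input_ranges).
    print(policy.choose(None, None, input_ranges, cur_state, {'cpu_usage': 0.9}, None))
    # {'replica': 3, 'cpu': 4, 'expected_tps': 100}

    # CPU usage below the threshold: scale down by one replica, floored at min(input_ranges).
    print(policy.choose(None, None, input_ranges, cur_state, {'cpu_usage': 0.1}, None))
    # {'replica': 1, 'cpu': 4, 'expected_tps': 100}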
agent/backend/reward.py CHANGED
@@ -1,28 +1,34 @@
-class Reward1():
-    '''Reward is simply CPU utilization, nothing more: higher cpu utilization, higher reward.
+class RewardHighCPUUsage():
+    '''Promotes high CPU usage.
+
+    Reward is simply the CPU usage. When more than one pod is running,
+    the CPU usage is the average over the pods.
 
     #### Formula
     $$ r = c $$
-    where $r$ and $c$ are reward and cpu utilizations, respectively.
+    where $r$ and $c$ are the reward and the CPU usage, respectively.
     '''
 
     def __init__(self):
-        self.label = """Reward1: prefer high cpu utilization"""
+        self.label = """RewardHighCPUUsage"""
 
     def calculate(self, state):
         return state["cpu_usage"]
 
-class Reward2():
-    '''Reward is the negative of CPU utilization: hence lower cpu utilization, higher reward.
+class RewardLowCPUUsage():
+    '''Promotes low CPU usage.
+
+    Reward is the negative of the CPU usage. When more than one pod is running,
+    the CPU usage is the average over the pods.
 
     #### Formula
     $$ r = - c $$
-    where $r$ and $c$ are reward and cpu utilizations, respectively.
+    where $r$ and $c$ are the reward and the CPU usage, respectively.
     '''
 
     def __init__(self):
-        self.label = """Reward2: prefer low cpu utilization"""
+        self.label = """RewardLowCPUUsage"""
 
     def calculate(self, state):
         return -state["cpu_usage"]
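Both reward classes reduce to one-liners over the metrics row ($r = c$ and $r = -c$); a toy check, using a plain dict in place of a DataFrame row (both support the same key lookup):

    from agent.backend.reward import RewardHighCPUUsage, RewardLowCPUUsage

    row = {"cpu_usage": 0.7}  # illustrative predicted metrics for one candidate state
    print(RewardHighCPUUsage().calculate(row))  #  0.7 -> higher usage, higher reward
    print(RewardLowCPUUsage().calculate(row))   # -0.7 -> higher usage, lower reward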
agent/backend/utils.py CHANGED
@@ -36,7 +36,27 @@ def predict_dict(model, ds, inputs: Dict[str, Union[List[int], List[float]]]):
     #print(output_df_transformed)
     return input_df, output_df_transformed
 
-
+def estimate_metrics(model, ds, cur_state):
+    '''Predict the metrics of a single state with the trained twin model.'''
+    # Wrap each scalar into a one-element list so predict_dict's grid has exactly one row.
+    input_ranges = {key: [value] for key, value in cur_state.items()}
+    input_df, output_df = predict_dict(model, ds, input_ranges)
+    est_metrics = {metric: output_df.loc[0, metric] for metric in output_df.columns}
+    return est_metrics
+
+def read_metrics(df, cur_state):
+    '''Sample observed metrics for the current state from the exploration data.'''
+    cols = list(df.columns)
+    dff = df[cols]
+    # TODO: filter on every feature of cur_state; currently hardcoded to replica == 1.
+    dff = dff.query('replica == 1')
+    output_df = dff.sample(1)
+    for feature in ['replica', 'cpu', 'expected_tps']:
+        if feature in cols:
+            cols.remove(feature)
+    output_df = output_df[cols].reset_index(drop=True)
+    metrics = {metric: output_df.loc[0, metric] for metric in output_df.columns}
+    return metrics
 
 def train(ds: ExplorationDataset, model_name, trn_ratio,
           batch_size_trn, batch_size_val, optimizer_name, learning_rate,
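Note that read_metrics draws a random observed row (and currently filters on replica == 1 only), so repeated calls may return different metrics for the same state. A toy demonstration, with column names taken from this diff:

    import pandas as pd
    from agent.backend.utils import read_metrics

    # Tiny stand-in for the exploration data.
    df = pd.DataFrame({
        "replica":      [1, 1, 2],
        "cpu":          [4, 4, 4],
        "expected_tps": [100, 150, 150],
        "cpu_usage":    [0.35, 0.52, 0.30],
    })

    cur_state = {"replica": 1, "cpu": 4, "expected_tps": 150}
    print(read_metrics(df, cur_state))  # {'cpu_usage': 0.35} or {'cpu_usage': 0.52}, chosen at random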
agent/dashboard/data.py CHANGED
@@ -7,6 +7,7 @@ import numpy as np
 def read_data():
     df = pd.read_csv('agent/data/averaged_full_state_data.csv')
     df = df.infer_objects()
+    df['step'] = df.index
     for col in df.columns:
         if df.dtypes[col] == np.float64:
             df[col] = df[col].apply(lambda x: round(x, 6))
@@ -88,6 +89,7 @@ def DataViewer(df):
             size_max=state.value['size_max'].value,
             log_x=state.value['logx'].value,
             log_y=state.value['logy'].value,
+            width=800,
         )
     else:
         solara.Warning("Select x and y columns")
agent/dashboard/inference.py CHANGED
@@ -1,11 +1,11 @@
 import solara
 import pandas as pd
-from ..backend.reward import Reward1, Reward2
-from ..backend.policy import Policy1
-from ..backend.load import ConstantLoad
+from ..backend.reward import RewardHighCPUUsage, RewardLowCPUUsage
+from ..backend.policy import PolicyArgMax, PolicyHPA
+from ..backend.load import ConstantLoad, SinusLoad
 from .training import local_state as training_state
 from .data import state as data_state
-from ..backend.utils import predict_dict, Plot1D
+from ..backend.utils import estimate_metrics, read_metrics
 
 local_state = solara.reactive(
     {
@@ -15,19 +15,23 @@ local_state = solara.reactive(
     }
 )
 
-reward_objects = [Reward1(), Reward2()]
+reward_objects = [RewardHighCPUUsage(), RewardLowCPUUsage()]
 reward_labels = [r.label for r in reward_objects]
 selected_reward_label = solara.reactive(reward_labels[0])
 
-policy_objects = [Policy1()]
+policy_objects = [PolicyArgMax(), PolicyHPA(0.2), PolicyHPA(0.4), PolicyHPA(0.6), PolicyHPA(0.8)]
 policy_labels = [p.label for p in policy_objects]
 selected_policy_label = solara.reactive(policy_labels[0])
 
-load_objects = [ConstantLoad(24), ConstantLoad(72), ConstantLoad(168)]
+load_objects = [ConstantLoad(24), ConstantLoad(72), ConstantLoad(168), SinusLoad(180, 100)]
 load_labels = [p.label for p in load_objects]
 selected_load_label = solara.reactive(load_labels[0])
 
 nsteps = solara.reactive(10)
+initial_replica = solara.reactive(1)
+initial_cpu = solara.reactive(4)
+
+use_model_to_estimate_metrics = solara.reactive(False)
 
 inference_history = solara.reactive({})
 
@@ -53,28 +57,38 @@ def InferencePlots(render_count):
     chosen_policy_index = policy_labels.index(selected_policy_label.value)
     chosen_policy = policy_objects[chosen_policy_index]
     chosen_reward_index = reward_labels.index(selected_reward_label.value)
-    chosen_reward = reward_objects[chosen_reward_index]
+    chosen_reward_fn = reward_objects[chosen_reward_index]
     ds = training_state.value['ds'].value
 
     df = ds.df
     # get all possible values for inputs
-    input = {col: list(pd.unique(df[col])) for col in input_cols}
+    input_ranges = {col: list(pd.unique(df[col])) for col in input_cols}
 
     # Step through load profile
     load_profile = chosen_load
     step = 0
     cur_hist = {}
+    replica = initial_replica.value
+    cpu = initial_cpu.value
     for load in load_profile:
         if step > nsteps.value:
             break
         # the model uses load as an input, supply with it
-        if 'expected_tps' in input.keys():
-            input['expected_tps'] = [load]
-
-
-        best_state = chosen_policy.choose(model, ds, input, chosen_reward)
-        for state, value in best_state.items():
+        if 'expected_tps' in input_ranges.keys():
+            input_ranges['expected_tps'] = [load]
+
+        cur_state = {"replica": replica, "cpu": cpu, "expected_tps": load}
+
+        if use_model_to_estimate_metrics.value:
+            cur_metrics = estimate_metrics(model, ds, cur_state)
+        else:
+            cur_metrics = read_metrics(df, cur_state)
+
+        combined_data = cur_state | cur_metrics
+        for state, value in combined_data.items():
             if state in cur_hist.keys():
                 cur_hist[state]['y'].append(value)
                 cur_hist[state]['x'].append(step)
@@ -88,16 +102,29 @@ def InferencePlots(render_count):
             cur_hist[state]['title'] = state
            cur_hist[state]['xlabel'] = 'step'
             cur_hist[state]['ylabel'] = state
-            #print(cur_hist)
-
         local_state.value['inference_plot_data'].set(cur_hist)
         force_render()
 
+        next_state = chosen_policy.choose(model, ds, input_ranges, cur_state, cur_metrics, chosen_reward_fn)
+        if 'replica' in next_state.keys():
+            replica = next_state['replica']
+        if 'cpu' in next_state.keys():
+            cpu = next_state['cpu']
+
         step += 1
-        #print(local_state.value['inference_plot_data'].value)
 
 
-    solara.InputInt(label='Number of steps', value=nsteps.value, on_value=nsteps.set)
+    with solara.Row():
+        solara.InputInt(label='Number of steps', value=nsteps.value, on_value=nsteps.set)
+        solara.InputInt(label="Initial replica", value=initial_replica)
+        solara.InputInt(label="Initial CPU", value=initial_cpu)
+    if set(training_state.value['input_cols'].value) == set(['replica','cpu','expected_tps']):
+        solara.Checkbox(label='Use twin model to estimate metrics', value=use_model_to_estimate_metrics)
+    else:
+        with solara.Column():
+            solara.Checkbox(label='Use twin model to estimate metrics', value=use_model_to_estimate_metrics, disabled=True)
+            solara.Info('twin model is not suitable for metric estimation')
+
     model = training_state.value['model'].value
     if model is None:
         solara.Warning("Model is not ready yet!")
@@ -106,48 +133,50 @@ def InferencePlots(render_count):
 
 
     #print('Interence plots')
-    for col, content in local_state.value['inference_plot_data'].value.items():
-        options = {
-            'title': {
-                'text': content['title'],
-                'left': 'center'},
-            'tooltip': {
-                'trigger': 'axis',
-                'axisPointer': {
-                    'type': 'cross'
-                }
-            },
-            'xAxis': {
-                'axisTick': {
-                    'alignWithLabel': True
-                },
-                'data': content['x'],
-                'name': content['xlabel'],
-                'nameLocation': 'middle',
-                'nameTextStyle': {'verticalAlign': 'top','padding': [10, 0, 0, 0]}
-            },
-            'yAxis': [
-                {
-                    'type': 'value',
-                    'name': content['ylabel'],
-                    'position': 'left',
-                    'alignTicks': True,
-                    'axisLine': {
-                        'show': True,
-                        'lineStyle': {'color': 'green'}}
-                },
-            ],
-            'series': [
-                {
-                    'name': content['ylabel'],
-                    'data': content['y'],
-                    'type': 'line',
-                    'yAxisIndex': 0
-                },
-            ],
-        }
-        solara.FigureEcharts(option=options)
-
+    with solara.ColumnsResponsive():
+        for col, content in local_state.value['inference_plot_data'].value.items():
+            options = {
+                'title': {
+                    'text': content['title'],
+                    'left': 'center'},
+                'tooltip': {
+                    'trigger': 'axis',
+                    'axisPointer': {
+                        'type': 'cross'
+                    }
+                },
+                'xAxis': {
+                    'axisTick': {
+                        'alignWithLabel': True
+                    },
+                    'data': content['x'],
+                    'name': content['xlabel'],
+                    'nameLocation': 'middle',
+                    'nameTextStyle': {'verticalAlign': 'top','padding': [10, 0, 0, 0]}
+                },
+                'yAxis': [
+                    {
+                        'type': 'value',
+                        'name': content['ylabel'],
+                        'position': 'left',
+                        'alignTicks': True,
+                        'axisLine': {
+                            'show': True,
+                            'lineStyle': {'color': 'green'}}
+                    },
+                ],
+                'series': [
+                    {
+                        'name': content['ylabel'],
+                        'data': content['y'],
+                        'type': 'line',
+                        'yAxisIndex': 0
+                    },
+                ],
+            }
+            solara.FigureEcharts(option=options, attributes={"style": "height: 300px; width: 300px"})
 
 @solara.component
 def Page():
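The loop above is the heart of the new inference mechanism: build the current state, obtain its metrics (from the twin model or from recorded data), let the policy pick the next state, and repeat. A headless sketch of the same cycle without the Solara plumbing; fake_metrics is a hypothetical stand-in for estimate_metrics/read_metrics:

    from agent.backend.load import SinusLoad
    from agent.backend.policy import PolicyHPA

    load_profile = SinusLoad(180, 100)
    policy = PolicyHPA(threshold=0.4)
    input_ranges = {'replica': [1, 2, 3, 4]}

    def fake_metrics(state):
        # Stand-in: usage grows with load and shrinks as replicas are added.
        return {'cpu_usage': min(1.0, state['expected_tps'] / (200 * state['replica']))}

    replica, cpu = 1, 4
    for step, load in enumerate(load_profile):
        if step >= 10:
            break
        cur_state = {'replica': replica, 'cpu': cpu, 'expected_tps': load}
        cur_metrics = fake_metrics(cur_state)
        next_state = policy.choose(None, None, input_ranges, cur_state, cur_metrics, None)
        replica, cpu = next_state['replica'], next_state['cpu']
        print(step, round(load), round(cur_metrics['cpu_usage'], 2), '-> replica', replica)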
agent/dashboard/training.py CHANGED
@@ -9,7 +9,7 @@ from ..backend.loss import loss_mape
 
 local_state = solara.reactive(
     {
-        'input_cols': solara.reactive(['replica']),
+        'input_cols': solara.reactive(['replica','cpu','expected_tps']),
         'output_cols': solara.reactive(['cpu_usage']),
         'trn_ratio' : solara.reactive(0.8),
         'learning_rate_log10': solara.reactive(-3),
@@ -17,7 +17,7 @@ local_state = solara.reactive(
         'batch_size_val': solara.reactive(16),
         'model_name': solara.reactive("Perceptron"),
         'optimizer_name': solara.reactive("Adam"),
-        'max_epoch': solara.reactive(100),
+        'max_epoch': solara.reactive(30),
         'loss_name': solara.reactive('mape'),
         'loss_plot_data': solara.reactive({'epoch': [], 'trn_loss': [], 'val_loss': []}),
         'render_count': solara.reactive(0),
agent/public/app2scale-logo.png ADDED