hkayabilisim committed
Commit 7e9ee6e · Parent: 9d0ef93

features: inference mechanism

agent/assets/custom.css ADDED
@@ -0,0 +1,5 @@
+header {
+    background-image: url('/static/public/app2scale-logo.png'); /* Logo shown in the dashboard header */
+    background-position: center; /* Center the background image */
+    background-repeat: no-repeat; /* Prevent the image from repeating */
+}
agent/assets/theme2.js ADDED
@@ -0,0 +1,5 @@
+vuetifyThemes = {
+    light: {
+        primary: '#1c4220',
+    }
+}
agent/backend/load.py CHANGED
@@ -1,3 +1,5 @@
+import numpy as np
+
 class ConstantLoad():
     '''Constant load profile'''
 
@@ -13,3 +15,18 @@ class ConstantLoad():
         self.step += 1
         return self.load
 
+class SinusLoad():
+    '''Periodic load profile'''
+
+    def __init__(self, amplitude, period):
+        self.label = """Sinusoidal load"""
+        self.amplitude = amplitude
+        self.period = period
+
+    def __iter__(self):
+        self.step = 0
+        return self
+
+    def __next__(self):
+        self.step += 1
+        return max(0, self.amplitude + self.amplitude * np.sin(2 * np.pi * (self.step % self.period) / self.period))
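For context, a minimal sketch of how these iterator-style load profiles are consumed (assuming the agent package is importable from the repo root); the dashboard's inference loop does the same thing with an nsteps cap:

    import itertools
    from agent.backend.load import SinusLoad  # added in this commit

    # Load profiles are infinite iterators; take a fixed number of steps.
    profile = SinusLoad(amplitude=180, period=100)
    for load in itertools.islice(profile, 5):
        # Values oscillate between 0 and 2 * amplitude, clamped at zero.
        print(f"{load:.1f}")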
agent/backend/policy.py CHANGED
@@ -1,7 +1,7 @@
 import pandas as pd
 from .utils import predict_dict
 
-class Policy1():
+class PolicyArgMax():
     '''Choose input for which max reward is obtained. Pseudocode:
 
     * Determine the input features of the models a.k.a. state variables
@@ -16,21 +16,40 @@ $$ \max_{state} reward(model(state)) $$
     '''
 
     def __init__(self):
-        self.label = """PolicyArgMax: finds the best state giving max reward """
+        self.label = """PolicyArgMax: pursue max reward"""
 
-    def choose(self, model, ds, inputs, reward_fn):
-        input_df, output_df = predict_dict(model, ds, inputs)
-
-        print('Policy choose')
-        print(input_df)
-        print(output_df)
+    def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
+        input_df, output_df = predict_dict(model, ds, input_ranges)
 
         io_df = pd.concat([input_df, output_df], axis=1)
 
         io_df['reward'] = io_df.apply(lambda row: reward_fn.calculate(row), axis=1)
-        print(io_df)
-        print(io_df.columns)
         max_reward_index = io_df['reward'].argmax()
-        best_state = io_df.loc[max_reward_index].to_dict()
-        print('best state', best_state)
-        return best_state
+        next_state = io_df.loc[max_reward_index].to_dict()
+        return next_state
+
+
+class PolicyHPA():
+    '''Increase/decrease the replica count when CPU usage is above/below a threshold.
+
+    **Remarks**
+
+    * Reward functions are not used.
+    '''
+
+    def __init__(self, threshold=0.4):
+        self.label = f"""Kubernetes HPA (cpu threshold={threshold})"""
+        self.threshold = threshold
+
+    def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
+        next_state = cur_state.copy()
+
+        if cur_metrics['cpu_usage'] > self.threshold:
+            next_state['replica'] = min(cur_state['replica'] + 1, max(input_ranges['replica']))
+        elif cur_metrics['cpu_usage'] < self.threshold:
+            next_state['replica'] = max(cur_state['replica'] - 1, min(input_ranges['replica']))
+
+        return next_state
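A quick, illustrative check of PolicyHPA's scaling rule; the model, ds, and reward_fn arguments are unused by this policy, so None is passed, and the numbers are made up:

    from agent.backend.policy import PolicyHPA  # added in this commit

    policy = PolicyHPA(threshold=0.4)
    input_ranges = {'replica': [1, 2, 3, 4]}  # replica bounds come from the explored values
    cur_state = {'replica': 2, 'cpu': 4, 'expected_tps': 100}

    # CPU usage above the threshold: scale up by one replica, capped at max(input_ranges).
    print(policy.choose(None, None, input_ranges, cur_state, {'cpu_usage': 0.9}, None))
    # {'replica': 3, 'cpu': 4, 'expected_tps': 100}

    # CPU usage below the threshold: scale down by one replica, floored at min(input_ranges).
    print(policy.choose(None, None, input_ranges, cur_state, {'cpu_usage': 0.1}, None))
    # {'replica': 1, 'cpu': 4, 'expected_tps': 100}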
agent/backend/reward.py CHANGED
@@ -1,28 +1,34 @@
-class Reward1():
-    '''Reward is simply CPU utilization, nothing more: higher cpu utilization, higher reward.
+class RewardHighCPUUsage():
+    '''Promotes high CPU usage.
+
+    Reward is simply the CPU usage. When more than one pod is running,
+    the CPU usage is the average over the pods.
 
     #### Formula
     $$ r = c $$
-    where $r$ and $c$ are reward and cpu utilizations, respectively.
+    where $r$ and $c$ are the reward and the CPU usage, respectively.
     '''
 
     def __init__(self):
-        self.label = """Reward1: prefer high cpu utilization"""
+        self.label = """RewardHighCPUUsage"""
 
     def calculate(self, state):
         return state["cpu_usage"]
 
-class Reward2():
-    '''Reward is the negative of CPU utilization: hence lower cpu utilization, higher reward.
+class RewardLowCPUUsage():
+    '''Promotes low CPU usage.
+
+    Reward is the negative of the CPU usage. When more than one pod is running,
+    the CPU usage is the average over the pods.
 
     #### Formula
     $$ r = - c $$
-    where $r$ and $c$ are reward and cpu utilizations, respectively.
+    where $r$ and $c$ are the reward and the CPU usage, respectively.
     '''
 
     def __init__(self):
-        self.label = """Reward2: prefer low cpu utilization"""
+        self.label = """RewardLowCPUUsage"""
 
     def calculate(self, state):
         return -state["cpu_usage"]
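Both reward classes reduce to one-liners over the metrics row ($r = c$ and $r = -c$); a toy check, using a plain dict in place of a DataFrame row (both support the same key lookup):

    from agent.backend.reward import RewardHighCPUUsage, RewardLowCPUUsage

    row = {"cpu_usage": 0.7}  # illustrative predicted metrics for one candidate state
    print(RewardHighCPUUsage().calculate(row))  #  0.7 -> higher usage, higher reward
    print(RewardLowCPUUsage().calculate(row))   # -0.7 -> higher usage, lower reward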
agent/backend/utils.py CHANGED
@@ -36,7 +36,27 @@ def predict_dict(model, ds, inputs: Dict[str, Union[List[int], List[float]]]):
     #print(output_df_transformed)
     return input_df, output_df_transformed
 
-
+def estimate_metrics(model, ds, cur_state):
+    '''Predict the metrics of a single state with the trained twin model.'''
+    # Wrap each scalar into a one-element list so predict_dict's grid has exactly one row.
+    input_ranges = {key: [value] for key, value in cur_state.items()}
+    input_df, output_df = predict_dict(model, ds, input_ranges)
+    est_metrics = {metric: output_df.loc[0, metric] for metric in output_df.columns}
+    return est_metrics
+
+def read_metrics(df, cur_state):
+    '''Sample observed metrics for the current state from the exploration data.'''
+    cols = list(df.columns)
+    dff = df[cols]
+    # TODO: filter on every feature of cur_state; currently hardcoded to replica == 1.
+    dff = dff.query('replica == 1')
+    output_df = dff.sample(1)
+    for feature in ['replica', 'cpu', 'expected_tps']:
+        if feature in cols:
+            cols.remove(feature)
+    output_df = output_df[cols].reset_index(drop=True)
+    metrics = {metric: output_df.loc[0, metric] for metric in output_df.columns}
+    return metrics
 
 def train(ds: ExplorationDataset, model_name, trn_ratio,
           batch_size_trn, batch_size_val, optimizer_name, learning_rate,
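Note that read_metrics draws a random observed row (and currently filters on replica == 1 only), so repeated calls may return different metrics for the same state. A toy demonstration, with column names taken from this diff:

    import pandas as pd
    from agent.backend.utils import read_metrics

    # Tiny stand-in for the exploration data.
    df = pd.DataFrame({
        "replica":      [1, 1, 2],
        "cpu":          [4, 4, 4],
        "expected_tps": [100, 150, 150],
        "cpu_usage":    [0.35, 0.52, 0.30],
    })

    cur_state = {"replica": 1, "cpu": 4, "expected_tps": 150}
    print(read_metrics(df, cur_state))  # {'cpu_usage': 0.35} or {'cpu_usage': 0.52}, chosen at random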
agent/dashboard/data.py CHANGED
@@ -7,6 +7,7 @@ import numpy as np
 def read_data():
     df = pd.read_csv('agent/data/averaged_full_state_data.csv')
     df = df.infer_objects()
+    df['step'] = df.index
     for col in df.columns:
         if df.dtypes[col] == np.float64:
             df[col] = df[col].apply(lambda x: round(x, 6))
@@ -88,6 +89,7 @@ def DataViewer(df):
             size_max=state.value['size_max'].value,
             log_x=state.value['logx'].value,
             log_y=state.value['logy'].value,
+            width=800,
         )
     else:
         solara.Warning("Select x and y columns")
agent/dashboard/inference.py CHANGED
@@ -1,11 +1,11 @@
 import solara
 import pandas as pd
-from ..backend.reward import Reward1, Reward2
-from ..backend.policy import Policy1
-from ..backend.load import ConstantLoad
+from ..backend.reward import RewardHighCPUUsage, RewardLowCPUUsage
+from ..backend.policy import PolicyArgMax, PolicyHPA
+from ..backend.load import ConstantLoad, SinusLoad
 from .training import local_state as training_state
 from .data import state as data_state
-from ..backend.utils import predict_dict, Plot1D
+from ..backend.utils import estimate_metrics, read_metrics
 
 local_state = solara.reactive(
     {
@@ -15,19 +15,23 @@ local_state = solara.reactive(
     }
 )
 
-reward_objects = [Reward1(), Reward2()]
+reward_objects = [RewardHighCPUUsage(), RewardLowCPUUsage()]
 reward_labels = [r.label for r in reward_objects]
 selected_reward_label = solara.reactive(reward_labels[0])
 
-policy_objects = [Policy1()]
+policy_objects = [PolicyArgMax(), PolicyHPA(0.2), PolicyHPA(0.4), PolicyHPA(0.6), PolicyHPA(0.8)]
 policy_labels = [p.label for p in policy_objects]
 selected_policy_label = solara.reactive(policy_labels[0])
 
-load_objects = [ConstantLoad(24), ConstantLoad(72), ConstantLoad(168)]
+load_objects = [ConstantLoad(24), ConstantLoad(72), ConstantLoad(168), SinusLoad(180, 100)]
 load_labels = [p.label for p in load_objects]
 selected_load_label = solara.reactive(load_labels[0])
 
 nsteps = solara.reactive(10)
+initial_replica = solara.reactive(1)
+initial_cpu = solara.reactive(4)
+
+use_model_to_estimate_metrics = solara.reactive(False)
 
 inference_history = solara.reactive({})
 
@@ -53,28 +57,38 @@ def InferencePlots(render_count):
     chosen_policy_index = policy_labels.index(selected_policy_label.value)
     chosen_policy = policy_objects[chosen_policy_index]
     chosen_reward_index = reward_labels.index(selected_reward_label.value)
-    chosen_reward = reward_objects[chosen_reward_index]
+    chosen_reward_fn = reward_objects[chosen_reward_index]
     ds = training_state.value['ds'].value
 
     df = ds.df
     # get all possible values for inputs
-    input = {col: list(pd.unique(df[col])) for col in input_cols}
+    input_ranges = {col: list(pd.unique(df[col])) for col in input_cols}
 
     # Step through load profile
     load_profile = chosen_load
     step = 0
     cur_hist = {}
+    replica = initial_replica.value
+    cpu = initial_cpu.value
     for load in load_profile:
         if step > nsteps.value:
             break
         # the model uses load as an input, supply with it
-        if 'expected_tps' in input.keys():
-            input['expected_tps'] = [load]
-
-
-        best_state = chosen_policy.choose(model, ds, input, chosen_reward)
-        for state, value in best_state.items():
+        if 'expected_tps' in input_ranges.keys():
+            input_ranges['expected_tps'] = [load]
+
+        cur_state = {"replica": replica, "cpu": cpu, "expected_tps": load}
+
+        if use_model_to_estimate_metrics.value:
+            cur_metrics = estimate_metrics(model, ds, cur_state)
+        else:
+            cur_metrics = read_metrics(df, cur_state)
+
+        combined_data = cur_state | cur_metrics
+        for state, value in combined_data.items():
             if state in cur_hist.keys():
                 cur_hist[state]['y'].append(value)
                 cur_hist[state]['x'].append(step)
@@ -88,16 +102,29 @@ def InferencePlots(render_count):
             cur_hist[state]['title'] = state
            cur_hist[state]['xlabel'] = 'step'
             cur_hist[state]['ylabel'] = state
-            #print(cur_hist)
-
         local_state.value['inference_plot_data'].set(cur_hist)
         force_render()
 
+        next_state = chosen_policy.choose(model, ds, input_ranges, cur_state, cur_metrics, chosen_reward_fn)
+        if 'replica' in next_state.keys():
+            replica = next_state['replica']
+        if 'cpu' in next_state.keys():
+            cpu = next_state['cpu']
+
         step += 1
-        #print(local_state.value['inference_plot_data'].value)
 
 
-    solara.InputInt(label='Number of steps', value=nsteps.value, on_value=nsteps.set)
+    with solara.Row():
+        solara.InputInt(label='Number of steps', value=nsteps.value, on_value=nsteps.set)
+        solara.InputInt(label="Initial replica", value=initial_replica)
+        solara.InputInt(label="Initial CPU", value=initial_cpu)
+    if set(training_state.value['input_cols'].value) == set(['replica','cpu','expected_tps']):
+        solara.Checkbox(label='Use twin model to estimate metrics', value=use_model_to_estimate_metrics)
+    else:
+        with solara.Column():
+            solara.Checkbox(label='Use twin model to estimate metrics', value=use_model_to_estimate_metrics, disabled=True)
+            solara.Info('twin model is not suitable for metric estimation')
+
     model = training_state.value['model'].value
     if model is None:
         solara.Warning("Model is not ready yet!")
@@ -106,48 +133,50 @@ def InferencePlots(render_count):
 
 
     #print('Interence plots')
-    for col, content in local_state.value['inference_plot_data'].value.items():
-        options = {
-            'title': {
-                'text': content['title'],
-                'left': 'center'},
-            'tooltip': {
-                'trigger': 'axis',
-                'axisPointer': {
-                    'type': 'cross'
-                }
-            },
-            'xAxis': {
-                'axisTick': {
-                    'alignWithLabel': True
-                },
-                'data': content['x'],
-                'name': content['xlabel'],
-                'nameLocation': 'middle',
-                'nameTextStyle': {'verticalAlign': 'top','padding': [10, 0, 0, 0]}
-            },
-            'yAxis': [
-                {
-                    'type': 'value',
-                    'name': content['ylabel'],
-                    'position': 'left',
-                    'alignTicks': True,
-                    'axisLine': {
-                        'show': True,
-                        'lineStyle': {'color': 'green'}}
-                },
-            ],
-            'series': [
-                {
-                    'name': content['ylabel'],
-                    'data': content['y'],
-                    'type': 'line',
-                    'yAxisIndex': 0
-                },
-            ],
-        }
-        solara.FigureEcharts(option=options)
-
+    with solara.ColumnsResponsive():
+        for col, content in local_state.value['inference_plot_data'].value.items():
+            options = {
+                'title': {
+                    'text': content['title'],
+                    'left': 'center'},
+                'tooltip': {
+                    'trigger': 'axis',
+                    'axisPointer': {
+                        'type': 'cross'
+                    }
+                },
+                'xAxis': {
+                    'axisTick': {
+                        'alignWithLabel': True
+                    },
+                    'data': content['x'],
+                    'name': content['xlabel'],
+                    'nameLocation': 'middle',
+                    'nameTextStyle': {'verticalAlign': 'top','padding': [10, 0, 0, 0]}
+                },
+                'yAxis': [
+                    {
+                        'type': 'value',
+                        'name': content['ylabel'],
+                        'position': 'left',
+                        'alignTicks': True,
+                        'axisLine': {
+                            'show': True,
+                            'lineStyle': {'color': 'green'}}
+                    },
+                ],
+                'series': [
+                    {
+                        'name': content['ylabel'],
+                        'data': content['y'],
+                        'type': 'line',
+                        'yAxisIndex': 0
+                    },
+                ],
+            }
+            solara.FigureEcharts(option=options, attributes={"style": "height: 300px; width: 300px"})
 
 @solara.component
 def Page():
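The loop above is the heart of the new inference mechanism: build the current state, obtain its metrics (from the twin model or from recorded data), let the policy pick the next state, and repeat. A headless sketch of the same cycle without the Solara plumbing; fake_metrics is a hypothetical stand-in for estimate_metrics/read_metrics:

    from agent.backend.load import SinusLoad
    from agent.backend.policy import PolicyHPA

    load_profile = SinusLoad(180, 100)
    policy = PolicyHPA(threshold=0.4)
    input_ranges = {'replica': [1, 2, 3, 4]}

    def fake_metrics(state):
        # Stand-in: usage grows with load and shrinks as replicas are added.
        return {'cpu_usage': min(1.0, state['expected_tps'] / (200 * state['replica']))}

    replica, cpu = 1, 4
    for step, load in enumerate(load_profile):
        if step >= 10:
            break
        cur_state = {'replica': replica, 'cpu': cpu, 'expected_tps': load}
        cur_metrics = fake_metrics(cur_state)
        next_state = policy.choose(None, None, input_ranges, cur_state, cur_metrics, None)
        replica, cpu = next_state['replica'], next_state['cpu']
        print(step, round(load), round(cur_metrics['cpu_usage'], 2), '-> replica', replica)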
agent/dashboard/training.py CHANGED
@@ -9,7 +9,7 @@ from ..backend.loss import loss_mape
 
 local_state = solara.reactive(
     {
-        'input_cols': solara.reactive(['replica']),
+        'input_cols': solara.reactive(['replica','cpu','expected_tps']),
         'output_cols': solara.reactive(['cpu_usage']),
         'trn_ratio' : solara.reactive(0.8),
         'learning_rate_log10': solara.reactive(-3),
@@ -17,7 +17,7 @@ local_state = solara.reactive(
         'batch_size_val': solara.reactive(16),
         'model_name': solara.reactive("Perceptron"),
         'optimizer_name': solara.reactive("Adam"),
-        'max_epoch': solara.reactive(100),
+        'max_epoch': solara.reactive(30),
         'loss_name': solara.reactive('mape'),
         'loss_plot_data': solara.reactive({'epoch': [], 'trn_loss': [], 'val_loss': []}),
         'render_count': solara.reactive(0),
agent/public/app2scale-logo.png ADDED