| import pandas as pd |
| from .utils import predict_dict |
|
|
class PolicyArgMax():
    r"""Choose input for which max reward is obtained. Pseudocode:

    * Determine the input features of the models a.k.a. state variables
    * Determine all possible state combinations
    * For each state, run the model, obtain the output and calculate reward
    * Return the state giving the maximum reward

    #### Formula

    $$ \max_{state} reward(model(state)) $$

    """

    def __init__(self):
        self.label = """PolicyArgMax: pursue max reward """

    def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
        """Return the candidate row with the highest reward as a dict.

        Args:
            model: Forwarded unchanged to ``predict_dict``.
            ds: Forwarded unchanged to ``predict_dict``.
            input_ranges: Forwarded unchanged to ``predict_dict``
                (presumably the allowed values per input; confirm against
                ``predict_dict``).
            cur_state: Unused by this policy.
            cur_metrics: Unused by this policy.
            reward_fn: Object whose ``calculate(row)`` is called once per
                candidate row and returns that row's reward.

        Returns:
            dict: The winning row, containing the input columns, the output
            columns and the computed ``'reward'``.
        """
        input_df, output_df = predict_dict(model, ds, input_ranges)

        # Put inputs and predicted outputs side by side so the reward function
        # sees both on every row.
        io_df = pd.concat([input_df, output_df], axis=1)

        return self._select_best(io_df, reward_fn)

    @staticmethod
    def _select_best(io_df, reward_fn):
        """Score every row of ``io_df`` and return the best row as a dict.

        Adds a ``'reward'`` column to ``io_df`` in place.
        """
        io_df['reward'] = io_df.apply(reward_fn.calculate, axis=1)

        # Pick the winner by *position*: after dropping the index, the label
        # returned by idxmax() equals the row position, so the lookup is
        # correct whatever index labels the frame carries (non 0..n-1 or
        # duplicated labels would break a label-based .loc lookup).
        best_position = io_df['reward'].reset_index(drop=True).idxmax()
        return io_df.iloc[best_position].to_dict()
|
|
|
|
class PolicyHPA():
    """Increase/decrease replica if cpu usage is above/below threshold.

    **Remarks**

    * reward functions are not used.
    """

    def __init__(self, threshold=0.4):
        """Store the cpu-usage threshold and build the display label.

        Args:
            threshold: cpu usage value compared against
                ``cur_metrics['cpu_usage']`` in :meth:`choose`.
        """
        self.label = f"""Kubernetes HPA (cpu threshold={threshold})"""
        self.threshold = threshold

    def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
        """Return the next state with ``'replica'`` moved one step if needed.

        Args:
            model: Unused by this policy.
            ds: Unused by this policy.
            input_ranges: Mapping whose ``'replica'`` entry is an iterable of
                allowed replica counts; its min and max are the clamp bounds.
            cur_state: Mapping with the current ``'replica'`` count. It is
                copied (shallow), not modified.
            cur_metrics: Mapping with the current ``'cpu_usage'``.
            reward_fn: Unused by this policy.

        Returns:
            A copy of ``cur_state`` whose ``'replica'`` is incremented when
            cpu usage is above the threshold, decremented when below, and
            left unchanged when exactly equal.
        """
        next_state = cur_state.copy()
        cpu_usage = cur_metrics['cpu_usage']
        allowed_replicas = input_ranges['replica']

        if cpu_usage > self.threshold:
            # Scale out by one, never beyond the largest allowed replica count.
            next_state['replica'] = min(cur_state['replica'] + 1, max(allowed_replicas))
        elif cpu_usage < self.threshold:
            # Scale in by one, never below the smallest allowed replica count.
            next_state['replica'] = max(cur_state['replica'] - 1, min(allowed_replicas))

        return next_state