import pandas as pd from .utils import predict_dict class PolicyArgMax(): '''Choose input for which max reward is obtained. Pseudocode: * Determine the input features of the models a.k.a. state variables * Determine all possible state combinations * For each state, run the model, obtain the output and calculate reward * Return the state giving the maximum reward #### Formula $$ \max_{state} reward(model(state)) $$ ''' def __init__(self): self.label = """PolicyArgMax: pursue max reward """ def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn): #print(input_ranges) input_df, output_df = predict_dict(model, ds, input_ranges) io_df = pd.concat([input_df, output_df],axis=1) io_df['reward'] = io_df.apply(lambda row: reward_fn.calculate(row), axis=1) max_reward_index = io_df['reward'].argmax() next_state = io_df.loc[max_reward_index].to_dict() #print('next state', next_state) return next_state class PolicyHPA(): '''Increase/decrease replica if cpu usage is above/below threshold **Remarks** * reward functions are not used. ''' def __init__(self, threshold = 0.4): self.label = f"""Kubernetes HPA (cpu threshold={threshold})""" self.threshold = threshold def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn): next_state = cur_state.copy() if cur_metrics['cpu_usage'] > self.threshold: next_state['replica'] = min(cur_state['replica'] + 1, max(input_ranges['replica'])) elif cur_metrics['cpu_usage'] < self.threshold: next_state['replica'] = max(cur_state['replica'] - 1, min(input_ranges['replica'])) return next_state