|
|
import pandas as pd |
|
|
from .utils import predict_dict |
|
|
|
|
|
class PolicyArgMax():
    r'''Choose the input for which the maximum reward is obtained. Pseudocode:

    * Determine the input features of the model a.k.a. state variables

    * Determine all possible state combinations

    * For each state, run the model, obtain the output and calculate the reward

    * Return the state giving the maximum reward

    #### Formula

    $$ \max_{state} reward(model(state)) $$

    '''

    def __init__(self):
        # Human-readable name of the policy.
        self.label = """PolicyArgMax: pursue max reward """

    def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
        '''Return the candidate row with the highest reward as a dict.

        **Parameters**

        * model, ds, input_ranges: forwarded unchanged to `predict_dict`,
          which presumably enumerates the candidate inputs and predicts the
          model output for each of them (TODO confirm against `utils`).
        * cur_state, cur_metrics: not used by this policy; accepted so that
          every policy exposes the same `choose` signature.
        * reward_fn: object whose `calculate(row)` method is called once per
          candidate row and returns that row's reward.

        **Returns**

        A dict built from the best row. It holds the input columns, the
        output columns and the computed `reward` value.
        '''
        input_df, output_df = predict_dict(model, ds, input_ranges)

        # One row per candidate: inputs and outputs side by side.
        io_df = pd.concat([input_df, output_df], axis=1)
        io_df['reward'] = io_df.apply(reward_fn.calculate, axis=1)

        # Series.argmax() yields a *position*, not an index label, so the row
        # has to be picked with the positional indexer. Using .loc here only
        # works by accident when the frame carries a default 0..n-1 index.
        best_position = io_df['reward'].argmax()
        next_state = io_df.iloc[best_position].to_dict()

        return next_state
|
|
|
|
|
|
|
|
class PolicyHPA():
    '''Move the replica count one step up or down around a cpu usage threshold.

    The replica count is raised by one when cpu usage exceeds the threshold
    and lowered by one when it falls below it. The result is always kept
    within the bounds of the allowed replica values.

    **Remarks**

    * reward functions are not used.

    '''

    def __init__(self, threshold = 0.4):
        self.threshold = threshold
        self.label = f"""Kubernetes HPA (cpu threshold={threshold})"""

    def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
        '''Return a copy of `cur_state` with `replica` adjusted by at most one.

        **Parameters**

        * model, ds, reward_fn: not used by this policy; accepted so that
          every policy exposes the same `choose` signature.
        * input_ranges: mapping whose `replica` entry lists the allowed
          replica counts; its min and max bound the result.
        * cur_state: mapping with the current `replica` count. It is copied,
          never modified.
        * cur_metrics: mapping with the observed `cpu_usage`.

        **Returns**

        The new state. It equals `cur_state` when cpu usage is exactly at
        the threshold.
        '''
        proposal = cur_state.copy()
        usage = cur_metrics['cpu_usage']

        if usage > self.threshold:
            # Scale out, but never beyond the largest allowed replica count.
            scaled_out = cur_state['replica'] + 1
            ceiling = max(input_ranges['replica'])
            proposal['replica'] = min(scaled_out, ceiling)
            return proposal

        if usage < self.threshold:
            # Scale in, but never below the smallest allowed replica count.
            scaled_in = cur_state['replica'] - 1
            floor = min(input_ranges['replica'])
            proposal['replica'] = max(scaled_in, floor)
            return proposal

        # Usage sits exactly on the threshold: keep the state unchanged.
        return proposal