Commit
·
e42f9e7
1
Parent(s):
0080e32
feature: estimates metrics for unavailable states
Browse files
- during the inference phase, the metrics are read from the offline
dataset by querying the current state. When there is no record
corresponding to the state, the metrics are estimated by using
the twin model.
- agent/backend/policy.py +2 -2
- agent/backend/utils.py +5 -4
- agent/dashboard/inference.py +11 -4
agent/backend/policy.py
CHANGED
|
@@ -19,7 +19,7 @@ $$ \max_{state} reward(model(state)) $$
|
|
| 19 |
self.label = """PolicyArgMax: pursue max reward """
|
| 20 |
|
| 21 |
def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
|
| 22 |
-
print(input_ranges)
|
| 23 |
|
| 24 |
input_df, output_df = predict_dict(model, ds, input_ranges)
|
| 25 |
|
|
@@ -28,7 +28,7 @@ $$ \max_{state} reward(model(state)) $$
|
|
| 28 |
io_df['reward'] = io_df.apply(lambda row: reward_fn.calculate(row), axis=1)
|
| 29 |
max_reward_index = io_df['reward'].argmax()
|
| 30 |
next_state = io_df.loc[max_reward_index].to_dict()
|
| 31 |
-
print('next state', next_state)
|
| 32 |
return next_state
|
| 33 |
|
| 34 |
|
|
|
|
| 19 |
self.label = """PolicyArgMax: pursue max reward """
|
| 20 |
|
| 21 |
def choose(self, model, ds, input_ranges, cur_state, cur_metrics, reward_fn):
|
| 22 |
+
#print(input_ranges)
|
| 23 |
|
| 24 |
input_df, output_df = predict_dict(model, ds, input_ranges)
|
| 25 |
|
|
|
|
| 28 |
io_df['reward'] = io_df.apply(lambda row: reward_fn.calculate(row), axis=1)
|
| 29 |
max_reward_index = io_df['reward'].argmax()
|
| 30 |
next_state = io_df.loc[max_reward_index].to_dict()
|
| 31 |
+
#print('next state', next_state)
|
| 32 |
return next_state
|
| 33 |
|
| 34 |
|
agent/backend/utils.py
CHANGED
|
@@ -46,10 +46,11 @@ def read_metrics(df, cur_state):
|
|
| 46 |
cols = list(df.columns)
|
| 47 |
|
| 48 |
dff = df[cols]
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
| 53 |
output_df = dff.sample(1)
|
| 54 |
for feature in ['replica','cpu','expected_tps']:
|
| 55 |
if feature in cols:
|
|
|
|
| 46 |
cols = list(df.columns)
|
| 47 |
|
| 48 |
dff = df[cols]
|
| 49 |
+
for col, value in cur_state.items():
|
| 50 |
+
#print(f'{col} = {value}', dff.columns)
|
| 51 |
+
dff = dff.query(f'{col} == {value}')
|
| 52 |
+
if len(dff) == 0:
|
| 53 |
+
return None
|
| 54 |
output_df = dff.sample(1)
|
| 55 |
for feature in ['replica','cpu','expected_tps']:
|
| 56 |
if feature in cols:
|
agent/dashboard/inference.py
CHANGED
|
@@ -44,7 +44,7 @@ def force_render():
|
|
| 44 |
@solara.component
|
| 45 |
def InferencePlots(render_count):
|
| 46 |
|
| 47 |
-
|
| 48 |
|
| 49 |
def execute():
|
| 50 |
#print(selected_policy_label, selected_reward_label, selected_load_label)
|
|
@@ -62,7 +62,7 @@ def InferencePlots(render_count):
|
|
| 62 |
|
| 63 |
|
| 64 |
df = ds.df
|
| 65 |
-
print(df.columns)
|
| 66 |
# get all possible values for inputs
|
| 67 |
input_ranges = {col: list(pd.unique(df[col])) for col in input_cols}
|
| 68 |
|
|
@@ -72,6 +72,7 @@ def InferencePlots(render_count):
|
|
| 72 |
cur_hist = {}
|
| 73 |
replica = initial_replica.value
|
| 74 |
cpu = initial_cpu.value
|
|
|
|
| 75 |
for load, eod in load_profile:
|
| 76 |
if step > nsteps.value:
|
| 77 |
break
|
|
@@ -83,9 +84,13 @@ def InferencePlots(render_count):
|
|
| 83 |
|
| 84 |
if use_model_to_estimate_metrics.value:
|
| 85 |
cur_metrics = estimate_metrics(model, ds, cur_state)
|
| 86 |
-
print(cur_metrics)
|
| 87 |
else:
|
| 88 |
cur_metrics = read_metrics(df, cur_state)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
combined_data = cur_state | cur_metrics
|
| 91 |
for state, value in combined_data.items():
|
|
@@ -112,6 +117,7 @@ def InferencePlots(render_count):
|
|
| 112 |
cpu = next_state['cpu']
|
| 113 |
|
| 114 |
step += 1
|
|
|
|
| 115 |
|
| 116 |
|
| 117 |
with solara.Row():
|
|
@@ -130,7 +136,8 @@ def InferencePlots(render_count):
|
|
| 130 |
solara.Warning("There is no trained model yet, please train one!")
|
| 131 |
|
| 132 |
solara.Button(label="Execute", on_click=execute, disabled=model is None)
|
| 133 |
-
|
|
|
|
| 134 |
|
| 135 |
#print('Inference plots')
|
| 136 |
with solara.ColumnsResponsive():
|
|
|
|
| 44 |
@solara.component
|
| 45 |
def InferencePlots(render_count):
|
| 46 |
|
| 47 |
+
unavailable_states_in_data, set_unavailable_states_in_data = solara.use_state_or_update(0)
|
| 48 |
|
| 49 |
def execute():
|
| 50 |
#print(selected_policy_label, selected_reward_label, selected_load_label)
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
df = ds.df
|
| 65 |
+
#print(df.columns)
|
| 66 |
# get all possible values for inputs
|
| 67 |
input_ranges = {col: list(pd.unique(df[col])) for col in input_cols}
|
| 68 |
|
|
|
|
| 72 |
cur_hist = {}
|
| 73 |
replica = initial_replica.value
|
| 74 |
cpu = initial_cpu.value
|
| 75 |
+
state_not_found_in_data = 0
|
| 76 |
for load, eod in load_profile:
|
| 77 |
if step > nsteps.value:
|
| 78 |
break
|
|
|
|
| 84 |
|
| 85 |
if use_model_to_estimate_metrics.value:
|
| 86 |
cur_metrics = estimate_metrics(model, ds, cur_state)
|
| 87 |
+
#print(cur_metrics)
|
| 88 |
else:
|
| 89 |
cur_metrics = read_metrics(df, cur_state)
|
| 90 |
+
if cur_metrics is None:
|
| 91 |
+
print('there is no data for this state',cur_state)
|
| 92 |
+
state_not_found_in_data += 1
|
| 93 |
+
cur_metrics = estimate_metrics(model, ds, cur_state)
|
| 94 |
|
| 95 |
combined_data = cur_state | cur_metrics
|
| 96 |
for state, value in combined_data.items():
|
|
|
|
| 117 |
cpu = next_state['cpu']
|
| 118 |
|
| 119 |
step += 1
|
| 120 |
+
set_unavailable_states_in_data(state_not_found_in_data)
|
| 121 |
|
| 122 |
|
| 123 |
with solara.Row():
|
|
|
|
| 136 |
solara.Warning("There is no trained model yet, please train one!")
|
| 137 |
|
| 138 |
solara.Button(label="Execute", on_click=execute, disabled=model is None)
|
| 139 |
+
if unavailable_states_in_data > 0:
|
| 140 |
+
solara.Warning(f'There are {unavailable_states_in_data} unavailable states in data. Estimated versions are used!')
|
| 141 |
|
| 142 |
#print('Inference plots')
|
| 143 |
with solara.ColumnsResponsive():
|