atodorov284 committed on
Commit
0573e79
·
1 Parent(s): d15094c

Add statistics computation to __init__ so it is only done once

Browse files
streamlit_src/controllers/admin_controller.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from views.admin_view import AdminView
3
  import streamlit as st
4
  from controllers.user_controller import UserController
@@ -17,6 +18,9 @@ class AdminController(UserController):
17
  """
18
  super().__init__()
19
  self._view = AdminView()
 
 
 
20
 
21
  def show_dashboard(self) -> None:
22
  """
@@ -38,6 +42,57 @@ class AdminController(UserController):
38
  """
39
  pass
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def _make_custom_predictions(self) -> None:
42
  """
43
  Makes a custom prediction for the admin interface.
@@ -178,54 +233,11 @@ class AdminController(UserController):
178
  Returns:
179
  bool: True if the input data is out of distribution, False otherwise.
180
  """
181
- current_dir = os.path.dirname(os.path.abspath(__file__))
182
- parent_dir = os.path.dirname(current_dir)
183
- grandparent_dir = os.path.dirname(parent_dir)
184
- distribution_data = pd.read_csv(
185
- os.path.join(
186
- grandparent_dir,
187
- "data",
188
- "processed/",
189
- "v2_merged_selected_features_with_missing.csv",
190
- ),
191
- index_col=0,
192
- )
193
-
194
  input_data.drop("date", axis=1, inplace=True)
195
 
196
- distribution_means = (
197
- distribution_data.mean().reset_index(drop=False).transpose()
198
- )
199
- distribution_means.columns = distribution_means.iloc[0]
200
- distribution_means = distribution_means[1:]
201
-
202
- distribution_stds = distribution_data.std().reset_index(drop=False).transpose()
203
- distribution_stds.columns = distribution_stds.iloc[0]
204
- distribution_stds = distribution_stds[1:]
205
-
206
- formatted_means = pd.concat(
207
- [
208
- distribution_means.add_suffix(" - day 0"),
209
- distribution_means.add_suffix(" - day 1"),
210
- distribution_means.add_suffix(" - day 2"),
211
- ],
212
- axis=1,
213
- )
214
-
215
- formatted_stds = pd.concat(
216
- [
217
- distribution_stds.add_suffix(" - day 0"),
218
- distribution_stds.add_suffix(" - day 1"),
219
- distribution_stds.add_suffix(" - day 2"),
220
- ],
221
- axis=1,
222
- )
223
-
224
  z_scores = (
225
- input_data - formatted_means.values.squeeze()
226
- ) / formatted_stds.values.squeeze()
227
-
228
- self._view.display_datatable(z_scores, "")
229
 
230
  out_of_distribution_flags = z_scores.abs() > threshold
231
 
 
1
  import os
2
+ from typing import Tuple
3
  from views.admin_view import AdminView
4
  import streamlit as st
5
  from controllers.user_controller import UserController
 
18
  """
19
  super().__init__()
20
  self._view = AdminView()
21
+ self._distribution_means, self._distribution_stds = (
22
+ self._compute_distribution_statistics()
23
+ )
24
 
25
  def show_dashboard(self) -> None:
26
  """
 
42
  """
43
  pass
44
 
45
+ def _compute_distribution_statistics(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
46
+ """
47
+ Reads the processed feature CSV and computes the mean and standard deviation of each column.
48
+
49
+ Returns:
50
+ A tuple (means, stds) of single-row DataFrames. In each one, every column of the CSV
51
+ appears three times, suffixed with ' - day 0', ' - day 1' and ' - day 2', in that order.
52
+ """
53
+ current_dir = os.path.dirname(os.path.abspath(__file__))
54
+ parent_dir = os.path.dirname(current_dir)
55
+ grandparent_dir = os.path.dirname(parent_dir)
56
+ distribution_data = pd.read_csv(
57
+ os.path.join(
58
+ grandparent_dir,
59
+ "data",
60
+ "processed/",
61
+ "v2_merged_selected_features_with_missing.csv",
62
+ ),
63
+ index_col=0,
64
+ )
65
+
66
+ distribution_means = (
67
+ distribution_data.mean().reset_index(drop=False).transpose()
68
+ )
69
+ distribution_means.columns = distribution_means.iloc[0]
70
+ distribution_means = distribution_means[1:]
71
+
72
+ distribution_stds = distribution_data.std().reset_index(drop=False).transpose()
73
+ distribution_stds.columns = distribution_stds.iloc[0]
74
+ distribution_stds = distribution_stds[1:]
75
+
76
+ formatted_means = pd.concat(
77
+ [
78
+ distribution_means.add_suffix(" - day 0"),
79
+ distribution_means.add_suffix(" - day 1"),
80
+ distribution_means.add_suffix(" - day 2"),
81
+ ],
82
+ axis=1,
83
+ )
84
+
85
+ formatted_stds = pd.concat(
86
+ [
87
+ distribution_stds.add_suffix(" - day 0"),
88
+ distribution_stds.add_suffix(" - day 1"),
89
+ distribution_stds.add_suffix(" - day 2"),
90
+ ],
91
+ axis=1,
92
+ )
93
+
94
+ return formatted_means, formatted_stds
95
+
96
  def _make_custom_predictions(self) -> None:
97
  """
98
  Makes a custom prediction for the admin interface.
 
233
  Returns:
234
  bool: True if the input data is out of distribution, False otherwise.
235
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  input_data.drop("date", axis=1, inplace=True)
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  z_scores = (
239
+ input_data - self._distribution_means.values.squeeze()
240
+ ) / self._distribution_stds.values.squeeze()
 
 
241
 
242
  out_of_distribution_flags = z_scores.abs() > threshold
243