Daniel Varga committed on
Commit
b1d06e1
·
1 Parent(s): c93812a

new method of reading and interpolating datasets that hopefully generalizes

Browse files
Files changed (3) hide show
  1. v2/app.py +15 -2
  2. v2/architecture.py +1 -1
  3. v2/data_processing.py +55 -7
v2/app.py CHANGED
@@ -20,8 +20,9 @@ from bess import BatteryParameters, BatteryModel
20
  # !wget "https://static.renyi.hu/ai-shared/daniel/pq/PL_44527.19-21.csv.gz"
21
  # !wget "https://static.renyi.hu/ai-shared/daniel/pq/pq_terheles_2021_adatok.tsv"
22
 
 
23
 
24
- met_2021_data, cons_2021_data = read_datasets()
25
 
26
 
27
  # TODO some gui to upload consumption data.
@@ -37,7 +38,19 @@ def recalculate(ui):
37
  print("Hm, we should do something with this CSV.", ui.consumption_csv.name)
38
 
39
  add_production_field(met_2021_data, solar_parameters)
40
- all_data = interpolate_and_join(met_2021_data, cons_2021_data)
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  time_interval_min = all_data.index.freq.n
43
  time_interval_h = time_interval_min / 60
 
20
  # !wget "https://static.renyi.hu/ai-shared/daniel/pq/PL_44527.19-21.csv.gz"
21
  # !wget "https://static.renyi.hu/ai-shared/daniel/pq/pq_terheles_2021_adatok.tsv"
22
 
23
+ OLD_DATASET = False
24
 
25
+ met_2021_data, cons_2021_data = read_datasets(old_dataset=OLD_DATASET)
26
 
27
 
28
  # TODO some gui to upload consumption data.
 
38
  print("Hm, we should do something with this CSV.", ui.consumption_csv.name)
39
 
40
  add_production_field(met_2021_data, solar_parameters)
41
+
42
+ if OLD_DATASET:
43
+ # i've obsoleted this, but it's not a 100% replacement,
44
+ # it treats daylight savings changes differently.
45
+ # new version drops repeated hour and interpolates skipped hour.
46
+ all_data = interpolate_and_join(met_2021_data, cons_2021_data)
47
+ else:
48
+ all_data = join_consumption_meteorology(
49
+ cons_2021_data,
50
+ met_2021_data,
51
+ target_freq="5min",
52
+ )
53
+
54
 
55
  time_interval_min = all_data.index.freq.n
56
  time_interval_h = time_interval_min / 60
v2/architecture.py CHANGED
@@ -335,7 +335,7 @@ def main():
335
 
336
  solar_parameters = SolarParameters()
337
 
338
- met_2021_data, cons_2021_data = read_datasets()
339
  add_production_field(met_2021_data, solar_parameters)
340
  all_data = interpolate_and_join(met_2021_data, cons_2021_data)
341
 
 
335
 
336
  solar_parameters = SolarParameters()
337
 
338
+ met_2021_data, cons_2021_data = read_datasets(old_dataset=True)
339
  add_production_field(met_2021_data, solar_parameters)
340
  all_data = interpolate_and_join(met_2021_data, cons_2021_data)
341
 
v2/data_processing.py CHANGED
@@ -10,7 +10,10 @@ START = f"2021-01-01"
10
  END = f"2022-01-01"
11
 
12
 
13
- def read_datasets(mini=False):
 
 
 
14
  if mini:
15
  met_filename = 'PL_44527.2101.csv.gz'
16
  cons_filename = 'pq_terheles_202101_adatok.tsv'
@@ -31,14 +34,58 @@ def read_datasets(mini=False):
31
  cons_data = cons_data.set_index('Time')
32
  cons_data['Consumption'] = cons_data['Hatásos teljesítmény [kW]']
33
 
34
- # consumption data is at 14 29 44 59 minutes, we move it by 1 minute
35
- # to sync it with production data:
36
- cons_data.index = cons_data.index + pd.DateOffset(minutes=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- met_2021_data = met_data[(met_data.index >= START) & (met_data.index < END)]
39
- cons_2021_data = cons_data[(cons_data.index >= START) & (cons_data.index < END)]
 
40
 
41
- return met_2021_data, cons_2021_data
42
 
43
 
44
  # BESS parameters are now in BatteryModel
@@ -63,6 +110,7 @@ def add_production_field(met_2021_data, parameters):
63
 
64
 
65
  def interpolate_and_join(met_2021_data, cons_2021_data):
 
66
  applicable = 24*60*365 - 15 + 5
67
 
68
  demand_f = interp1d(range(0, 365*24*60, 15), cons_2021_data['Consumption'])
 
10
  END = f"2022-01-01"
11
 
12
 
13
+ def read_datasets(mini=False, old_dataset=False):
14
+ # old_dataset mode is needed if we plug this into interpolate_and_join()
15
+ # rather than join_consumption_meteorology().
16
+
17
  if mini:
18
  met_filename = 'PL_44527.2101.csv.gz'
19
  cons_filename = 'pq_terheles_202101_adatok.tsv'
 
34
  cons_data = cons_data.set_index('Time')
35
  cons_data['Consumption'] = cons_data['Hatásos teljesítmény [kW]']
36
 
37
+ if old_dataset:
38
+ # consumption data is at 14 29 44 59 minutes, we move it by 1 minute
39
+ # to sync it with production data:
40
+ cons_data.index = cons_data.index + pd.DateOffset(minutes=1)
41
+
42
+ met_2021_data = met_data[(met_data.index >= START) & (met_data.index < END)]
43
+ cons_2021_data = cons_data[(cons_data.index >= START) & (cons_data.index < END)]
44
+
45
+ return met_2021_data, cons_2021_data
46
+ else:
47
+ return met_data, cons_data
48
+
49
+
50
+
51
def interpolate(df, target_idx):
    """Resample *df* onto *target_idx* with time-weighted linear interpolation.

    The interpolation is carried out on the union of the source timestamps
    and the target timestamps, and only afterwards restricted to
    ``target_idx``.  Reindexing straight to ``target_idx`` first would throw
    away every source sample that does not sit exactly on the target grid,
    leaving nothing (or very little) to interpolate from.

    Args:
        df: DataFrame indexed by a ``DatetimeIndex``.  Repeated timestamps
            are collapsed, keeping the last row for each timestamp.
        target_idx: ``DatetimeIndex`` of the timestamps wanted in the result.

    Returns:
        A DataFrame indexed by ``target_idx``.  Values between source samples
        are linearly interpolated in time; target timestamps before the first
        or after the last source sample take the nearest available value.
    """
    # reindex() refuses duplicate labels, so collapse them up front.
    deduped = df[~df.index.duplicated(keep="last")]

    # Work on source + target timestamps together so that off-grid source
    # samples still contribute to the interpolated values.
    union_idx = deduped.index.union(target_idx)

    filled = (
        deduped.reindex(union_idx)       # source samples plus empty target slots
        .interpolate(method="time")      # fill slots *within* the source range
        .ffill()                         # anything after the last source sample
        .bfill()                         # anything before the first source sample
    )

    # Keep only the requested timestamps (and their freq, if any).
    return filled.reindex(target_idx)
57
+
58
+
59
def join_consumption_meteorology(
    cons_data: pd.DataFrame,
    met_data: pd.DataFrame,
    target_freq: str = "5min",
) -> pd.DataFrame:
    """Align consumption and meteorology data on one regular time grid.

    Both inputs are restricted to the time span they have in common,
    resampled onto a ``target_freq`` grid with ``interpolate()``, and
    concatenated column-wise.

    Args:
        cons_data: DataFrame with a datetime index and a ``Consumption``
            column.
        met_data: DataFrame with a datetime index and the columns
            ``Production``, ``sr``, ``r``, ``t`` and ``fs``.
        target_freq: pandas frequency string of the output grid.

    Returns:
        A DataFrame indexed by the common grid holding ``Consumption``
        followed by the meteorology columns.

    Raises:
        ValueError: if either input has no rows.
        KeyError: if a required column is missing.
    """
    met = met_data[["Production", "sr", "r", "t", "fs"]]
    cons = cons_data[["Consumption"]].copy()

    if cons.empty or met.empty:
        raise ValueError("consumption and meteorology data must both be non-empty")

    # consumption data is at 14 29 44 59 minutes, we move it by 1 minute
    # to sync it with production data.
    cons.index = cons.index + pd.DateOffset(minutes=1)

    # There are dupes because of daylight savings time.  Drop them (keeping
    # the last reading) and sort *before* slicing, so that label slicing and
    # the later reindex operate on a unique, monotonic index.
    cons = cons[~cons.index.duplicated(keep="last")].sort_index()
    met = met[~met.index.duplicated(keep="last")].sort_index()

    # Restrict both frames to the overlapping time span.
    start = max(cons.index.min(), met.index.min())
    end = min(cons.index.max(), met.index.max())
    cons = cons.loc[start:end]
    met = met.loc[start:end]

    # NOTE(review): the last two grid points are dropped on purpose in the
    # original code; the reason is not visible here -- confirm before changing.
    common_idx = pd.date_range(start, end, freq=target_freq)[:-2]

    cons_interp = interpolate(cons, common_idx)
    met_interp = interpolate(met, common_idx)

    # stitch together
    return pd.concat([cons_interp, met_interp], axis=1)
89
 
90
 
91
  # BESS parameters are now in BatteryModel
 
110
 
111
 
112
  def interpolate_and_join(met_2021_data, cons_2021_data):
113
+ print("this is obsoleted by join_consumption_meteorology(), do not use")
114
  applicable = 24*60*365 - 15 + 5
115
 
116
  demand_f = interp1d(range(0, 365*24*60, 15), cons_2021_data['Consumption'])