Spaces:
Sleeping
Sleeping
Daniel Varga committed on
Commit ·
b1d06e1
1
Parent(s): c93812a
new method of reading and interpolating datasets that hopefully generalizes
Browse files
- v2/app.py +15 -2
- v2/architecture.py +1 -1
- v2/data_processing.py +55 -7
v2/app.py
CHANGED
|
@@ -20,8 +20,9 @@ from bess import BatteryParameters, BatteryModel
|
|
| 20 |
# !wget "https://static.renyi.hu/ai-shared/daniel/pq/PL_44527.19-21.csv.gz"
|
| 21 |
# !wget "https://static.renyi.hu/ai-shared/daniel/pq/pq_terheles_2021_adatok.tsv"
|
| 22 |
|
|
|
|
| 23 |
|
| 24 |
-
met_2021_data, cons_2021_data = read_datasets()
|
| 25 |
|
| 26 |
|
| 27 |
# TODO some gui to upload consumption data.
|
|
@@ -37,7 +38,19 @@ def recalculate(ui):
|
|
| 37 |
print("Hm, we should do something with this CSV.", ui.consumption_csv.name)
|
| 38 |
|
| 39 |
add_production_field(met_2021_data, solar_parameters)
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
time_interval_min = all_data.index.freq.n
|
| 43 |
time_interval_h = time_interval_min / 60
|
|
|
|
| 20 |
# !wget "https://static.renyi.hu/ai-shared/daniel/pq/PL_44527.19-21.csv.gz"
|
| 21 |
# !wget "https://static.renyi.hu/ai-shared/daniel/pq/pq_terheles_2021_adatok.tsv"
|
| 22 |
|
| 23 |
+
OLD_DATASET = False
|
| 24 |
|
| 25 |
+
met_2021_data, cons_2021_data = read_datasets(old_dataset=OLD_DATASET)
|
| 26 |
|
| 27 |
|
| 28 |
# TODO some gui to upload consumption data.
|
|
|
|
| 38 |
print("Hm, we should do something with this CSV.", ui.consumption_csv.name)
|
| 39 |
|
| 40 |
add_production_field(met_2021_data, solar_parameters)
|
| 41 |
+
|
| 42 |
+
if OLD_DATASET:
|
| 43 |
+
# I've obsoleted this, but the new version is not a 100% replacement:
|
| 44 |
+
# it treats daylight savings changes differently.
|
| 45 |
+
# the new version drops the repeated hour and interpolates the skipped hour.
|
| 46 |
+
all_data = interpolate_and_join(met_2021_data, cons_2021_data)
|
| 47 |
+
else:
|
| 48 |
+
all_data = join_consumption_meteorology(
|
| 49 |
+
cons_2021_data,
|
| 50 |
+
met_2021_data,
|
| 51 |
+
target_freq="5min",
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
|
| 55 |
time_interval_min = all_data.index.freq.n
|
| 56 |
time_interval_h = time_interval_min / 60
|
v2/architecture.py
CHANGED
|
@@ -335,7 +335,7 @@ def main():
|
|
| 335 |
|
| 336 |
solar_parameters = SolarParameters()
|
| 337 |
|
| 338 |
-
met_2021_data, cons_2021_data = read_datasets()
|
| 339 |
add_production_field(met_2021_data, solar_parameters)
|
| 340 |
all_data = interpolate_and_join(met_2021_data, cons_2021_data)
|
| 341 |
|
|
|
|
| 335 |
|
| 336 |
solar_parameters = SolarParameters()
|
| 337 |
|
| 338 |
+
met_2021_data, cons_2021_data = read_datasets(old_dataset=True)
|
| 339 |
add_production_field(met_2021_data, solar_parameters)
|
| 340 |
all_data = interpolate_and_join(met_2021_data, cons_2021_data)
|
| 341 |
|
v2/data_processing.py
CHANGED
|
@@ -10,7 +10,10 @@ START = f"2021-01-01"
|
|
| 10 |
END = f"2022-01-01"
|
| 11 |
|
| 12 |
|
| 13 |
-
def read_datasets(mini=False):
|
|
|
|
|
|
|
|
|
|
| 14 |
if mini:
|
| 15 |
met_filename = 'PL_44527.2101.csv.gz'
|
| 16 |
cons_filename = 'pq_terheles_202101_adatok.tsv'
|
|
@@ -31,14 +34,58 @@ def read_datasets(mini=False):
|
|
| 31 |
cons_data = cons_data.set_index('Time')
|
| 32 |
cons_data['Consumption'] = cons_data['Hatásos teljesítmény [kW]']
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
|
|
|
| 40 |
|
| 41 |
-
return
|
| 42 |
|
| 43 |
|
| 44 |
# BESS parameters are now in BatteryModel
|
|
@@ -63,6 +110,7 @@ def add_production_field(met_2021_data, parameters):
|
|
| 63 |
|
| 64 |
|
| 65 |
def interpolate_and_join(met_2021_data, cons_2021_data):
|
|
|
|
| 66 |
applicable = 24*60*365 - 15 + 5
|
| 67 |
|
| 68 |
demand_f = interp1d(range(0, 365*24*60, 15), cons_2021_data['Consumption'])
|
|
|
|
| 10 |
END = f"2022-01-01"
|
| 11 |
|
| 12 |
|
| 13 |
+
def read_datasets(mini=False, old_dataset=False):
|
| 14 |
+
# old_dataset mode is needed if we plug this into interpolate_and_join()
|
| 15 |
+
# rather than join_consumption_meteorology().
|
| 16 |
+
|
| 17 |
if mini:
|
| 18 |
met_filename = 'PL_44527.2101.csv.gz'
|
| 19 |
cons_filename = 'pq_terheles_202101_adatok.tsv'
|
|
|
|
| 34 |
cons_data = cons_data.set_index('Time')
|
| 35 |
cons_data['Consumption'] = cons_data['Hatásos teljesítmény [kW]']
|
| 36 |
|
| 37 |
+
if old_dataset:
|
| 38 |
+
# consumption data is at 14 29 44 59 minutes, we move it by 1 minute
|
| 39 |
+
# to sync it with production data:
|
| 40 |
+
cons_data.index = cons_data.index + pd.DateOffset(minutes=1)
|
| 41 |
+
|
| 42 |
+
met_2021_data = met_data[(met_data.index >= START) & (met_data.index < END)]
|
| 43 |
+
cons_2021_data = cons_data[(cons_data.index >= START) & (cons_data.index < END)]
|
| 44 |
+
|
| 45 |
+
return met_2021_data, cons_2021_data
|
| 46 |
+
else:
|
| 47 |
+
return met_data, cons_data
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def interpolate(df, target_idx):
|
| 52 |
+
return (df # 1. start with your data
|
| 53 |
+
.reindex(target_idx) # 2. align to the desired timestamps
|
| 54 |
+
.interpolate(method="time") # 3. interpolate *within* the range
|
| 55 |
+
.ffill().bfill() # 4. forward- and backward-fill anything still missing
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def join_consumption_meteorology(
|
| 60 |
+
cons_data: pd.DataFrame,
|
| 61 |
+
met_data: pd.DataFrame,
|
| 62 |
+
target_freq: str = "5min",
|
| 63 |
+
) -> pd.DataFrame:
|
| 64 |
+
interp_method = "time"
|
| 65 |
+
|
| 66 |
+
met = met_data[["Production", "sr", "r", "t", "fs"]]
|
| 67 |
+
cons = cons_data[["Consumption"]]
|
| 68 |
+
|
| 69 |
+
cons.index = cons.index + pd.DateOffset(minutes=1)
|
| 70 |
+
|
| 71 |
+
start = max(cons.index.min(), met.index.min())
|
| 72 |
+
end = min(cons.index.max(), met.index.max())
|
| 73 |
+
cons = cons.loc[start:end].copy()
|
| 74 |
+
met = met .loc[start:end].copy()
|
| 75 |
+
|
| 76 |
+
# there are dupes because of daylight savings time.
|
| 77 |
+
cons = cons[~cons.index.duplicated(keep="last")]
|
| 78 |
+
|
| 79 |
+
common_idx = pd.date_range(start, end, freq=target_freq)[:-2]
|
| 80 |
+
|
| 81 |
+
cons_interp = interpolate(cons, common_idx)
|
| 82 |
+
met_interp = interpolate(met, common_idx)
|
| 83 |
|
| 84 |
+
# stitch together
|
| 85 |
+
# joined = pd.concat([cons_interp["Consumption"], met_interp["Production"]], axis=1)
|
| 86 |
+
joined = pd.concat([cons_interp, met_interp], axis=1)
|
| 87 |
|
| 88 |
+
return joined
|
| 89 |
|
| 90 |
|
| 91 |
# BESS parameters are now in BatteryModel
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
def interpolate_and_join(met_2021_data, cons_2021_data):
|
| 113 |
+
print("this is obsoleted by join_consumption_meteorology(), do not use")
|
| 114 |
applicable = 24*60*365 - 15 + 5
|
| 115 |
|
| 116 |
demand_f = interp1d(range(0, 365*24*60, 15), cons_2021_data['Consumption'])
|