dmarr committed on
Commit
8ad04d8
·
1 Parent(s): 11e7be0

Update nucpy with the rte fix

Browse files
Files changed (2) hide show
  1. app.py +203 -267
  2. app_all.py +280 -216
app.py CHANGED
@@ -24,48 +24,80 @@ def mongo_unavs_call(user_input_start_date, user_input_end_date, user_input_past
24
  passw = "tN9XpCCQM2MtYDme"
25
  host = "nucmonitordata.xxcwx9k.mongodb.net"
26
  client = pymongo.MongoClient(
27
- f"mongodb+srv://{user}:{passw}@{host}/?retryWrites=true&w=majority&connectTimeoutMS=5000"
28
  )
29
 
30
  db = client["data"]
31
  collection_past_unavs = db["unavs"]
32
- collection_unavs =db["unavs_update"]
33
 
34
  start_date = f"{user_input_start_date}T00:00:00"
35
  end_date = f"{user_input_end_date}T23:59:59"
36
  past_date = f"{user_input_past_date}T23:59:59"
37
-
38
- pipeline = [
 
 
 
 
 
 
 
 
39
  {
40
- "$unwind": "$results"
 
 
 
 
 
 
 
 
41
  },
 
 
42
  {
43
- "$unwind": "$results.generation_unavailabilities"
44
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  {
46
  "$match": {
47
- "results.generation_unavailabilities.production_type": "NUCLEAR",
48
- # "results.generation_unavailabilities.start_date": {"$lte": end_date},
49
- # "results.generation_unavailabilities.end_date": {"$gte": start_date},
50
- # "results.generation_unavailabilities.updated_date": {"$lte": end_date}
51
- "results.generation_unavailabilities.updated_date": {"$lte": past_date}
 
52
  }
53
  },
 
 
54
  {
55
- "$project": {
56
- "_id": 0,
57
- "generation_unavailabilities": "$results.generation_unavailabilities"
58
  }
59
  }
60
  ]
61
 
62
- result1 = list(collection_past_unavs.aggregate(pipeline))
63
- result2 = list(collection_unavs.aggregate(pipeline))
 
 
64
 
65
- # Merge the two lists of JSON results
66
- merged_result = result1 + result2
67
-
68
- return merged_result
69
 
70
  # --------------------------------------------------------------------------------------- #
71
 
@@ -125,281 +157,187 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
125
  # print(mongo_db_data)
126
  mongo_df = pd.DataFrame(mongo_db_data)
127
 
128
- # print(mongo_df)
129
- # Unpack the dictionaries into separate columns
130
- mongo_df_unpacked = pd.json_normalize(mongo_df['generation_unavailabilities'])
 
131
 
132
- # Concatenate the unpacked columns with the original DataFrame
133
- mongo_df_result = pd.concat([mongo_df, mongo_df_unpacked], axis=1)
 
134
 
135
- # Drop the original column
136
- mongo_df_result.drop(columns=['generation_unavailabilities'], inplace=True)
137
 
138
- mongo_df_result['start_date'] = mongo_df_result['values'].apply(lambda x: x[0]['start_date'])
139
- mongo_df_result['end_date'] = mongo_df_result['values'].apply(lambda x: x[0]['end_date'])
140
- mongo_df_result['available_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['available_capacity'])
141
- mongo_df_result['unavailable_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['unavailable_capacity'])
142
- # print(mongo_df_result)
143
- # print(mongo_df_result.columns)
144
- # Drop the original 'values' column
145
- mongo_df_result.drop('values', axis=1, inplace=True)
146
- mongo_df2 = mongo_df_result
147
- mongo_df2.rename(columns=lambda col: col.replace('unit.', ''), inplace=True)
148
-
149
- # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
150
 
151
- # Make the two dataframes have the same columns
152
- mongo_unavs = mongo_df2.copy()
153
- mongo_unavs.drop(columns="type", inplace=True)
 
 
 
 
 
 
 
 
 
154
 
155
- # merged_df['updated_date'] = merged_df['updated_date'].astype(str)
 
 
 
 
 
 
 
 
156
 
157
- # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
158
- # start_date_str = usr_start_date.strftime("%Y-%m-%d")
159
- start_date_str = str(usr_start_date)
160
- # end_date_str = usr_end_date.strftime("%Y-%m-%d")
161
- end_date_str = str(usr_end_date)
162
- current_datetime = datetime.datetime.now()
163
- past_date_str = str(past_date.strftime("%Y-%m-%dT%H:%M:%S%z"))
164
- current_datetime_str = current_datetime.strftime("%Y-%m-%d")
165
 
166
- # nuclear_unav = mongo_unavs.copy()[(mongo_unavs.copy()["production_type"] == "NUCLEAR") & (mongo_unavs.copy()["updated_date"] <= past_date_str)]
167
- # print(past_date_str)
168
- # Sort by updated date
169
- sorted_df = mongo_unavs.copy().sort_values(by='updated_date')
170
 
171
- sorted_df = sorted_df.copy().reset_index(drop=True)
 
 
172
 
173
- # cruas_2 = sorted_df.copy()[(sorted_df.copy()["name"] == "ST ALBAN 2") & (sorted_df.copy()["end_date"] >= start_date_str)]
174
- # print(cruas_2[['updated_date', 'end_date', 'available_capacity']])
 
 
 
 
175
 
176
- # Filter to get identifiers
177
- filtered_id_df = sorted_df.copy()
178
 
179
- # I commented this out
180
- filtered_id_df = filtered_id_df.drop_duplicates(subset='identifier', keep='last')
181
 
182
- # cruas_2 = filtered_id_df.copy()[(filtered_id_df.copy()["name"] == "ST ALBAN 2") & (filtered_id_df.copy()["end_date"] >= start_date_str)]
183
- # print(cruas_2[['updated_date', 'end_date', 'available_capacity']])
184
 
185
- filtered_id_df = filtered_id_df.copy().reset_index(drop=True)
 
 
186
 
187
- filtered_df = filtered_id_df[
188
- (filtered_id_df["production_type"] == "NUCLEAR") &
189
- # (mongo_unavs["updated_date"] <= past_date_str) &
190
- (filtered_id_df["status"] != "DISMISSED")]
191
 
192
- # if photo_date == True:
193
- # nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= past_date_str)]
194
- # photo_date = True
195
- # else: # need to add updated_date as a conditional to get the newest for that day
196
- # nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= end_date_str)]
197
 
198
- # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
199
 
200
- # --------------------- SECOND DATA CLEANING ------------------------ #
201
- # This filter should take only the most recent id and discard the rest
202
 
 
 
 
 
 
 
203
 
 
 
204
 
205
- # This filter should take all the dates with unavs that include days with unavs in the range of the start and end date
 
206
 
 
 
 
207
 
208
- # This filter might take out the most recent identifiers (Message ID) that change the dates of unavailability of a plant.
209
- # This means that the actual unavailability is something else
210
- # filtered_df = filtered_id_df.copy()[(filtered_id_df.copy()['start_date'] <= end_date_str) & (filtered_id_df.copy()['end_date'] >= start_date_str)]
211
-
212
- # Need to eventually do a filter that takes the most restrictive updated identifier instead of the most recent when there
213
- # is an overlap
214
-
215
- # Update available_capacity where the condition is True
216
-
217
- # Standardize datetime in dataframe
218
- filtered_df2 = filtered_df.copy() # This code will just standardize datetime stuff
219
- filtered_df2['creation_date'] = pd.to_datetime(filtered_df2['creation_date'], utc=True)
220
- filtered_df2['updated_date'] = pd.to_datetime(filtered_df2['updated_date'], utc=True)
221
- filtered_df2['start_date'] = pd.to_datetime(filtered_df2['start_date'], utc=True)
222
- filtered_df2['end_date'] = pd.to_datetime(filtered_df2['end_date'], utc=True)
223
-
224
- # Drop the duplicates
225
- filtered_df3 = filtered_df2.copy().drop_duplicates()
226
-
227
- # start_date_datetime = pd.to_datetime(start_date_str, utc=True) # Remove timezone info
228
- start_date_datetime = pd.Timestamp(start_date_str, tz='UTC')
229
- # end_date_datetime = pd.to_datetime(end_date_str, utc=True)
230
- end_date_datetime = pd.Timestamp(end_date_str, tz='UTC')
231
-
232
- # Turn df into dict for json processing
233
- filtered_unavs = filtered_df3.copy().to_dict(orient='records')
234
-
235
- results = {}
236
-
237
- for unav in filtered_unavs:
238
- plant_name = unav['name']
239
- if plant_name in results:
240
- # If the key is already in the dictionary, append unavailability to the list
241
- results[plant_name].append({'status': unav['status'],
242
- 'id': unav['message_id'],
243
- 'creation_date': unav['creation_date'],
244
- 'updated_date': unav['updated_date'],
245
- 'start_date': unav['start_date'],
246
- 'end_date': unav['end_date'],
247
- 'available_capacity': unav['available_capacity']})
248
- else:
249
- # if the key of the plant is not there yet, create a new element of the dictionary
250
-
251
- # Get message_id instead of identifier, easier to identify stuff with it
252
- results[plant_name] = [{'status': unav['status'],
253
- 'id': unav['message_id'],
254
- 'creation_date': unav['creation_date'],
255
- 'updated_date': unav['updated_date'],
256
- 'start_date': unav['start_date'],
257
- 'end_date': unav['end_date'],
258
- 'available_capacity': unav['available_capacity']}]
259
-
260
- # Custom encoder to handle datetime objects
261
- class DateTimeEncoder(json.JSONEncoder):
262
- def default(self, o):
263
- if isinstance(o, datetime.datetime):
264
- return o.isoformat()
265
- return super().default(o)
266
-
267
- results_holder = results
268
-
269
- # Create new dict with each plant only having start_date less than user_end_date and an end_date greater than user_start_date
270
- # should just be doing the same as above in the df for filtering only dates that inclued the start and end date
271
- start_date = start_date_datetime.date()
272
- end_date = end_date_datetime.date()
273
- results_filtered = results_holder
274
- for key, value in results_filtered.items():
275
- filtered_values = []
276
- for item in value:
277
- item_start_date = item['start_date'].date()
278
- item_end_date = item['end_date'].date()
279
- identifier = item['id']
280
- if item_start_date < end_date and item_end_date > start_date and identifier not in filtered_values:
281
- filtered_values.append(item)
282
- results_filtered[key] = filtered_values
283
-
284
-
285
- sorted_results = results_filtered
286
- # --------------------- SECOND DATA CLEANING ------------------------ #
287
-
288
- # --------------------------- HERE IS THE FINAL PROCESS --------------------------- #
289
-
290
- for key, value in sorted_results.items():
291
- sorted_results[key] = sorted(value, key=lambda x: x['updated_date'])
292
-
293
- results_sorted = sorted_results
294
-
295
- dates_of_interest = [start_date] # We are creating a list of dates ranging from user specified start and end dates
296
- date_plus_one = start_date
297
-
298
- while date_plus_one < end_date:
299
- date_plus_one = date_plus_one + datetime.timedelta(days=1)
300
- dates_of_interest.append(date_plus_one)
301
-
302
- # This is to standardize the datetimes. Without this, the datetime calculations for each power plant will not work
303
- # This is just getting the plant metadata and giving it updated_date????? With an amount of items based on the length of the
304
- # date range????
305
- results_plants = {plant_name: {date: {"available_capacity": power, "updated_date": pd.to_datetime("1970-01-01", utc=True)} for date in dates_of_interest}
306
- for plant_name, power in plants_metadata.items()}
307
-
308
- # print(results_sorted)
309
- for plant, unavailabilities in results_sorted.items():
310
- # Get the full power of a given plant according to the sorted results
311
- original_power = plants_metadata[plant]
312
- # Get all the unavailabilities scheduled for the plant.
313
- # This is actually apparently just getting the metadata though???
314
- results_current_plant = results_plants[plant]
315
 
316
- for unavailability in unavailabilities:
317
- # For each unavailability, the resulting power, start and end datetime are collected. Need to collect updated_date
318
- power_unavailability = unavailability["available_capacity"]
319
- updated_date_unav = unavailability["updated_date"]
320
- # The date comes as a string
321
- start_datetime_unav = unavailability["start_date"]
322
- end_datetime_unav = unavailability["end_date"]
323
- start_date_unav = start_datetime_unav.date() # Extract date part
324
- end_date_unav = end_datetime_unav.date() # Extract date part
325
-
326
- # For the current unavailability, we want to find which days it affects
327
- for day in dates_of_interest:
328
-
329
- start_hour = start_datetime_unav.hour
330
- start_minute = start_datetime_unav.minute
331
- end_hour = end_datetime_unav.hour
332
- end_minute = end_datetime_unav.minute
333
-
334
- if start_date_unav <= day <= end_date_unav:
335
- # Check if the day is already updated with a later update_date
336
-
337
- if day in results_current_plant and updated_date_unav <= results_current_plant[day]["updated_date"]:
338
- # Here is likely where we can do the filter for worst case scenario
339
- # --------------------------- !!!!!!CREATE NEW FILTER THAT KEEPS ONLY MOST RESTRICTIVE OVERLAP!!!!!! --------------------------- #
340
- # if power_unavailability < results_current_plant[day]['available_capacity']:
341
-
342
- # # Calculate the % of the day that the plant is under maintenance
343
- # if start_date_unav == day and day == end_date_unav:
344
- # # The unavailability starts and ends on the same day
345
- # percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
346
- # elif start_date_unav == day:
347
- # # The unavailability starts on the current day but ends on a later day
348
- # percentage_of_day = (24 * 60 - (start_hour * 60 + start_minute)) / (24 * 60)
349
- # elif day == end_date_unav:
350
- # # The unavailability starts on a previous day and ends on the current day
351
- # percentage_of_day = (end_hour * 60 + end_minute) / (24 * 60)
352
- # else:
353
- # # The unavailability covers the entire day
354
- # percentage_of_day = 1
355
-
356
- # --------------------------- !!!!!!CREATE NEW FILTER THAT KEEPS ONLY MOST RESTRICTIVE OVERLAP!!!!!! --------------------------- #
357
- # else:
358
-
359
- continue # Skip to the next loop if there is already information for a later update_date
360
-
361
- # Calculate the % of the day that the plant is under maintenance
362
- if start_date_unav == day and day == end_date_unav:
363
- # The unavailability starts and ends on the same day
364
- percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
365
- elif start_date_unav == day:
366
- # The unavailability starts on the current day but ends on a later day
367
- percentage_of_day = (24 * 60 - (start_hour * 60 + start_minute)) / (24 * 60)
368
- elif day == end_date_unav:
369
- # The unavailability starts on a previous day and ends on the current day
370
- percentage_of_day = (end_hour * 60 + end_minute) / (24 * 60)
371
- else:
372
- # The unavailability covers the entire day
373
- percentage_of_day = 1
374
-
375
- # The average power of the day is calculated
376
- power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * original_power
377
-
378
- # Update the available_capacity for the day only if it's not already updated with a later update_date
379
- if (day not in results_current_plant):
380
- results_current_plant[day] = {"available_capacity": power_of_day, "updated_date": updated_date_unav}
381
-
382
- elif (day in results_current_plant) and (updated_date_unav > results_current_plant[day]["updated_date"]) and (power_of_day < results_current_plant[day]['available_capacity']):
383
- results_current_plant[day] = {"available_capacity": power_of_day, "updated_date": updated_date_unav}
384
-
385
- else:
386
- continue
387
 
 
388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
  output_results = {}
391
  for plant, plant_data in results_plants.items():
392
  available_capacity_per_day = {str(date): data["available_capacity"] for date, data in plant_data.items()}
393
  output_results[plant] = available_capacity_per_day
394
 
395
- # print(output_results)
396
  add_total(output_results)
397
- # print("Done")
398
- # print(results_plants)
399
- # Convert datetime key to string to store in mongodb
400
  output_results = {plant: {str(date): power for date, power in plant_data.items()} for plant, plant_data in output_results.items()}
401
  output_results = pd.DataFrame(output_results)
402
- print(output_results)
403
 
404
  # -------------------------------------------------
405
  # Calculate the average of each column excluding the last row
@@ -407,7 +345,7 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
407
 
408
  # Replace the last row with the calculated averages
409
  output_results.iloc[-1, :] = averages
410
-
411
  output_results = output_results.to_dict()
412
 
413
  def turn_total_row_to_avg(data):
@@ -418,8 +356,6 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
418
 
419
  turn_total_row_to_avg(output_results)
420
 
421
- # print(output_results)
422
-
423
  json_data = json.dumps(output_results)
424
  # print(json_data)
425
  return json_data
@@ -537,10 +473,10 @@ def run_app():
537
  df_photo_date_2.index = pd.to_datetime(df_photo_date_2.index)
538
 
539
  # Calculate monthly averages with date in yyyy-mm format
540
- monthly_average_nucmonitor = df_nucmonitor_2.resample('M').mean()
541
  monthly_average_nucmonitor.index = monthly_average_nucmonitor.index.strftime('%Y-%m')
542
 
543
- monthly_average_photo_date = df_photo_date_2.resample('M').mean()
544
  monthly_average_photo_date.index = monthly_average_photo_date.index.strftime('%Y-%m')
545
 
546
 
 
24
  passw = "tN9XpCCQM2MtYDme"
25
  host = "nucmonitordata.xxcwx9k.mongodb.net"
26
  client = pymongo.MongoClient(
27
+ f"mongodb+srv://{user}:{passw}@{host}/?retryWrites=true&w=majority&connectTimeoutMS=100000"
28
  )
29
 
30
  db = client["data"]
31
  collection_past_unavs = db["unavs"]
32
+ collection_unavs = db["unavs_update"]
33
 
34
  start_date = f"{user_input_start_date}T00:00:00"
35
  end_date = f"{user_input_end_date}T23:59:59"
36
  past_date = f"{user_input_past_date}T23:59:59"
37
+
38
+
39
+ pipeline_v4 = [
40
+ # 1) Expand each results element into its own doc
41
+ { "$unwind": "$results" },
42
+
43
+ # 2) Expand each generation_unavailabilities element
44
+ { "$unwind": "$results.generation_unavailabilities" },
45
+
46
+ # 3) Keep only those that match your fuel_type + date criteria
47
  {
48
+ "$match": {
49
+ "results.generation_unavailabilities.production_type": "NUCLEAR",
50
+ "results.generation_unavailabilities.updated_date": { "$lte": past_date },
51
+ "results.generation_unavailabilities.start_date": { "$lte": end_date },
52
+ "results.generation_unavailabilities.start_date": { "$gte": start_date },
53
+ "results.generation_unavailabilities.end_date": { "$gte": start_date },
54
+ "results.generation_unavailabilities.end_date": { "$lte": end_date }
55
+
56
+ }
57
  },
58
+
59
+ # 4) Replace the entire document with just that sub-doc
60
  {
61
+ "$replaceRoot": {
62
+ "newRoot": "$results.generation_unavailabilities"
63
+ }
64
+ }
65
+ ]
66
+
67
+
68
+ pipeline_v6 = [
69
+ # 1) Expand each results element into its own doc
70
+ { "$unwind": "$results" },
71
+
72
+ # 2) Expand each generation_unavailabilities element
73
+ { "$unwind": "$results.generation_unavailabilities" },
74
+
75
+ # 3) Keep only those that match your fuel_type + date criteria
76
  {
77
  "$match": {
78
+ "results.generation_unavailabilities.fuel_type": "NUCLEAR",
79
+ "results.generation_unavailabilities.publication_date": { "$lte": past_date },
80
+ "results.generation_unavailabilities.start_date": { "$lte": end_date },
81
+ # "results.generation_unavailabilities.start_date": { "$gte": start_date },
82
+ "results.generation_unavailabilities.end_date": { "$gte": start_date },
83
+ # "results.generation_unavailabilities.end_date": { "$lte": end_date }
84
  }
85
  },
86
+
87
+ # 4) Replace the entire document with just that sub-doc
88
  {
89
+ "$replaceRoot": {
90
+ "newRoot": "$results.generation_unavailabilities"
 
91
  }
92
  }
93
  ]
94
 
95
+ result1 = list(collection_past_unavs.aggregate(pipeline_v4))
96
+ result2 = list(collection_unavs.aggregate(pipeline_v4))
97
+ result_v6 = list(collection_unavs.aggregate(pipeline_v6))
98
+ merge_results = result1 + result2 + result_v6
99
 
100
+ return merge_results
 
 
 
101
 
102
  # --------------------------------------------------------------------------------------- #
103
 
 
157
  # print(mongo_db_data)
158
  mongo_df = pd.DataFrame(mongo_db_data)
159
 
160
+ mongo_df = mongo_df[['identifier', 'version', 'updated_date', 'type', 'production_type', 'message_id', 'unit', 'status', 'values',
161
+ 'publication_date', 'unavailability_type', 'fuel_type',
162
+ 'affected_asset_or_unit_name',
163
+ 'affected_asset_or_unit_installed_capacity', 'event_status']]
164
 
165
+ # 1. Normalize “unit” into a DataFrame of its own
166
+ unit_expanded = pd.json_normalize(mongo_df["unit"])
167
+ # values_expanded = pd.json_normalize(mongo_df["values"])
168
 
169
+ # (This produces a new DF with columns “eic_code” and “name”.)
 
170
 
171
+ # 2. Concatenate those new columns back onto df, then drop the old “unit” column
172
+ mongo_df_2 = pd.concat([mongo_df.drop(columns=["unit"]), unit_expanded], axis=1)
173
+ # mongo_df_2 = pd.concat([mongo_df_2.drop(columns=["values"]), values_expanded], axis=1)
174
+ # 1. Create a temporary column that is “the first dict” of each list (or {} if empty/NaN)
175
+ mongo_df_2["values_first"] = mongo_df_2["values"].apply(
176
+ lambda lst: lst[0] if isinstance(lst, list) and len(lst) > 0 else {}
177
+ )
 
 
 
 
 
178
 
179
+ # 2. Normalize that dict into separate columns
180
+ values_expanded = pd.json_normalize(mongo_df_2["values_first"])
181
+ # e.g. this produces columns like “start_date”, “end_date”, etc.
182
+
183
+ # 3. Concatenate back and drop the originals
184
+ mongo_df_2 = pd.concat(
185
+ [
186
+ mongo_df_2.drop(columns=["values", "values_first"]),
187
+ values_expanded
188
+ ],
189
+ axis=1
190
+ )
191
 
192
+ mongo_df_2["fuel_type"] = mongo_df_2["fuel_type"].combine_first(mongo_df_2["production_type"])
193
+ mongo_df_2["publication_date"] = mongo_df_2["publication_date"].combine_first(mongo_df_2["updated_date"])
194
+ mongo_df_2["event_status"] = mongo_df_2["event_status"].combine_first(mongo_df_2["status"])
195
+ mongo_df_2["affected_asset_or_unit_installed_capacity"] = mongo_df_2["affected_asset_or_unit_installed_capacity"].combine_first(mongo_df_2["installed_capacity"])
196
+ mongo_df_2["affected_asset_or_unit_name"] = mongo_df_2["affected_asset_or_unit_name"].combine_first(mongo_df_2["name"])
197
+ mongo_df_2["unavailability_type"] = (
198
+ mongo_df_2["unavailability_type"]
199
+ .combine_first(mongo_df_2.loc[:, "type"].iloc[:, 0])
200
+ )
201
 
202
+ mongo_df_2 = mongo_df_2.drop(columns=["production_type", "updated_date", "status", "installed_capacity", "name", "type", "eic_code"])
 
 
 
 
 
 
 
203
 
204
+ # Convert the date columns to datetime objects
205
+ for col in ["publication_date", "start_date", "end_date"]:
206
+ mongo_df_2[col] = pd.to_datetime(mongo_df_2[col], utc=True)
 
207
 
208
+ # # Now convert everything to French time (CET/CEST):
209
+ # for col in ["publication_date", "start_date", "end_date"]:
210
+ # mongo_df_2[col] = mongo_df_2[col].dt.tz_convert("Europe/Paris")
211
 
212
+ # mongo_df_2 = mongo_df_2.drop_duplicates(subset='identifier', keep='first')
213
+ mongo_df_2['version'] = mongo_df_2['version'].astype(int)
214
+ # Sort by identifier and version to ensure the latest version is at the top
215
+ # Method 1: Use groupby + idxmax to pick the row with the largest version per identifier
216
+ idx = mongo_df_2.groupby("identifier")["version"].idxmax()
217
+ mongo_df_2 = mongo_df_2.loc[idx].reset_index(drop=True)
218
 
219
+ mongo_df_2 = mongo_df_2[mongo_df_2['event_status'] != 'DISMISSED']
 
220
 
 
 
221
 
222
+ # Create the final dataframe
223
+ final_df = pd.DataFrame()
224
 
225
+ # Create the date column, with date range from start_date to end_date in daily granularity
226
+ final_df['Date'] = pd.date_range(start=usr_start_date, end=usr_end_date, freq='D')
227
+ final_df['Date'] = [ts.strftime("%Y-%m-%d") for ts in final_df['Date']]
228
 
229
+ # For each plant create a new column with the plant name
230
+ for plant, capacity in plants_metadata.items():
231
+ # Create a new column for each plant
232
+ final_df[plant] = np.nan # Initialize with zeros
233
 
234
+ mongo_df_3 = mongo_df_2.copy()
 
 
 
 
235
 
236
+ dates_of_interest = list(pd.date_range(start=usr_start_date, end=usr_end_date, freq="D"))
237
 
238
+ # Now convert each Timestamp → “YYYY-MM-DD” string:
239
+ dates_of_interest = [ts.strftime("%Y-%m-%d") for ts in dates_of_interest]
240
 
241
+ mongo_df_3['start_day'] = mongo_df_3['start_date'].dt.day
242
+ mongo_df_3['start_hour'] = mongo_df_3['start_date'].dt.hour
243
+ mongo_df_3['start_minute'] = mongo_df_3['start_date'].dt.minute
244
+ mongo_df_3['end_day'] = mongo_df_3['end_date'].dt.day
245
+ mongo_df_3['end_hour'] = mongo_df_3['end_date'].dt.hour
246
+ mongo_df_3['end_minute'] = mongo_df_3['end_date'].dt.minute
247
 
248
+ # mongo_df_3 = mongo_df_3.sort_values(by=['publication_date'], ascending=False)
249
+ mongo_df_3 = mongo_df_3.sort_values(by=['publication_date'])
250
 
251
+ # results_plants = {plant_name: {date: {"available_capacity": power, "publication_date": pd.to_datetime("1970-01-01", utc=True)} for date in dates_of_interest}
252
+ # for plant_name, power in plants_metadata.items()}
253
 
254
+ results_plants = {plant_name: {date: {"available_capacity": power, "publication_date": pd.to_datetime("1970-01-01", utc=True)}
255
+ for date in dates_of_interest}
256
+ for plant_name, power in plants_metadata.items()}
257
 
258
+ for row in mongo_df_3.itertuples():
259
+ # Get the start and end dates for the unavailability
260
+ row_start_date = str(row.start_date.date())
261
+ row_end_date = str(row.end_date.date())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
+ # Get the plant name and capacity
264
+ plant_name = row.affected_asset_or_unit_name
265
+ plant_capacity = plants_metadata.get(plant_name, 0) # Default to 0 if not found
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
+ results_current_plant = results_plants[plant_name]
268
 
269
+ power_unavailability = row.available_capacity
270
+ publication_date_unav = row.publication_date
271
+
272
+ for day in dates_of_interest:
273
+ # percentage_of_day = results_current_plant[day]["percentage_of_day"]
274
+
275
+
276
+ if row_start_date <= day <= row_end_date:
277
+
278
+ # Check if the day is already updated with a later (more recent) update_date; by sorting the DataFrame by publication_date,
279
+ # we ensure that the latest unavailability is applied
280
+ # Get the percentage of day that the plant is unavailable
281
+
282
+ # if day in final_df['Date'] and pd.notna(final_df.loc[final_df['Date'] == day, plant_name]).any():
283
+ if (day in results_current_plant) and (publication_date_unav <= results_current_plant[day]["publication_date"]):
284
+ # If the plant's capacity for that day is already set, skip to the next day
285
+ continue
286
+
287
+ # The unavailability starts and ends on the same day
288
+ if row_start_date == day and day == row_end_date:
289
+ percentage_of_day = (row.end_hour * 60 + row.end_minute - row.start_hour * 60 - row.start_minute) / (24 * 60)
290
+ # results_current_plant[day]["percentage_of_day"] += percentage_of_day
291
+ # power_of_day = percentage_of_day * row.available_capacity + (1 - percentage_of_day) * plant_capacity
292
+ # final_df.loc[final_df['Date'] == day, plant_name] = power_of_day
293
+
294
+ # The unavailability starts on the current day but ends on a later day
295
+ elif row_start_date == day and day < row_end_date:
296
+ percentage_of_day = (24 * 60 - (row.start_hour * 60 + row.start_minute)) / (24 * 60)
297
+ # results_current_plant[day]["percentage_of_day"] += percentage_of_day
298
+
299
+ # power_of_day = percentage_of_day * row.available_capacity + (1 - percentage_of_day) * plant_capacity
300
+ # final_df.loc[final_df['Date'] == day, plant_name] = power_of_day
301
+
302
+ # # The unavailability starts on a previous day and ends on the current day
303
+ elif row_end_date == day and row_start_date < day:
304
+ percentage_of_day = (row.end_hour * 60 + row.end_minute) / (24 * 60)
305
+ # results_current_plant[day]["percentage_of_day"] += percentage_of_day
306
+
307
+ # power_of_day = percentage_of_day * row.available_capacity + (1 - percentage_of_day) * plant_capacity
308
+ # final_df.loc[final_df['Date'] == day, plant_name] = power_of_day
309
+
310
+ else:
311
+ # The unavailability starts on a previous day and ends on a later day
312
+ percentage_of_day = 1
313
+ # power_of_day = percentage_of_day * row.available_capacity + (1 - percentage_of_day) * plant_capacity
314
+ # final_df.loc[final_df['Date'] == day, plant_name] = power_of_day
315
+
316
+ power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * plant_capacity
317
+
318
+ # Update the available_capacity for the day only if it's not already updated with a later update_date
319
+ if (day not in results_current_plant):
320
+ results_current_plant[day] = {"available_capacity": power_of_day, "publication_date": publication_date_unav}
321
+
322
+ elif (day in results_current_plant) and (publication_date_unav > results_current_plant[day]["publication_date"]) \
323
+ and (power_of_day < results_current_plant[day]['available_capacity']):
324
+ # results_current_plant[day]["available_capacity"] *= power_of_day
325
+ # results_current_plant[day]["publication_date"] = publication_date_unav
326
+
327
+ results_current_plant[day] = {"available_capacity": power_of_day, "publication_date": publication_date_unav}
328
+
329
+ else:
330
+ continue
331
 
332
  output_results = {}
333
  for plant, plant_data in results_plants.items():
334
  available_capacity_per_day = {str(date): data["available_capacity"] for date, data in plant_data.items()}
335
  output_results[plant] = available_capacity_per_day
336
 
 
337
  add_total(output_results)
338
+
 
 
339
  output_results = {plant: {str(date): power for date, power in plant_data.items()} for plant, plant_data in output_results.items()}
340
  output_results = pd.DataFrame(output_results)
 
341
 
342
  # -------------------------------------------------
343
  # Calculate the average of each column excluding the last row
 
345
 
346
  # Replace the last row with the calculated averages
347
  output_results.iloc[-1, :] = averages
348
+
349
  output_results = output_results.to_dict()
350
 
351
  def turn_total_row_to_avg(data):
 
356
 
357
  turn_total_row_to_avg(output_results)
358
 
 
 
359
  json_data = json.dumps(output_results)
360
  # print(json_data)
361
  return json_data
 
473
  df_photo_date_2.index = pd.to_datetime(df_photo_date_2.index)
474
 
475
  # Calculate monthly averages with date in yyyy-mm format
476
+ monthly_average_nucmonitor = df_nucmonitor_2.resample('ME').mean()
477
  monthly_average_nucmonitor.index = monthly_average_nucmonitor.index.strftime('%Y-%m')
478
 
479
+ monthly_average_photo_date = df_photo_date_2.resample('ME').mean()
480
  monthly_average_photo_date.index = monthly_average_photo_date.index.strftime('%Y-%m')
481
 
482
 
app_all.py CHANGED
@@ -24,80 +24,48 @@ def mongo_unavs_call(user_input_start_date, user_input_end_date, user_input_past
24
  passw = "tN9XpCCQM2MtYDme"
25
  host = "nucmonitordata.xxcwx9k.mongodb.net"
26
  client = pymongo.MongoClient(
27
- f"mongodb+srv://{user}:{passw}@{host}/?retryWrites=true&w=majority&connectTimeoutMS=100000"
28
  )
29
 
30
  db = client["data"]
31
  collection_past_unavs = db["unavs"]
32
- collection_unavs = db["unavs_update"]
33
 
34
  start_date = f"{user_input_start_date}T00:00:00"
35
  end_date = f"{user_input_end_date}T23:59:59"
36
  past_date = f"{user_input_past_date}T23:59:59"
37
-
38
-
39
- pipeline_v4 = [
40
- # 1) Expand each results element into its own doc
41
- { "$unwind": "$results" },
42
-
43
- # 2) Expand each generation_unavailabilities element
44
- { "$unwind": "$results.generation_unavailabilities" },
45
-
46
- # 3) Keep only those that match your fuel_type + date criteria
47
  {
48
- "$match": {
49
- "results.generation_unavailabilities.production_type": "NUCLEAR",
50
- "results.generation_unavailabilities.updated_date": { "$lte": past_date },
51
- "results.generation_unavailabilities.start_date": { "$lte": end_date },
52
- "results.generation_unavailabilities.start_date": { "$gte": start_date },
53
- "results.generation_unavailabilities.end_date": { "$gte": start_date },
54
- "results.generation_unavailabilities.end_date": { "$lte": end_date }
55
-
56
- }
57
  },
58
-
59
- # 4) Replace the entire document with just that sub-doc
60
  {
61
- "$replaceRoot": {
62
- "newRoot": "$results.generation_unavailabilities"
63
- }
64
- }
65
- ]
66
-
67
-
68
- pipeline_v6 = [
69
- # 1) Expand each results element into its own doc
70
- { "$unwind": "$results" },
71
-
72
- # 2) Expand each generation_unavailabilities element
73
- { "$unwind": "$results.generation_unavailabilities" },
74
-
75
- # 3) Keep only those that match your fuel_type + date criteria
76
  {
77
  "$match": {
78
- "results.generation_unavailabilities.fuel_type": "NUCLEAR",
79
- "results.generation_unavailabilities.publication_date": { "$lte": past_date },
80
- "results.generation_unavailabilities.start_date": { "$lte": end_date },
81
- # "results.generation_unavailabilities.start_date": { "$gte": start_date },
82
- "results.generation_unavailabilities.end_date": { "$gte": start_date },
83
- # "results.generation_unavailabilities.end_date": { "$lte": end_date }
84
  }
85
  },
86
-
87
- # 4) Replace the entire document with just that sub-doc
88
  {
89
- "$replaceRoot": {
90
- "newRoot": "$results.generation_unavailabilities"
 
91
  }
92
  }
93
  ]
94
 
95
- result1 = list(collection_past_unavs.aggregate(pipeline_v4))
96
- result2 = list(collection_unavs.aggregate(pipeline_v4))
97
- result_v6 = list(collection_unavs.aggregate(pipeline_v6))
98
- merge_results = result1 + result2 + result_v6
99
 
100
- return merge_results
 
 
 
101
 
102
  # --------------------------------------------------------------------------------------- #
103
 
@@ -135,19 +103,19 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
135
  # # Slightly changed metadata to fit the data from the RTE API: ST-LAURENT B 2 --> ST LAURENT 2, ....
136
 
137
  plants_metadata = {"BELLEVILLE 1": 1310.0, "BELLEVILLE 2": 1310.0, "BLAYAIS 1": 910.0, "BLAYAIS 2": 910.0,
138
- "BLAYAIS 3": 910.0, "BLAYAIS 4": 910.0, "BUGEY 2": 910.0, "BUGEY 3": 910.0, "BUGEY 4": 880.0,
139
- "BUGEY 5": 880.0, "CATTENOM 1": 1300.0, "CATTENOM 2": 1300.0, "CATTENOM 3": 1300.0,
140
- "CATTENOM 4": 1300.0, "CHINON 1": 905.0, "CHINON 2": 905.0, "CHINON 3": 905.0,
141
- "CHINON 4": 905.0, "CHOOZ 1": 1500.0, "CHOOZ 2": 1500.0, "CIVAUX 1": 1495.0,
142
- "CIVAUX 2": 1495.0, "CRUAS 1": 915.0, "CRUAS 2": 915.0, "CRUAS 3": 915.0, "CRUAS 4": 915.0,
143
- "DAMPIERRE 1": 890.0, "DAMPIERRE 2": 890.0, "DAMPIERRE 3": 890.0, "DAMPIERRE 4": 890.0,
144
- "FLAMANVILLE 1": 1330.0, "FLAMANVILLE 2": 1330.0, "FLAMANVILLE 3": 1620.0, "GOLFECH 1": 1310.0, "GOLFECH 2": 1310.0,
145
- "GRAVELINES 1": 910.0, "GRAVELINES 2": 910.0, "GRAVELINES 3": 910.0, "GRAVELINES 4": 910.0,
146
- "GRAVELINES 5": 910.0, "GRAVELINES 6": 910.0, "NOGENT 1": 1310.0, "NOGENT 2": 1310.0,
147
- "PALUEL 1": 1330.0, "PALUEL 2": 1330.0, "PALUEL 3": 1330.0, "PALUEL 4": 1330.0, "PENLY 1": 1330.0,
148
- "PENLY 2": 1330.0, "ST ALBAN 1": 1335.0, "ST ALBAN 2": 1335.0, "ST LAURENT 1": 915.0,
149
- "ST LAURENT 2": 915.0, "TRICASTIN 1": 915.0, "TRICASTIN 2": 915.0, "TRICASTIN 3": 915.0,
150
- "TRICASTIN 4": 915.0, "FESSENHEIM 1": 0.0, "FESSENHEIM 2": 0.0}
151
 
152
  # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
153
 
@@ -157,187 +125,281 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
157
  # print(mongo_db_data)
158
  mongo_df = pd.DataFrame(mongo_db_data)
159
 
160
- mongo_df = mongo_df[['identifier', 'version', 'updated_date', 'type', 'production_type', 'message_id', 'unit', 'status', 'values',
161
- 'publication_date', 'unavailability_type', 'fuel_type',
162
- 'affected_asset_or_unit_name',
163
- 'affected_asset_or_unit_installed_capacity', 'event_status']]
164
 
165
- # 1. Normalize “unit” into a DataFrame of its own
166
- unit_expanded = pd.json_normalize(mongo_df["unit"])
167
- # values_expanded = pd.json_normalize(mongo_df["values"])
168
 
169
- # (This produces a new DF with columns “eic_code” and “name”.)
 
170
 
171
- # 2. Concatenate those new columns back onto df, then drop the old “unit” column
172
- mongo_df_2 = pd.concat([mongo_df.drop(columns=["unit"]), unit_expanded], axis=1)
173
- # mongo_df_2 = pd.concat([mongo_df_2.drop(columns=["values"]), values_expanded], axis=1)
174
- # 1. Create a temporary column that is “the first dict” of each list (or {} if empty/NaN)
175
- mongo_df_2["values_first"] = mongo_df_2["values"].apply(
176
- lambda lst: lst[0] if isinstance(lst, list) and len(lst) > 0 else {}
177
- )
 
 
 
178
 
179
- # 2. Normalize that dict into separate columns
180
- values_expanded = pd.json_normalize(mongo_df_2["values_first"])
181
- # e.g. this produces columns like “start_date”, “end_date”, etc.
182
-
183
- # 3. Concatenate back and drop the originals
184
- mongo_df_2 = pd.concat(
185
- [
186
- mongo_df_2.drop(columns=["values", "values_first"]),
187
- values_expanded
188
- ],
189
- axis=1
190
- )
191
 
192
- mongo_df_2["fuel_type"] = mongo_df_2["fuel_type"].combine_first(mongo_df_2["production_type"])
193
- mongo_df_2["publication_date"] = mongo_df_2["publication_date"].combine_first(mongo_df_2["updated_date"])
194
- mongo_df_2["event_status"] = mongo_df_2["event_status"].combine_first(mongo_df_2["status"])
195
- mongo_df_2["affected_asset_or_unit_installed_capacity"] = mongo_df_2["affected_asset_or_unit_installed_capacity"].combine_first(mongo_df_2["installed_capacity"])
196
- mongo_df_2["affected_asset_or_unit_name"] = mongo_df_2["affected_asset_or_unit_name"].combine_first(mongo_df_2["name"])
197
- mongo_df_2["unavailability_type"] = (
198
- mongo_df_2["unavailability_type"]
199
- .combine_first(mongo_df_2.loc[:, "type"].iloc[:, 0])
200
- )
201
 
202
- mongo_df_2 = mongo_df_2.drop(columns=["production_type", "updated_date", "status", "installed_capacity", "name", "type", "eic_code"])
 
 
 
 
 
 
 
203
 
204
- # Convert the date columns to datetime objects
205
- for col in ["publication_date", "start_date", "end_date"]:
206
- mongo_df_2[col] = pd.to_datetime(mongo_df_2[col], utc=True)
 
207
 
208
- # # Now convert everything to French time (CET/CEST):
209
- # for col in ["publication_date", "start_date", "end_date"]:
210
- # mongo_df_2[col] = mongo_df_2[col].dt.tz_convert("Europe/Paris")
211
 
212
- # mongo_df_2 = mongo_df_2.drop_duplicates(subset='identifier', keep='first')
213
- mongo_df_2['version'] = mongo_df_2['version'].astype(int)
214
- # Sort by identifier and version to ensure the latest version is at the top
215
- # Method 1: Use groupby + idxmax to pick the row with the largest version per identifier
216
- idx = mongo_df_2.groupby("identifier")["version"].idxmax()
217
- mongo_df_2 = mongo_df_2.loc[idx].reset_index(drop=True)
218
 
219
- mongo_df_2 = mongo_df_2[mongo_df_2['event_status'] != 'DISMISSED']
 
220
 
 
 
221
 
222
- # Create the final dataframe
223
- final_df = pd.DataFrame()
224
 
225
- # Create the date column, with date range from start_date to end_date in daily granularity
226
- final_df['Date'] = pd.date_range(start=usr_start_date, end=usr_end_date, freq='D')
227
- final_df['Date'] = [ts.strftime("%Y-%m-%d") for ts in final_df['Date']]
228
 
229
- # For each plant create a new column with the plant name
230
- for plant, capacity in plants_metadata.items():
231
- # Create a new column for each plant
232
- final_df[plant] = np.nan # Initialize with zeros
233
 
234
- mongo_df_3 = mongo_df_2.copy()
 
 
 
 
235
 
236
- dates_of_interest = list(pd.date_range(start=usr_start_date, end=usr_end_date, freq="D"))
237
 
238
- # Now convert each Timestamp → “YYYY-MM-DD” string:
239
- dates_of_interest = [ts.strftime("%Y-%m-%d") for ts in dates_of_interest]
240
 
241
- mongo_df_3['start_day'] = mongo_df_3['start_date'].dt.day
242
- mongo_df_3['start_hour'] = mongo_df_3['start_date'].dt.hour
243
- mongo_df_3['start_minute'] = mongo_df_3['start_date'].dt.minute
244
- mongo_df_3['end_day'] = mongo_df_3['end_date'].dt.day
245
- mongo_df_3['end_hour'] = mongo_df_3['end_date'].dt.hour
246
- mongo_df_3['end_minute'] = mongo_df_3['end_date'].dt.minute
247
 
248
- # mongo_df_3 = mongo_df_3.sort_values(by=['publication_date'], ascending=False)
249
- mongo_df_3 = mongo_df_3.sort_values(by=['publication_date'])
250
 
251
- # results_plants = {plant_name: {date: {"available_capacity": power, "publication_date": pd.to_datetime("1970-01-01", utc=True)} for date in dates_of_interest}
252
- # for plant_name, power in plants_metadata.items()}
253
 
254
- results_plants = {plant_name: {date: {"available_capacity": power, "publication_date": pd.to_datetime("1970-01-01", utc=True)}
255
- for date in dates_of_interest}
256
- for plant_name, power in plants_metadata.items()}
257
 
258
- for row in mongo_df_3.itertuples():
259
- # Get the start and end dates for the unavailability
260
- row_start_date = str(row.start_date.date())
261
- row_end_date = str(row.end_date.date())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- # Get the plant name and capacity
264
- plant_name = row.affected_asset_or_unit_name
265
- plant_capacity = plants_metadata.get(plant_name, 0) # Default to 0 if not found
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
- results_current_plant = results_plants[plant_name]
268
 
269
- power_unavailability = row.available_capacity
270
- publication_date_unav = row.publication_date
271
-
272
- for day in dates_of_interest:
273
- # percentage_of_day = results_current_plant[day]["percentage_of_day"]
274
-
275
-
276
- if row_start_date <= day <= row_end_date:
277
-
278
- # Check if the day is already updated with a later (more recent) update_date; by sorting the DataFrame by publication_date,
279
- # we ensure that the latest unavailability is applied
280
- # Get the percentage of day that the plant is unavailable
281
-
282
- # if day in final_df['Date'] and pd.notna(final_df.loc[final_df['Date'] == day, plant_name]).any():
283
- if (day in results_current_plant) and (publication_date_unav <= results_current_plant[day]["publication_date"]):
284
- # If the plant's capacity for that day is already set, skip to the next day
285
- continue
286
-
287
- # The unavailability starts and ends on the same day
288
- if row_start_date == day and day == row_end_date:
289
- percentage_of_day = (row.end_hour * 60 + row.end_minute - row.start_hour * 60 - row.start_minute) / (24 * 60)
290
- # results_current_plant[day]["percentage_of_day"] += percentage_of_day
291
- # power_of_day = percentage_of_day * row.available_capacity + (1 - percentage_of_day) * plant_capacity
292
- # final_df.loc[final_df['Date'] == day, plant_name] = power_of_day
293
-
294
- # The unavailability starts on the current day but ends on a later day
295
- elif row_start_date == day and day < row_end_date:
296
- percentage_of_day = (24 * 60 - (row.start_hour * 60 + row.start_minute)) / (24 * 60)
297
- # results_current_plant[day]["percentage_of_day"] += percentage_of_day
298
-
299
- # power_of_day = percentage_of_day * row.available_capacity + (1 - percentage_of_day) * plant_capacity
300
- # final_df.loc[final_df['Date'] == day, plant_name] = power_of_day
301
-
302
- # # The unavailability starts on a previous day and ends on the current day
303
- elif row_end_date == day and row_start_date < day:
304
- percentage_of_day = (row.end_hour * 60 + row.end_minute) / (24 * 60)
305
- # results_current_plant[day]["percentage_of_day"] += percentage_of_day
306
-
307
- # power_of_day = percentage_of_day * row.available_capacity + (1 - percentage_of_day) * plant_capacity
308
- # final_df.loc[final_df['Date'] == day, plant_name] = power_of_day
309
-
310
- else:
311
- # The unavailability starts on a previous day and ends on a later day
312
- percentage_of_day = 1
313
- # power_of_day = percentage_of_day * row.available_capacity + (1 - percentage_of_day) * plant_capacity
314
- # final_df.loc[final_df['Date'] == day, plant_name] = power_of_day
315
-
316
- power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * plant_capacity
317
-
318
- # Update the available_capacity for the day only if it's not already updated with a later update_date
319
- if (day not in results_current_plant):
320
- results_current_plant[day] = {"available_capacity": power_of_day, "publication_date": publication_date_unav}
321
-
322
- elif (day in results_current_plant) and (publication_date_unav > results_current_plant[day]["publication_date"]) \
323
- and (power_of_day < results_current_plant[day]['available_capacity']):
324
- # results_current_plant[day]["available_capacity"] *= power_of_day
325
- # results_current_plant[day]["publication_date"] = publication_date_unav
326
-
327
- results_current_plant[day] = {"available_capacity": power_of_day, "publication_date": publication_date_unav}
328
-
329
- else:
330
- continue
331
 
332
  output_results = {}
333
  for plant, plant_data in results_plants.items():
334
  available_capacity_per_day = {str(date): data["available_capacity"] for date, data in plant_data.items()}
335
  output_results[plant] = available_capacity_per_day
336
 
 
337
  add_total(output_results)
338
-
 
 
339
  output_results = {plant: {str(date): power for date, power in plant_data.items()} for plant, plant_data in output_results.items()}
340
  output_results = pd.DataFrame(output_results)
 
341
 
342
  # -------------------------------------------------
343
  # Calculate the average of each column excluding the last row
@@ -345,7 +407,7 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
345
 
346
  # Replace the last row with the calculated averages
347
  output_results.iloc[-1, :] = averages
348
-
349
  output_results = output_results.to_dict()
350
 
351
  def turn_total_row_to_avg(data):
@@ -356,6 +418,8 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
356
 
357
  turn_total_row_to_avg(output_results)
358
 
 
 
359
  json_data = json.dumps(output_results)
360
  # print(json_data)
361
  return json_data
@@ -473,10 +537,10 @@ def run_app():
473
  df_photo_date_2.index = pd.to_datetime(df_photo_date_2.index)
474
 
475
  # Calculate monthly averages with date in yyyy-mm format
476
- monthly_average_nucmonitor = df_nucmonitor_2.resample('ME').mean()
477
  monthly_average_nucmonitor.index = monthly_average_nucmonitor.index.strftime('%Y-%m')
478
 
479
- monthly_average_photo_date = df_photo_date_2.resample('ME').mean()
480
  monthly_average_photo_date.index = monthly_average_photo_date.index.strftime('%Y-%m')
481
 
482
 
 
24
  passw = "tN9XpCCQM2MtYDme"
25
  host = "nucmonitordata.xxcwx9k.mongodb.net"
26
  client = pymongo.MongoClient(
27
+ f"mongodb+srv://{user}:{passw}@{host}/?retryWrites=true&w=majority&connectTimeoutMS=5000"
28
  )
29
 
30
  db = client["data"]
31
  collection_past_unavs = db["unavs"]
32
+ collection_unavs =db["unavs_update"]
33
 
34
  start_date = f"{user_input_start_date}T00:00:00"
35
  end_date = f"{user_input_end_date}T23:59:59"
36
  past_date = f"{user_input_past_date}T23:59:59"
37
+
38
+ pipeline = [
 
 
 
 
 
 
 
 
39
  {
40
+ "$unwind": "$results"
 
 
 
 
 
 
 
 
41
  },
 
 
42
  {
43
+ "$unwind": "$results.generation_unavailabilities"
44
+ },
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  {
46
  "$match": {
47
+ "results.generation_unavailabilities.production_type": "NUCLEAR",
48
+ # "results.generation_unavailabilities.start_date": {"$lte": end_date},
49
+ # "results.generation_unavailabilities.end_date": {"$gte": start_date},
50
+ # "results.generation_unavailabilities.updated_date": {"$lte": end_date}
51
+ "results.generation_unavailabilities.updated_date": {"$lte": past_date}
 
52
  }
53
  },
 
 
54
  {
55
+ "$project": {
56
+ "_id": 0,
57
+ "generation_unavailabilities": "$results.generation_unavailabilities"
58
  }
59
  }
60
  ]
61
 
62
+ result1 = list(collection_past_unavs.aggregate(pipeline))
63
+ result2 = list(collection_unavs.aggregate(pipeline))
 
 
64
 
65
+ # Merge the two lists of JSON results
66
+ merged_result = result1 + result2
67
+
68
+ return merged_result
69
 
70
  # --------------------------------------------------------------------------------------- #
71
 
 
103
  # # Slightly changed metadata to fit the data from the RTE API: ST-LAURENT B 2 --> ST LAURENT 2, ....
104
 
105
  plants_metadata = {"BELLEVILLE 1": 1310.0, "BELLEVILLE 2": 1310.0, "BLAYAIS 1": 910.0, "BLAYAIS 2": 910.0,
106
+ "BLAYAIS 3": 910.0, "BLAYAIS 4": 910.0, "BUGEY 2": 910.0, "BUGEY 3": 910.0, "BUGEY 4": 880.0,
107
+ "BUGEY 5": 880.0, "CATTENOM 1": 1300.0, "CATTENOM 2": 1300.0, "CATTENOM 3": 1300.0,
108
+ "CATTENOM 4": 1300.0, "CHINON 1": 905.0, "CHINON 2": 905.0, "CHINON 3": 905.0,
109
+ "CHINON 4": 905.0, "CHOOZ 1": 1500.0, "CHOOZ 2": 1500.0, "CIVAUX 1": 1495.0,
110
+ "CIVAUX 2": 1495.0, "CRUAS 1": 915.0, "CRUAS 2": 915.0, "CRUAS 3": 915.0, "CRUAS 4": 915.0,
111
+ "DAMPIERRE 1": 890.0, "DAMPIERRE 2": 890.0, "DAMPIERRE 3": 890.0, "DAMPIERRE 4": 890.0,
112
+ "FLAMANVILLE 1": 1330.0, "FLAMANVILLE 2": 1330.0, "GOLFECH 1": 1310.0, "GOLFECH 2": 1310.0,
113
+ "GRAVELINES 1": 910.0, "GRAVELINES 2": 910.0, "GRAVELINES 3": 910.0, "GRAVELINES 4": 910.0,
114
+ "GRAVELINES 5": 910.0, "GRAVELINES 6": 910.0, "NOGENT 1": 1310.0, "NOGENT 2": 1310.0,
115
+ "PALUEL 1": 1330.0, "PALUEL 2": 1330.0, "PALUEL 3": 1330.0, "PALUEL 4": 1330.0, "PENLY 1": 1330.0,
116
+ "PENLY 2": 1330.0, "ST ALBAN 1": 1335.0, "ST ALBAN 2": 1335.0, "ST LAURENT 1": 915.0,
117
+ "ST LAURENT 2": 915.0, "TRICASTIN 1": 915.0, "TRICASTIN 2": 915.0, "TRICASTIN 3": 915.0,
118
+ "TRICASTIN 4": 915.0, "FESSENHEIM 1": 880.0, "FESSENHEIM 2": 880.0}
119
 
120
  # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
121
 
 
125
  # print(mongo_db_data)
126
  mongo_df = pd.DataFrame(mongo_db_data)
127
 
128
+ # print(mongo_df)
129
+ # Unpack the dictionaries into separate columns
130
+ mongo_df_unpacked = pd.json_normalize(mongo_df['generation_unavailabilities'])
 
131
 
132
+ # Concatenate the unpacked columns with the original DataFrame
133
+ mongo_df_result = pd.concat([mongo_df, mongo_df_unpacked], axis=1)
 
134
 
135
+ # Drop the original column
136
+ mongo_df_result.drop(columns=['generation_unavailabilities'], inplace=True)
137
 
138
+ mongo_df_result['start_date'] = mongo_df_result['values'].apply(lambda x: x[0]['start_date'])
139
+ mongo_df_result['end_date'] = mongo_df_result['values'].apply(lambda x: x[0]['end_date'])
140
+ mongo_df_result['available_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['available_capacity'])
141
+ mongo_df_result['unavailable_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['unavailable_capacity'])
142
+ # print(mongo_df_result)
143
+ # print(mongo_df_result.columns)
144
+ # Drop the original 'values' column
145
+ mongo_df_result.drop('values', axis=1, inplace=True)
146
+ mongo_df2 = mongo_df_result
147
+ mongo_df2.rename(columns=lambda col: col.replace('unit.', ''), inplace=True)
148
 
149
+ # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
 
 
 
 
 
 
 
 
 
 
 
150
 
151
+ # Make the two dataframes have the same columns
152
+ mongo_unavs = mongo_df2.copy()
153
+ mongo_unavs.drop(columns="type", inplace=True)
154
+
155
+ # merged_df['updated_date'] = merged_df['updated_date'].astype(str)
 
 
 
 
156
 
157
+ # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
158
+ # start_date_str = usr_start_date.strftime("%Y-%m-%d")
159
+ start_date_str = str(usr_start_date)
160
+ # end_date_str = usr_end_date.strftime("%Y-%m-%d")
161
+ end_date_str = str(usr_end_date)
162
+ current_datetime = datetime.datetime.now()
163
+ past_date_str = str(past_date.strftime("%Y-%m-%dT%H:%M:%S%z"))
164
+ current_datetime_str = current_datetime.strftime("%Y-%m-%d")
165
 
166
+ # nuclear_unav = mongo_unavs.copy()[(mongo_unavs.copy()["production_type"] == "NUCLEAR") & (mongo_unavs.copy()["updated_date"] <= past_date_str)]
167
+ # print(past_date_str)
168
+ # Sort by updated date
169
+ sorted_df = mongo_unavs.copy().sort_values(by='updated_date')
170
 
171
+ sorted_df = sorted_df.copy().reset_index(drop=True)
 
 
172
 
173
+ # cruas_2 = sorted_df.copy()[(sorted_df.copy()["name"] == "ST ALBAN 2") & (sorted_df.copy()["end_date"] >= start_date_str)]
174
+ # print(cruas_2[['updated_date', 'end_date', 'available_capacity']])
 
 
 
 
175
 
176
+ # Filter to get identifiers
177
+ filtered_id_df = sorted_df.copy()
178
 
179
+ # I commented this out
180
+ filtered_id_df = filtered_id_df.drop_duplicates(subset='identifier', keep='last')
181
 
182
+ # cruas_2 = filtered_id_df.copy()[(filtered_id_df.copy()["name"] == "ST ALBAN 2") & (filtered_id_df.copy()["end_date"] >= start_date_str)]
183
+ # print(cruas_2[['updated_date', 'end_date', 'available_capacity']])
184
 
185
+ filtered_id_df = filtered_id_df.copy().reset_index(drop=True)
 
 
186
 
187
+ filtered_df = filtered_id_df[
188
+ (filtered_id_df["production_type"] == "NUCLEAR") &
189
+ # (mongo_unavs["updated_date"] <= past_date_str) &
190
+ (filtered_id_df["status"] != "DISMISSED")]
191
 
192
+ # if photo_date == True:
193
+ # nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= past_date_str)]
194
+ # photo_date = True
195
+ # else: # need to add updated_date as a conditional to get the newest for that day
196
+ # nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= end_date_str)]
197
 
198
+ # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
199
 
200
+ # --------------------- SECOND DATA CLEANING ------------------------ #
201
+ # This filter should take only the most recent id and discard the rest
202
 
 
 
 
 
 
 
203
 
 
 
204
 
205
+ # This filter should take all the dates with unavs that include days with unavs in the range of the start and end date
 
206
 
 
 
 
207
 
208
+ # This filter might take out the most recent identifiers (Message ID) that change the dates of unavailability of a plant.
209
+ # This means that the actual unavailability is something else
210
+ # filtered_df = filtered_id_df.copy()[(filtered_id_df.copy()['start_date'] <= end_date_str) & (filtered_id_df.copy()['end_date'] >= start_date_str)]
211
+
212
+ # Need to eventually do a filter that takes the most restrictive updated identifier instead of the most recent when there
213
+ # is an overlap
214
+
215
+ # Update available_capacity where the condition is True
216
+
217
+ # Standardize datetime in dataframe
218
+ filtered_df2 = filtered_df.copy() # This code will just standardize datetime stuff
219
+ filtered_df2['creation_date'] = pd.to_datetime(filtered_df2['creation_date'], utc=True)
220
+ filtered_df2['updated_date'] = pd.to_datetime(filtered_df2['updated_date'], utc=True)
221
+ filtered_df2['start_date'] = pd.to_datetime(filtered_df2['start_date'], utc=True)
222
+ filtered_df2['end_date'] = pd.to_datetime(filtered_df2['end_date'], utc=True)
223
+
224
+ # Drop the duplicates
225
+ filtered_df3 = filtered_df2.copy().drop_duplicates()
226
+
227
+ # start_date_datetime = pd.to_datetime(start_date_str, utc=True) # Remove timezone info
228
+ start_date_datetime = pd.Timestamp(start_date_str, tz='UTC')
229
+ # end_date_datetime = pd.to_datetime(end_date_str, utc=True)
230
+ end_date_datetime = pd.Timestamp(end_date_str, tz='UTC')
231
+
232
+ # Turn df into dict for json processing
233
+ filtered_unavs = filtered_df3.copy().to_dict(orient='records')
234
+
235
+ results = {}
236
+
237
+ for unav in filtered_unavs:
238
+ plant_name = unav['name']
239
+ if plant_name in results:
240
+ # If the key is already in the dictionary, append unavailability to the list
241
+ results[plant_name].append({'status': unav['status'],
242
+ 'id': unav['message_id'],
243
+ 'creation_date': unav['creation_date'],
244
+ 'updated_date': unav['updated_date'],
245
+ 'start_date': unav['start_date'],
246
+ 'end_date': unav['end_date'],
247
+ 'available_capacity': unav['available_capacity']})
248
+ else:
249
+ # if the key of the plant is not there yet, create a new element of the dictionary
250
+
251
+ # Get message_id instead of identifier, easier to identify stuff with it
252
+ results[plant_name] = [{'status': unav['status'],
253
+ 'id': unav['message_id'],
254
+ 'creation_date': unav['creation_date'],
255
+ 'updated_date': unav['updated_date'],
256
+ 'start_date': unav['start_date'],
257
+ 'end_date': unav['end_date'],
258
+ 'available_capacity': unav['available_capacity']}]
259
+
260
+ # Custom encoder to handle datetime objects
261
+ class DateTimeEncoder(json.JSONEncoder):
262
+ def default(self, o):
263
+ if isinstance(o, datetime.datetime):
264
+ return o.isoformat()
265
+ return super().default(o)
266
+
267
+ results_holder = results
268
+
269
+ # Create new dict with each plant only having start_date less than user_end_date and an end_date greater than user_start_date
270
+ # should just be doing the same as above in the df for filtering only dates that inclued the start and end date
271
+ start_date = start_date_datetime.date()
272
+ end_date = end_date_datetime.date()
273
+ results_filtered = results_holder
274
+ for key, value in results_filtered.items():
275
+ filtered_values = []
276
+ for item in value:
277
+ item_start_date = item['start_date'].date()
278
+ item_end_date = item['end_date'].date()
279
+ identifier = item['id']
280
+ if item_start_date < end_date and item_end_date > start_date and identifier not in filtered_values:
281
+ filtered_values.append(item)
282
+ results_filtered[key] = filtered_values
283
+
284
+
285
+ sorted_results = results_filtered
286
+ # --------------------- SECOND DATA CLEANING ------------------------ #
287
+
288
+ # --------------------------- HERE IS THE FINAL PROCESS --------------------------- #
289
+
290
+ for key, value in sorted_results.items():
291
+ sorted_results[key] = sorted(value, key=lambda x: x['updated_date'])
292
+
293
+ results_sorted = sorted_results
294
+
295
+ dates_of_interest = [start_date] # We are creating a list of dates ranging from user specified start and end dates
296
+ date_plus_one = start_date
297
+
298
+ while date_plus_one < end_date:
299
+ date_plus_one = date_plus_one + datetime.timedelta(days=1)
300
+ dates_of_interest.append(date_plus_one)
301
+
302
+ # This is to standardize the datetimes. Without this, the datetime calculations for each power plant will not work
303
+ # This is just getting the plant metadata and giving it updated_date????? With an amount of items based on the length of the
304
+ # date range????
305
+ results_plants = {plant_name: {date: {"available_capacity": power, "updated_date": pd.to_datetime("1970-01-01", utc=True)} for date in dates_of_interest}
306
+ for plant_name, power in plants_metadata.items()}
307
+
308
+ # print(results_sorted)
309
+ for plant, unavailabilities in results_sorted.items():
310
+ # Get the full power of a given plant according to the sorted results
311
+ original_power = plants_metadata[plant]
312
+ # Get all the unavailabilities scheduled for the plant.
313
+ # This is actually apparently just getting the metadata though???
314
+ results_current_plant = results_plants[plant]
315
 
316
+ for unavailability in unavailabilities:
317
+ # For each unavailability, the resulting power, start and end datetime are collected. Need to collect updated_date
318
+ power_unavailability = unavailability["available_capacity"]
319
+ updated_date_unav = unavailability["updated_date"]
320
+ # The date comes as a string
321
+ start_datetime_unav = unavailability["start_date"]
322
+ end_datetime_unav = unavailability["end_date"]
323
+ start_date_unav = start_datetime_unav.date() # Extract date part
324
+ end_date_unav = end_datetime_unav.date() # Extract date part
325
+
326
+ # For the current unavailability, we want to find which days it affects
327
+ for day in dates_of_interest:
328
+
329
+ start_hour = start_datetime_unav.hour
330
+ start_minute = start_datetime_unav.minute
331
+ end_hour = end_datetime_unav.hour
332
+ end_minute = end_datetime_unav.minute
333
+
334
+ if start_date_unav <= day <= end_date_unav:
335
+ # Check if the day is already updated with a later update_date
336
+
337
+ if day in results_current_plant and updated_date_unav <= results_current_plant[day]["updated_date"]:
338
+ # Here is likely where we can do the filter for worst case scenario
339
+ # --------------------------- !!!!!!CREATE NEW FILTER THAT KEEPS ONLY MOST RESTRICTIVE OVERLAP!!!!!! --------------------------- #
340
+ # if power_unavailability < results_current_plant[day]['available_capacity']:
341
+
342
+ # # Calculate the % of the day that the plant is under maintenance
343
+ # if start_date_unav == day and day == end_date_unav:
344
+ # # The unavailability starts and ends on the same day
345
+ # percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
346
+ # elif start_date_unav == day:
347
+ # # The unavailability starts on the current day but ends on a later day
348
+ # percentage_of_day = (24 * 60 - (start_hour * 60 + start_minute)) / (24 * 60)
349
+ # elif day == end_date_unav:
350
+ # # The unavailability starts on a previous day and ends on the current day
351
+ # percentage_of_day = (end_hour * 60 + end_minute) / (24 * 60)
352
+ # else:
353
+ # # The unavailability covers the entire day
354
+ # percentage_of_day = 1
355
+
356
+ # --------------------------- !!!!!!CREATE NEW FILTER THAT KEEPS ONLY MOST RESTRICTIVE OVERLAP!!!!!! --------------------------- #
357
+ # else:
358
+
359
+ continue # Skip to the next loop if there is already information for a later update_date
360
+
361
+ # Calculate the % of the day that the plant is under maintenance
362
+ if start_date_unav == day and day == end_date_unav:
363
+ # The unavailability starts and ends on the same day
364
+ percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
365
+ elif start_date_unav == day:
366
+ # The unavailability starts on the current day but ends on a later day
367
+ percentage_of_day = (24 * 60 - (start_hour * 60 + start_minute)) / (24 * 60)
368
+ elif day == end_date_unav:
369
+ # The unavailability starts on a previous day and ends on the current day
370
+ percentage_of_day = (end_hour * 60 + end_minute) / (24 * 60)
371
+ else:
372
+ # The unavailability covers the entire day
373
+ percentage_of_day = 1
374
+
375
+ # The average power of the day is calculated
376
+ power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * original_power
377
+
378
+ # Update the available_capacity for the day only if it's not already updated with a later update_date
379
+ if (day not in results_current_plant):
380
+ results_current_plant[day] = {"available_capacity": power_of_day, "updated_date": updated_date_unav}
381
+
382
+ elif (day in results_current_plant) and (updated_date_unav > results_current_plant[day]["updated_date"]) and (power_of_day < results_current_plant[day]['available_capacity']):
383
+ results_current_plant[day] = {"available_capacity": power_of_day, "updated_date": updated_date_unav}
384
+
385
+ else:
386
+ continue
387
 
 
388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
  output_results = {}
391
  for plant, plant_data in results_plants.items():
392
  available_capacity_per_day = {str(date): data["available_capacity"] for date, data in plant_data.items()}
393
  output_results[plant] = available_capacity_per_day
394
 
395
+ # print(output_results)
396
  add_total(output_results)
397
+ # print("Done")
398
+ # print(results_plants)
399
+ # Convert datetime key to string to store in mongodb
400
  output_results = {plant: {str(date): power for date, power in plant_data.items()} for plant, plant_data in output_results.items()}
401
  output_results = pd.DataFrame(output_results)
402
+ print(output_results)
403
 
404
  # -------------------------------------------------
405
  # Calculate the average of each column excluding the last row
 
407
 
408
  # Replace the last row with the calculated averages
409
  output_results.iloc[-1, :] = averages
410
+
411
  output_results = output_results.to_dict()
412
 
413
  def turn_total_row_to_avg(data):
 
418
 
419
  turn_total_row_to_avg(output_results)
420
 
421
+ # print(output_results)
422
+
423
  json_data = json.dumps(output_results)
424
  # print(json_data)
425
  return json_data
 
537
  df_photo_date_2.index = pd.to_datetime(df_photo_date_2.index)
538
 
539
  # Calculate monthly averages with date in yyyy-mm format
540
+ monthly_average_nucmonitor = df_nucmonitor_2.resample('M').mean()
541
  monthly_average_nucmonitor.index = monthly_average_nucmonitor.index.strftime('%Y-%m')
542
 
543
+ monthly_average_photo_date = df_photo_date_2.resample('M').mean()
544
  monthly_average_photo_date.index = monthly_average_photo_date.index.strftime('%Y-%m')
545
 
546