Diego Marroquin committed on
Commit b63a760 · 1 Parent(s): e6ba92b

Just throwing everything into the streamlit fuckit

Files changed (2)
  1. .gitignore +1 -0
  2. app.py +516 -7
.gitignore ADDED
@@ -0,0 +1 @@
1
+ /app_with_api.py
app.py CHANGED
@@ -4,6 +4,506 @@ import pandas as pd
4
  import json
5
  import io
6
  import datetime
7
+ import pandas as pd
8
+ import numpy as np
9
+ from flask import Flask, jsonify, request
10
+ from flask_restx import Api, Resource, Namespace
11
+ # from flask_httpauth import HTTPBasicAuth
12
+ import requests
13
+ import base64
14
+ import json
15
+ import datetime
16
+ from calendar import monthrange
17
+ import pymongo
18
+ from mongoengine import StringField, ListField, DateTimeField, DictField
19
+
20
+ def mongo_unavs_call(user_input_start_date, user_input_end_date, user_input_photo_date, user_input_past_date):
21
+ print("Starting mongo_unavs_call")
22
+ # Connect to the MongoDB database
23
+ user = "dmarroquin"
24
+ passw = "tN9XpCCQM2MtYDme"
25
+ host = "nucmonitordata.xxcwx9k.mongodb.net"
26
+ client = pymongo.MongoClient(
27
+ f"mongodb+srv://{user}:{passw}@{host}/?retryWrites=true&w=majority"
28
+ )
29
+
30
+ db = client["data"]
31
+ collection = db["unavs"]
32
+
33
+ start_date = f"{user_input_start_date}T00:00:00"
34
+ end_date = f"{user_input_end_date}T23:59:59"
35
+
36
+ pipeline = [
37
+ {
38
+ "$unwind": "$results"
39
+ },
40
+ {
41
+ "$unwind": "$results.generation_unavailabilities"
42
+ },
43
+ {
44
+ "$match": {
45
+ "results.generation_unavailabilities.production_type": "NUCLEAR",
46
+ "results.generation_unavailabilities.start_date": {"$lte": end_date},
47
+ "results.generation_unavailabilities.end_date": {"$gte": start_date},
48
+ "results.generation_unavailabilities.updated_date": {"$lte": end_date}
49
+ }
50
+ },
51
+ {
52
+ "$project": {
53
+ "_id": 0,
54
+ "generation_unavailabilities": "$results.generation_unavailabilities"
55
+ }
56
+ }
57
+ ]
58
+
59
+ result = collection.aggregate(pipeline)
60
+
61
+ return list(result)
62
+
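As a rough illustration of what this aggregation returns, here is a pure-Python equivalent run on one hypothetical document shaped like the ones stored in the unavs collection (field values below are made up, not real data):

# Hypothetical stored document: one API snapshot with nested unavailability records
doc = {
    "current_datetime": "05/05/2023, 10:00:00",
    "results": [
        {"generation_unavailabilities": [
            {"production_type": "NUCLEAR", "start_date": "2023-05-02T00:00:00+02:00",
             "end_date": "2023-05-10T00:00:00+02:00", "updated_date": "2023-05-01T08:00:00+02:00"},
            {"production_type": "HYDRO", "start_date": "2023-05-02T00:00:00+02:00",
             "end_date": "2023-05-03T00:00:00+02:00", "updated_date": "2023-05-01T08:00:00+02:00"},
        ]}
    ],
}

start_date, end_date = "2023-05-01T00:00:00", "2023-05-31T23:59:59"

# $unwind x2, then $match, then $project -- the same filtering done in plain Python
flat = [
    {"generation_unavailabilities": u}
    for r in doc["results"]
    for u in r["generation_unavailabilities"]
    if u["production_type"] == "NUCLEAR"
    and u["start_date"] <= end_date
    and u["end_date"] >= start_date
    and u["updated_date"] <= end_date
]
print(flat)  # only the NUCLEAR record survives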
63
+ # --------------------------------------------------------------------------------------- #
64
+
65
+ # Convert the dictionary of dictionaries to JSON
66
+ def convert_to_json(item):
67
+ if isinstance(item, dict):
68
+ return {str(k): convert_to_json(v) for k, v in item.items()}
69
+ elif isinstance(item, list):
70
+ return [convert_to_json(i) for i in item]
71
+ elif isinstance(item, ObjectId):
72
+ return str(item)
73
+ else:
74
+ return item
75
+ # --------------------------------------------------------------------------------------- #
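Note that ObjectId, referenced in the isinstance check above, is not imported anywhere in this file; it normally comes from bson, which ships with pymongo. A small self-contained usage sketch (the document below is made up):

from bson import ObjectId  # ships with pymongo; needed for the isinstance check above

def convert_to_json(item):  # copy of the helper above
    if isinstance(item, dict):
        return {str(k): convert_to_json(v) for k, v in item.items()}
    elif isinstance(item, list):
        return [convert_to_json(i) for i in item]
    elif isinstance(item, ObjectId):
        return str(item)
    else:
        return item

doc = {"_id": ObjectId("64a0f0f0f0f0f0f0f0f0f0f0"), "name": "CHOOZ 1", "values": [1500.0]}
print(convert_to_json(doc))  # {'_id': '64a0f0f0f0f0f0f0f0f0f0f0', 'name': 'CHOOZ 1', 'values': [1500.0]}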
76
+
77
+ # Function gives the totals of the data. When printed as a dataframe/excel,
78
+ # it will give a final row with the total for each plant and the overall total
79
+ def add_total(data):
80
+ total_values = {}
81
+ for key in data:
82
+ daily_values = data[key]
83
+ total = sum(daily_values.values())
84
+ daily_values["Total"] = total
85
+ for date, value in daily_values.items():
86
+ if date not in total_values:
87
+ total_values[date] = value
88
+ else:
89
+ total_values[date] += value
90
+
91
+ data["Total"] = total_values
92
+
93
+ # --------------------------------------------------------------------------------------- #
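A quick worked example of the totals helper above, using two hypothetical plants over two days (the numbers are illustrative):

def add_total(data):  # copy of the helper above
    total_values = {}
    for key in data:
        daily_values = data[key]
        total = sum(daily_values.values())
        daily_values["Total"] = total
        for date, value in daily_values.items():
            if date not in total_values:
                total_values[date] = value
            else:
                total_values[date] += value
    data["Total"] = total_values

data = {"CHOOZ 1": {"2023-05-01": 1500.0, "2023-05-02": 750.0},
        "CHOOZ 2": {"2023-05-01": 1500.0, "2023-05-02": 1500.0}}
add_total(data)
# Each plant gains a "Total" column and a "Total" row is appended:
# data["CHOOZ 1"]["Total"] == 2250.0, data["CHOOZ 2"]["Total"] == 3000.0
# data["Total"] == {"2023-05-01": 3000.0, "2023-05-02": 2250.0, "Total": 5250.0}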
94
+
95
+ # This file will simply connect to the RTE API and get the data directly from there
96
+
97
+ # Function to create an authentication token. This token is then used in the HTTP requests to the API for authentication.
98
+ # It is necessary to receive data from RTE.
99
+ def get_oauth():
100
+ # ID from the user. This is encoded to base64 and sent in an HTTP request to receive the oauth token.
101
+ # This ID is from my account (RMP). However, another account can be created in the RTE API portal to get another ID.
102
+ joined_ID = '057e2984-edb3-4706-984b-9ea0176e74db:dc9df9f7-9f91-4c7a-910c-15c4832fb7bc'
103
+ b64_ID = base64.b64encode(joined_ID.encode('utf-8'))
104
+ b64_ID_decoded = b64_ID.decode('utf-8')
105
+
106
+ # Headers for the HTTP request
107
+ headers = {'Content-Type': 'application/x-www-form-urlencoded',
108
+ 'Authorization': f'Basic {b64_ID_decoded}'}
109
+ api_url = 'https://digital.iservices.rte-france.com/token/oauth/'
110
+ # Call to the API and if successful, the response will be 200.
111
+ response = requests.post(api_url, headers=headers)
112
+
113
+ # When positive response, the token is retrieved
114
+ data = response.json()
115
+ oauth = data['access_token']
116
+
117
+ return(oauth)
118
+
119
+ # --------------------------------------------------------------------------------------- #
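The token dance above is the standard OAuth2 client-credentials flow: base64-encode "client_id:client_secret", send it as a Basic Authorization header, and read access_token from the JSON response. A minimal sketch of the same pattern (the ID/secret below are placeholders, not real keys):

import base64
import requests

def get_token(client_id: str, client_secret: str) -> str:
    # The Basic auth header is base64("client_id:client_secret")
    creds = base64.b64encode(f"{client_id}:{client_secret}".encode("utf-8")).decode("utf-8")
    headers = {"Content-Type": "application/x-www-form-urlencoded",
               "Authorization": f"Basic {creds}"}
    response = requests.post("https://digital.iservices.rte-france.com/token/oauth/", headers=headers)
    response.raise_for_status()  # unlike the code above, fail loudly on a non-200 answer
    return response.json()["access_token"]

# token = get_token("MY_CLIENT_ID", "MY_CLIENT_SECRET")  # placeholder credentials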
120
+
121
+ # This function makes several calls to the RTE API (because the maximum time between start_date and end_date is 1 month)
122
+ # the argument past_photo is a boolean (True, False) that indicates whether we want to take a photo of the past or not
123
+ # However, the past_photo and past_date parts are not yet implemented.
124
+ def get_unavailabilities(usr_start_date, usr_end_date):
125
+ oauth = get_oauth()
126
+ print("Get Oauth done")
127
+ date_type = 'APPLICATION_DATE'
128
+
129
+ # Current year/month/day/hour/minute/second is calculated for the last call to the API. For instance, if today is 05/05/2023,
130
+ # the last call of the API will be from 01/05/2023 to 05/05/2023 (+current hour,minute,second).
131
+ current_datetime = datetime.datetime.now()
132
+ current_year = current_datetime.strftime('%Y')
133
+ current_month = current_datetime.strftime('%m')
134
+ current_day = current_datetime.strftime('%d')
135
+ current_hour = current_datetime.strftime('%H')
136
+ current_minute = current_datetime.strftime('%M')
137
+ current_second = current_datetime.strftime('%S')
138
+
139
+ # Headers for the HTTP request
140
+ headers = {'Host': 'digital.iservices.rte-france.com',
141
+ 'Authorization': f'Bearer {oauth}'
142
+ }
143
+
144
+ # the responses object is where we are going to store all the responses from the API.
145
+ # Initially, current_datetime is included to know when we have called the API and all the
146
+ # individual results of the API (because each call covers max 1 month) are stored in responses["results"]
147
+ responses = {"current_datetime": current_datetime.strftime("%m/%d/%Y, %H:%M:%S"),
148
+ "results":[]
149
+ }
150
+
151
+ # --------------------------- HERE HAVE TO GET THE RANGE OF DATES FROM START AND END AND PUT THEM INTO LIST --------------------------- #
152
+ # Convert start_date and end_date to datetime objects
153
+ usr_start_date = str(usr_start_date)
154
+ usr_end_date = str(usr_end_date)
155
+ start_date_obj = datetime.datetime.strptime(usr_start_date, "%Y-%m-%d").date()
156
+ end_date_obj = datetime.datetime.strptime(usr_end_date, "%Y-%m-%d").date()
157
+ # start_date_obj = usr_start_date
158
+ # end_date_obj = usr_end_date
159
+ # Initialize lists to store years and months
160
+ years = []
161
+ months = []
162
+
163
+ # Generate the range of years and months
164
+ current_date = start_date_obj
165
+ while current_date <= end_date_obj:
166
+ years.append(current_date.year)
167
+ months.append(current_date.month)
168
+ current_date += datetime.timedelta(days=1)
169
+
170
+ # Remove duplicates from the lists
171
+ years = list(set(years))
172
+ months = list(set(months))
173
+ years.sort()
174
+ months.sort()
175
+ print(years)
176
+ print(months)
177
+ # --------------------------- HERE HAVE TO GET THE RANGE OF DATES FROM START AND END AND PUT THEM INTO LIST --------------------------- #
178
+
179
+ # Loop to call the API all the necessary times.
180
+ for i in range(len(years)):
181
+ for j in range(len(months)):
182
+ # start_year and start_month of the current call to the API
183
+ start_year = years[i]
184
+ start_month = months[j]
185
+ # start_date is constructed. Now we only need to construct the end_date.
186
+ start_date = f'{start_year}-{start_month}-01T00:00:00%2B02:00'
187
+
188
+ if True:
189
+ # Calculate the number of days in the current month
190
+ _, num_days = monthrange(int(start_year), int(start_month))
191
+ end_date = f'{start_year}-{start_month}-{num_days}T23:59:59%2B02:00'
192
+
193
+ print(f'start date is {start_date}')
194
+ print(f'end date is {end_date}')
195
+
196
+ # Call to the API
197
+ api_url = f'https://digital.iservices.rte-france.com/open_api/unavailability_additional_information/v4/generation_unavailabilities?date_type={date_type}&start_date={start_date}&end_date={end_date}'
198
+
199
+ response = requests.get(api_url, headers=headers)
200
+ json_response = response.json()
201
+ responses["results"].append(json_response)
202
+ # print(responses)
203
+ return responses
204
+
205
+ # --------------------------------------------------------------------------------------- #
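For reference, the per-month windows this loop aims to cover can also be enumerated by walking (year, month) pairs from the start date to the end date. This is only an illustration of one way to build the same monthly start/end boundaries, not the committed code; month_windows and the example dates are made up:

import datetime
from calendar import monthrange

def month_windows(start: datetime.date, end: datetime.date):
    """Yield (first_day, last_day) date pairs for every calendar month touched by [start, end]."""
    year, month = start.year, start.month
    while (year, month) <= (end.year, end.month):
        _, num_days = monthrange(year, month)
        yield datetime.date(year, month, 1), datetime.date(year, month, num_days)
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)

for first, last in month_windows(datetime.date(2022, 11, 15), datetime.date(2023, 2, 3)):
    print(f"{first}T00:00:00 -> {last}T23:59:59")
# 2022-11 -> 2022-11-30, 2022-12 -> 2022-12-31, 2023-01 -> 2023-01-31, 2023-02 -> 2023-02-28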
206
+
207
+
208
+ def nuc_monitor(usr_start_date, usr_end_date, photo_date, past_date):
209
+ # # Slightly changed metadata to fit the data from the RTE API: ST-LAURENT B 2 --> ST LAURENT 2, ....
210
+
211
+ plants_metadata = {"BELLEVILLE 1": 1310.0, "BELLEVILLE 2": 1310.0, "BLAYAIS 1": 910.0, "BLAYAIS 2": 910.0,
212
+ "BLAYAIS 3": 910.0, "BLAYAIS 4": 910.0, "BUGEY 2": 910.0, "BUGEY 3": 910.0, "BUGEY 4": 880.0,
213
+ "BUGEY 5": 880.0, "CATTENOM 1": 1300.0, "CATTENOM 2": 1300.0, "CATTENOM 3": 1300.0,
214
+ "CATTENOM 4": 1300.0, "CHINON 1": 905.0, "CHINON 2": 905.0, "CHINON 3": 905.0,
215
+ "CHINON 4": 905.0, "CHOOZ 1": 1500.0, "CHOOZ 2": 1500.0, "CIVAUX 1": 1495.0,
216
+ "CIVAUX 2": 1495.0, "CRUAS 1": 915.0, "CRUAS 2": 915.0, "CRUAS 3": 915.0, "CRUAS 4": 915.0,
217
+ "DAMPIERRE 1": 890.0, "DAMPIERRE 2": 890.0, "DAMPIERRE 3": 890.0, "DAMPIERRE 4": 890.0,
218
+ "FLAMANVILLE 1": 1330.0, "FLAMANVILLE 2": 1330.0, "GOLFECH 1": 1310.0, "GOLFECH 2": 1310.0,
219
+ "GRAVELINES 1": 910.0, "GRAVELINES 2": 910.0, "GRAVELINES 3": 910.0, "GRAVELINES 4": 910.0,
220
+ "GRAVELINES 5": 910.0, "GRAVELINES 6": 910.0, "NOGENT 1": 1310.0, "NOGENT 2": 1310.0,
221
+ "PALUEL 1": 1330.0, "PALUEL 2": 1330.0, "PALUEL 3": 1330.0, "PALUEL 4": 1330.0, "PENLY 1": 1330.0,
222
+ "PENLY 2": 1330.0, "ST ALBAN 1": 1335.0, "ST ALBAN 2": 1335.0, "ST LAURENT 1": 915.0,
223
+ "ST LAURENT 2": 915.0, "TRICASTIN 1": 915.0, "TRICASTIN 2": 915.0, "TRICASTIN 3": 915.0,
224
+ "TRICASTIN 4": 915.0, "FESSENHEIM 1": 880.0, "FESSENHEIM 2": 880.0}
225
+
226
+ # --------------------- INITIAL DATA CLEANING FOR RTE DATA ------------------------ #
227
+ # unav_API = rte_data.json()
228
+ rte_stuff = get_unavailabilities(usr_start_date, usr_end_date)
229
+ unav_API = rte_stuff
230
+ # print(unav_API)
231
+ # Store the unavailabilities in a list
232
+ unavailabilities = []
233
+ print("Unav")
234
+ for unavailabilities_API in unav_API['results']:
235
+ try:
236
+ unavailabilities.extend(unavailabilities_API.get('generation_unavailabilities', []))
237
+ except:
238
+ print('There was an error')
239
+ # print(unavailabilities_API)
240
+ rte_df = pd.DataFrame(unavailabilities)
241
+
242
+
243
+ def unpack_values(row):
244
+ if isinstance(row["values"], list):
245
+ for key, value in row["values"][0].items():
246
+ row[key] = value
247
+ return row
248
+ # Apply the function to each row in the DataFrame
249
+ rte_df = rte_df.apply(unpack_values, axis=1)
250
+
251
+ # Drop the original "values" column
252
+ rte_df.drop("values", axis=1, inplace=True)
253
+
254
+ # Unpack the unit column
255
+ rte_df2 = pd.concat([rte_df, pd.json_normalize(rte_df['unit'])], axis=1)
256
+ rte_df2.drop('unit', axis=1, inplace=True)
257
+
258
+
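The unit column holds one dict per row; pd.json_normalize expands those dicts into flat columns, which are then concatenated back onto the frame. A toy example of the pattern (identifiers, plant names and codes below are illustrative):

import pandas as pd

df = pd.DataFrame({
    "identifier": ["A1", "B2"],
    "unit": [{"eic_code": "XX-1", "name": "CHOOZ 1", "production_type": "NUCLEAR"},
             {"eic_code": "XX-2", "name": "CHOOZ 2", "production_type": "NUCLEAR"}],
})
flat = pd.concat([df.drop(columns="unit"), pd.json_normalize(df["unit"].tolist())], axis=1)
print(flat.columns.tolist())  # ['identifier', 'eic_code', 'name', 'production_type']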
259
+ rte_nuclear_unav = rte_df2[(rte_df2["production_type"] == "NUCLEAR")]
260
+
261
+ # --------------------- INITIAL DATA CLEANING FOR RTE DATA ------------------------ #
262
+
263
+
264
+ # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
265
+
266
+ # # Create a DataFrame
267
+ mongo_data = mongo_unavs_call(usr_start_date, usr_end_date, photo_date, past_date)
268
+ mongo_df = pd.DataFrame(mongo_data)
269
+
270
+ # Unpack the dictionaries into separate columns
271
+ mongo_df_unpacked = pd.json_normalize(mongo_df['generation_unavailabilities'])
272
+
273
+ # Concatenate the unpacked columns with the original DataFrame
274
+ mongo_df_result = pd.concat([mongo_df, mongo_df_unpacked], axis=1)
275
+
276
+ # Drop the original column
277
+ mongo_df_result.drop(columns=['generation_unavailabilities'], inplace=True)
278
+ mongo_df_columns = mongo_df_result.columns
279
+
280
+ mongo_df_result['start_date'] = mongo_df_result['values'].apply(lambda x: x[0]['start_date'])
281
+ mongo_df_result['end_date'] = mongo_df_result['values'].apply(lambda x: x[0]['end_date'])
282
+ mongo_df_result['available_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['available_capacity'])
283
+ mongo_df_result['unavailable_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['unavailable_capacity'])
284
+ # print(mongo_df_result)
285
+ # print(mongo_df_result.columns)
286
+ # Drop the original 'values' column
287
+ mongo_df_result.drop('values', axis=1, inplace=True)
288
+ mongo_df2 = mongo_df_result
289
+ mongo_df2.rename(columns=lambda col: col.replace('unit.', ''), inplace=True)
290
+
291
+
292
+
293
+ # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
294
+
295
+ # Make the two dataframes have the same columns
296
+ mongo_unavs = mongo_df2.copy()
297
+ mongo_unavs.drop(columns="type", inplace=True)
298
+
299
+ rte_unavs = rte_nuclear_unav.copy()
300
+ rte_unavs.drop(columns="type", inplace=True)
301
+
302
+ # Merge dataframes
303
+ column_order = mongo_unavs.columns
304
+ # print(column_order)
305
+ merged_df = pd.concat([mongo_unavs[column_order], rte_unavs[column_order]], ignore_index=True)
306
+ # merged_df['updated_date'] = merged_df['updated_date'].astype(str)
307
+
308
+ # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
309
+ # start_date_str = usr_start_date.strftime("%Y-%m-%d")
310
+ start_date_str = str(usr_start_date)
311
+ # end_date_str = usr_end_date.strftime("%Y-%m-%d")
312
+ end_date_str = str(usr_end_date)
313
+ current_datetime = datetime.datetime.now()
314
+ current_datetime_str = current_datetime.strftime("%Y-%m-%d")
315
+
316
+ if photo_date == True:
317
+ nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= past_date)]
318
+ photo_date = True
319
+ else: # need to add updated_date as a conditional to get the newest for that day
320
+ nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= end_date_str)]
321
+
322
+ # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
323
+
324
+ # --------------------- SECOND DATA CLEANING ------------------------ #
325
+ # This filter should take only the most recent id and discard the rest
326
+
327
+ # Sort by updated date
328
+ sorted_df = nuclear_unav.copy().sort_values(by='updated_date')
329
+
330
+ sorted_df = sorted_df.copy().reset_index(drop=True)
331
+
332
+ # Filter to get identifiers
333
+ filtered_id_df = sorted_df.copy()
334
+ filtered_id_df.drop_duplicates(subset='identifier', keep='last', inplace=True)
335
+ filtered_id_df = filtered_id_df.copy().reset_index(drop=True)
336
+
337
+ # This filter should take all the dates with unavs that include days with unavs in the range of the start and end date
338
+
339
+ filtered_df = filtered_id_df.copy()[(filtered_id_df.copy()['start_date'] <= end_date_str) & (filtered_id_df.copy()['end_date'] >= start_date_str)]
340
+
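The sort-then-drop_duplicates pair above keeps only the latest revision of each unavailability identifier; a toy illustration of that pattern (the rows and values are made up):

import pandas as pd

revisions = pd.DataFrame({
    "identifier": ["unav-1", "unav-1", "unav-2"],
    "updated_date": ["2023-05-01T08:00:00", "2023-05-03T09:00:00", "2023-05-02T10:00:00"],
    "available_capacity": [0.0, 455.0, 910.0],
})
latest = (revisions.sort_values(by="updated_date")
                   .drop_duplicates(subset="identifier", keep="last")
                   .reset_index(drop=True))
print(latest)
# unav-1 keeps only its 2023-05-03 revision (455.0); unav-2 is untouched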
341
+ # Standardize datetime in dataframe
342
+ filtered_df2 = filtered_df.copy() # This code will just standardize datetime stuff
343
+ filtered_df2['creation_date'] = pd.to_datetime(filtered_df2['creation_date'], utc=True)
344
+ filtered_df2['updated_date'] = pd.to_datetime(filtered_df2['updated_date'], utc=True)
345
+ filtered_df2['start_date'] = pd.to_datetime(filtered_df2['start_date'], utc=True)
346
+ filtered_df2['end_date'] = pd.to_datetime(filtered_df2['end_date'], utc=True)
347
+
348
+ # Drop the duplicates
349
+ filtered_df3 = filtered_df2.copy().drop_duplicates()
350
+
351
+ # start_date_datetime = pd.to_datetime(start_date_str, utc=True) # Remove timezone info
352
+ start_date_datetime = pd.Timestamp(start_date_str, tz='UTC')
353
+ # end_date_datetime = pd.to_datetime(end_date_str, utc=True)
354
+ end_date_datetime = pd.Timestamp(end_date_str, tz='UTC')
355
+
356
+ # Turn df into dict for json processing
357
+ filtered_unavs = filtered_df3.copy().to_dict(orient='records')
358
+
359
+ results = {}
360
+
361
+ for unav in filtered_unavs:
362
+ plant_name = unav['name']
363
+ if plant_name in results:
364
+ # If the key is already in the dictionary, append unavailability to the list
365
+ results[plant_name].append({'status': unav['status'],
366
+ 'id': unav['message_id'],
367
+ 'creation_date': unav['creation_date'],
368
+ 'updated_date': unav['updated_date'],
369
+ 'start_date': unav['start_date'],
370
+ 'end_date': unav['end_date'],
371
+ 'available_capacity': unav['available_capacity']})
372
+ else:
373
+ # if the key of the plant is not there yet, create a new element of the dictionary
374
+
375
+ # Get message_id instead of identifier, easier to identify stuff with it
376
+ results[plant_name] = [{'status': unav['status'],
377
+ 'id': unav['message_id'],
378
+ 'creation_date': unav['creation_date'],
379
+ 'updated_date': unav['updated_date'],
380
+ 'start_date': unav['start_date'],
381
+ 'end_date': unav['end_date'],
382
+ 'available_capacity': unav['available_capacity']}]
383
+
384
+ # Custom encoder to handle datetime objects
385
+ class DateTimeEncoder(json.JSONEncoder):
386
+ def default(self, o):
387
+ if isinstance(o, datetime.datetime):
388
+ return o.isoformat()
389
+ return super().default(o)
390
+
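For context, an encoder like the one above plugs into json.dumps via the cls argument; a minimal usage sketch:

import datetime
import json

class DateTimeEncoder(json.JSONEncoder):  # same pattern as the class above
    def default(self, o):
        if isinstance(o, datetime.datetime):
            return o.isoformat()
        return super().default(o)

payload = {"updated_date": datetime.datetime(2023, 5, 5, 12, 30)}
print(json.dumps(payload, cls=DateTimeEncoder))  # {"updated_date": "2023-05-05T12:30:00"}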
391
+ results_holder = results
392
+
393
+ # Create new dict with each plant only having start_date less than user_end_date and an end_date greater than user_start_date
394
+ # should just be doing the same as above in the df, filtering only dates that include the start and end date
395
+ start_date = start_date_datetime.date()
396
+ end_date = end_date_datetime.date()
397
+ results_filtered = results_holder
398
+ for key, value in results_filtered.items():
399
+ filtered_values = []
400
+ for item in value:
401
+ item_start_date = item['start_date'].date()
402
+ item_end_date = item['end_date'].date()
403
+ identifier = item['id']
404
+ if item_start_date < end_date and item_end_date > start_date and identifier not in filtered_values:
405
+ filtered_values.append(item)
406
+ results_filtered[key] = filtered_values
407
+
408
+
409
+ sorted_results = results_filtered
410
+ # --------------------- SECOND DATA CLEANING ------------------------ #
411
+
412
+ # --------------------------- HERE IS THE FINAL PROCESS --------------------------- #
413
+
414
+ for key, value in sorted_results.items():
415
+ sorted_results[key] = sorted(value, key=lambda x: x['updated_date'])
416
+
417
+ results_sorted = sorted_results
418
+
419
+ dates_of_interest = [start_date] # We are creating a list of dates ranging from user specified start and end dates
420
+ date_plus_one = start_date
421
+
422
+ while date_plus_one < end_date:
423
+ date_plus_one = date_plus_one + datetime.timedelta(days=1)
424
+ dates_of_interest.append(date_plus_one)
425
+
426
+ # This is to standardize the datetimes. Without this, the datetime calculations for each power plant will not work
427
+ results_plants = {plant_name: {date: {"available_capacity": power, "updated_date": pd.to_datetime("1970-01-01", utc=True)} for date in dates_of_interest}
428
+ for plant_name, power in plants_metadata.items()}
429
+
430
+
431
+ for plant, unavailabilities in results_sorted.items():
432
+
433
+ original_power = plants_metadata[plant]
434
+ # Get all the unavailabilities scheduled for the plant.
435
+ results_current_plant = results_plants[plant]
436
+
437
+ for unavailability in unavailabilities:
438
+ # For each unavailability, the resulting power, start and end datetime are collected. Need to collect updated_date
439
+ power_unavailability = unavailability["available_capacity"]
440
+ updated_date_unav = unavailability["updated_date"]
441
+ # The dates are pandas Timestamps after the standardization above
442
+ start_datetime_unav = unavailability["start_date"]
443
+ end_datetime_unav = unavailability["end_date"]
444
+ start_date_unav = start_datetime_unav.date() # Extract date part
445
+ end_date_unav = end_datetime_unav.date() # Extract date part
446
+
447
+ # For the current unavailability, we want to find which days it affects
448
+ for day in dates_of_interest:
449
+
450
+ start_hour = start_datetime_unav.hour
451
+ start_minute = start_datetime_unav.minute
452
+ end_hour = end_datetime_unav.hour
453
+ end_minute = end_datetime_unav.minute
454
+
455
+ if start_date_unav <= day <= end_date_unav:
456
+ # Check if the day is already updated with a later update_date
457
+ if day in results_current_plant and updated_date_unav <= results_current_plant[day]["updated_date"]:
458
+ continue # Skip to the next loop if there is already information for a later update_date
459
+
460
+ # Calculate the % of the day that the plant is under maintenance
461
+ if start_date_unav == day and day == end_date_unav:
462
+ # The unavailability starts and ends on the same day
463
+ percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
464
+ elif start_date_unav == day:
465
+ # The unavailability starts on the current day but ends on a later day
466
+ percentage_of_day = (24 * 60 - (start_hour * 60 + start_minute)) / (24 * 60)
467
+ elif day == end_date_unav:
468
+ # The unavailability starts on a previous day and ends on the current day
469
+ percentage_of_day = (end_hour * 60 + end_minute) / (24 * 60)
470
+ else:
471
+ # The unavailability covers the entire day
472
+ percentage_of_day = 1
473
+
474
+ # The average power of the day is calculated
475
+ power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * original_power
476
+
477
+ # Update the available_capacity for the day only if it's not already updated with a later update_date
478
+ if day not in results_current_plant or updated_date_unav > results_current_plant[day]["updated_date"]:
479
+ results_current_plant[day] = {"available_capacity": power_of_day, "updated_date": updated_date_unav}
480
+
481
+
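A worked example of the day-averaging above, with made-up numbers: an outage on a 910.0 MW unit that lowers available capacity to 0.0 MW from 06:00 to 18:00 on a single day.

original_power = 910.0          # installed capacity (MW)
power_unavailability = 0.0      # available capacity during the outage (MW)
start_hour, start_minute = 6, 0
end_hour, end_minute = 18, 0

# Outage starts and ends on the same day: 12h out of 24h
percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * original_power
print(percentage_of_day, power_of_day)  # 0.5 455.0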
482
+ output_results = {}
483
+ for plant, plant_data in results_plants.items():
484
+ available_capacity_per_day = {str(date): data["available_capacity"] for date, data in plant_data.items()}
485
+ output_results[plant] = available_capacity_per_day
486
+
487
+ # print(output_results)
488
+ add_total(output_results)
489
+ # print("Done")
490
+ # print(results_plants)
491
+ # Convert datetime key to string to store in mongodb
492
+ output_results = {plant: {str(date): power for date, power in plant_data.items()} for plant, plant_data in output_results.items()}
493
+ # print(output_results)
494
+ # -------------------------------------------------
495
+ if photo_date == False:
496
+
497
+ json_data = json.dumps(output_results)
498
+ # print(json_data)
499
+ return json_data
500
+ else:
501
+
502
+ json_data = json.dumps(output_results)
503
+ # print(json_data)
504
+ return json_data
505
+ # -------------------------------------------------
506
+ return
507
 
508
  st.title("Nucmonitor App")
509
 
 
@@ -17,14 +517,23 @@ if photo_date == True:
517
  else:
518
  past_date = None
519
 
520
+ @st.cache_data
521
+ def get_rte_data(start_date, end_date):
522
+ rte_data = get_unavailabilities(start_date, end_date)
523
+ print(rte_data)
524
+ return rte_data
525
+ @st.cache_data
526
+ def get_mongodb_data(start_date, end_date, photo_date, past_date):
527
+ database_data = mongo_unavs_call(start_date, end_date, photo_date, past_date)
528
+ return database_data
529
+
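st.cache_data hashes the function arguments and reuses the stored return value on repeated calls with the same inputs, so the RTE and Mongo fetches above only run once per unique date range. A minimal sketch of the pattern (slow_fetch is a made-up stand-in, not part of this app):

import time
import streamlit as st

@st.cache_data
def slow_fetch(start_date: str, end_date: str) -> dict:
    time.sleep(2)  # stand-in for a network call
    return {"start": start_date, "end": end_date}

slow_fetch("2023-05-01", "2023-05-05")  # first call runs the body (slow)
slow_fetch("2023-05-01", "2023-05-05")  # same arguments: served from cache (fast)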
530
  @st.cache_data
531
  def get_nucmonitor_data(start_date, end_date, photo_date, past_date):
22
- response_nucmonitor = requests.get(f"https://dmarr-nucpy-api.hf.space/nucpy/v1/nucmonitor?start_date={start_date}&end_date={end_date}&photo_date={photo_date}&past_date={past_date}")
23
- # response_nucmonitor = requests.get(f"http://127.0.0.1:5000/nucpy/v1/nucmonitor?start_date={start_date}&end_date={end_date}&photo_date={photo_date}&past_date={past_date}")
24
-
25
- nucmonitor_data = response_nucmonitor.json()
26
- nucmonitor_json = json.loads(nucmonitor_data)
27
- df = pd.DataFrame(nucmonitor_json)
532
+ response_nucmonitor = nuc_monitor(start_date, end_date, photo_date, past_date)
533
+ # nucmonitor_data = response_nucmonitor.json()
534
+ # nucmonitor_json = json.loads(nucmonitor_data)
535
+ print(response_nucmonitor)
536
+ df = pd.read_json(response_nucmonitor)
 
537
  return df
538
 
539
  with st.form("nucmonitor_form"):
 
@@ -96,4 +605,4 @@ if submitted:
605
  data=excel_buffer,
606
  file_name=f"nucmonitor_data_{current_year}-{current_month}-{current_day}-h{current_hour}m{current_minute}s{current_second}.xlsx",
607
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
99
- )
608
+ )