Diego Marroquin committed on
Commit 66dc998 · 1 Parent(s): 4ce1950

Debugging

Files changed (1)
  1. main.py +656 -643
main.py CHANGED
@@ -1,679 +1,692 @@
1
- import pandas as pd
2
- import numpy as np
3
- from flask import Flask, jsonify, request
4
- from flask_restx import Api, Resource, Namespace
5
- # from flask_httpauth import HTTPBasicAuth
6
- import requests
7
- import base64
8
- import json
9
- import datetime
10
- from calendar import monthrange
11
- import pymongo
12
- from mongoengine import StringField, ListField, DateTimeField, DictField
- from bson import ObjectId
13
-
14
- """
15
- This script creates an API that connects to the MongoDB database. This API will
16
- eventually allow communication between the database and the frontend.
17
- """
18
- # Connect to MongoDB
19
- # For some reason none of this works when I'm connected to the VPN
20
-
21
-
22
- app = Flask(__name__)
23
- api = Api(app, version='1.0',
24
- title='Haya Energy NucPy API',
25
- description="""
26
- API endpoints used to communicate NucPy
27
- with MongoDB
28
- """,
29
- contact="Diego",
30
- endpoint="/nucpy/v1")
31
-
32
-
33
- def mongo_unavs_call(user_input_start_date, user_input_end_date, user_input_photo_date, user_input_past_date):
34
- # Connect to the MongoDB database
35
- user = "dmarroquin"
36
- passw = "tN9XpCCQM2MtYDme"
37
- host = "nucmonitordata.xxcwx9k.mongodb.net"
38
- client = pymongo.MongoClient(
39
- f"mongodb+srv://{user}:{passw}@{host}/?retryWrites=true&w=majority"
40
- )
41
-
42
- db = client["data"]
43
- collection = db["unavs"]
44
-
45
- start_date = f"{user_input_start_date}T00:00:00"
46
- end_date = f"{user_input_end_date}T23:59:59"
47
 
48
- pipeline = [
49
- {
50
- "$unwind": "$results"
51
- },
52
- {
53
- "$unwind": "$results.generation_unavailabilities"
54
- },
55
- {
56
- "$match": {
57
- "results.generation_unavailabilities.production_type": "NUCLEAR",
58
- "results.generation_unavailabilities.start_date": {"$lte": end_date},
59
- "results.generation_unavailabilities.end_date": {"$gte": start_date},
60
- "results.generation_unavailabilities.updated_date": {"$lte": end_date}
61
- }
62
- },
63
- {
64
- "$project": {
65
- "_id": 0,
66
- "generation_unavailabilities": "$results.generation_unavailabilities"
67
- }
68
- }
69
- ]
70
-
71
- result = collection.aggregate(pipeline)
72
-
73
- return list(result)
74
-
75
- # --------------------------------------------------------------------------------------- #
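A note on the $match stage above: start_date, end_date, and updated_date are compared as plain strings. That works because ISO-8601 timestamps of the same format sort lexicographically in chronological order, as this quick illustration shows (the variable values are illustrative, not from the script):

start_date = "2023-01-01T00:00:00"
end_date = "2023-01-31T23:59:59"
record_start = "2023-01-15T08:00:00"
# Lexicographic order == chronological order for same-format ISO-8601 strings
print(start_date <= record_start <= end_date)  # True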
76
-
77
- # Convert the dictionary of dictionaries to JSON
78
- def convert_to_json(item):
79
- if isinstance(item, dict):
80
- return {str(k): convert_to_json(v) for k, v in item.items()}
81
- elif isinstance(item, list):
82
- return [convert_to_json(i) for i in item]
83
- elif isinstance(item, ObjectId):
84
- return str(item)
85
- else:
86
- return item
87
- # --------------------------------------------------------------------------------------- #
88
-
89
- # The idea of this function is to sum the total availability for each day of interest
90
- # This is already done in the Excel so it might be useful to check
91
- # The function computes the totals of the data. When printed as a dataframe/Excel sheet,
92
- # it will give a final row with the total for each plant and the total overall
93
- def add_total(data):
94
- total_values = {}
95
- for key in data:
96
- daily_values = data[key]
97
- total = sum(daily_values.values())
98
- daily_values["Total"] = total
99
- for date, value in daily_values.items():
100
- if date not in total_values:
101
- total_values[date] = value
102
- else:
103
- total_values[date] += value
104
 
105
- data["Total"] = total_values
106
 
107
- # --------------------------------------------------------------------------------------- #
108
 
109
- # This file will simply connect to RTE and get the data directly from there
110
 
111
- # Function to create an authentication token. This token is then used in the HTTP requests to the API for authentication.
112
- # It is necessary to receive data from RTE.
113
- def get_oauth():
114
- # ID from the user. This is encoded to base64 and sent in an HTTP request to receive the oauth token.
115
- # This ID is from my account (RMP). However, another account can be created in the RTE API portal to obtain another ID.
116
- joined_ID = '057e2984-edb3-4706-984b-9ea0176e74db:dc9df9f7-9f91-4c7a-910c-15c4832fb7bc'
117
- b64_ID = base64.b64encode(joined_ID.encode('utf-8'))
118
- b64_ID_decoded = b64_ID.decode('utf-8')
119
 
120
- # Headers for the HTTP request
121
- headers = {'Content-Type': 'application/x-www-form-urlencoded',
122
- 'Authorization': f'Basic {b64_ID_decoded}'}
123
- api_url = 'https://digital.iservices.rte-france.com/token/oauth/'
124
- # Call to the API and if successful, the response will be 200.
125
- response = requests.post(api_url, headers=headers)
126
 
127
- # When positive response, the token is retrieved
128
- data = response.json()
129
- oauth = data['access_token']
130
 
131
- return oauth
132
-
133
- # --------------------------------------------------------------------------------------- #
134
-
135
- # This function makes several calls to the RTE API (because the maximum time between start_date and end_date is 1 month).
136
- # The argument past_photo is a boolean (True, False) that indicates whether we want to make a photo from the past or not.
137
- # However, the past_photo part and past_date are not yet implemented.
138
- def get_unavailabilities(usr_start_date, usr_end_date):
139
- # This should be changed in the case of getting a past_photo because many of the rows that are relevant for that
140
- # past photo will not be ACTIVE anymore.
141
- # unav_status = ['ACTIVE', 'INACTIVE']
142
- # This could also be changed. Currently it means that if we call the API with start_date=01/01/2023 and end_date=01/02/2023,
143
- # it will return all the records of unavailabilities that have been updated between the two dates.
144
- # date_type = 'UPDATED_DATE'
145
- # date_type APPLICATION_DATE gets all unavailabilities with predictions in the defined dates, so that
146
- # we can get an unavailability that has updated_date outside the defined dates for start_date and end_date
147
- oauth = get_oauth()
148
- print("Get Oauth done")
149
- date_type = 'APPLICATION_DATE'
150
 
151
- # Current year/month/day/hour/minute/second is calculated for the last call to the API. For instance, if today is 05/05/2023,
152
- # the last call of the API will be from 01/05/2023 to 05/05/2023 (+current hour,minute,second).
153
- current_datetime = datetime.datetime.now()
154
- current_year = current_datetime.strftime('%Y')
155
- current_month = current_datetime.strftime('%m')
156
- current_day = current_datetime.strftime('%d')
157
- current_hour = current_datetime.strftime('%H')
158
- current_minute = current_datetime.strftime('%M')
159
- current_second = current_datetime.strftime('%S')
160
 
161
- # Headers for the HTTP request
162
- headers = {'Host': 'digital.iservices.rte-france.com',
163
- 'Authorization': f'Bearer {oauth}'
164
- }
165
 
166
- # The responses object is where we are going to store all the responses from the API.
167
- # Initially, current_datetime is included to record when we called the API, and all the
168
- # individual results of the API (because each call is max 1 month) are stored in responses["results"]
169
- responses = {"current_datetime": current_datetime.strftime("%m/%d/%Y, %H:%M:%S"),
170
- "results":[]
171
- }
172
-
173
- # --------------------------- HERE HAVE TO GET THE RANGE OF DATES FROM START AND END AND PUT THEM INTO LIST --------------------------- #
174
- # Convert start_date and end_date to datetime objects
175
- start_date_obj = datetime.datetime.strptime(usr_start_date, "%Y-%m-%d").date()
176
- end_date_obj = datetime.datetime.strptime(usr_end_date, "%Y-%m-%d").date()
177
-
178
- # Initialize lists to store years and months
179
- years = []
180
- months = []
181
-
182
- # Generate the range of years and months
183
- current_date = start_date_obj
184
- while current_date <= end_date_obj:
185
- years.append(current_date.year)
186
- months.append(current_date.month)
187
- current_date += datetime.timedelta(days=1)
188
-
189
- # Remove duplicates from the lists
190
- years = list(set(years))
191
- months = list(set(months))
192
- years.sort()
193
- months.sort()
194
- print(years)
195
- print(months)
196
- # --------------------------- HERE HAVE TO GET THE RANGE OF DATES FROM START AND END AND PUT THEM INTO LIST --------------------------- #
197
-
198
- # Loop to call the API all the necessary times.
199
- for i in range(len(years)):
200
- for j in range(len(months)):
201
- # start_year and start_month of the current call to the API
202
- start_year = years[i]
203
- start_month = months[j]
204
- # start_date is constructed. Now we only need to construct the end_date.
205
- start_date = f'{start_year}-{start_month}-01T00:00:00%2B02:00'
206
-
207
- # Calculate the number of days in the current month
209
- _, num_days = monthrange(int(start_year), int(start_month))
210
- end_date = f'{start_year}-{start_month}-{num_days}T23:59:59%2B02:00'
211
 
212
- print(f'start date is {start_date}')
213
- print(f'end date is {end_date}')
214
 
215
- # Call to the API
216
- api_url = f'https://digital.iservices.rte-france.com/open_api/unavailability_additional_information/v4/generation_unavailabilities?date_type={date_type}&start_date={start_date}&end_date={end_date}'
217
-
218
- response = requests.get(api_url, headers=headers)
219
- json_response = response.json()
220
- responses["results"].append(json_response)
221
- print(responses)
222
- return responses
223
-
224
- # --------------------------------------------------------------------------------------- #
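One caveat about the loop above: iterating over the deduplicated years and months lists as a cross product can emit spurious windows when the range crosses a year boundary (e.g., 2022-12-15 to 2023-01-10 yields 2022-01 and 2023-12 in addition to the two real months), and the unpadded month produces dates like 2023-5-01. A minimal sketch of a pair-based alternative, assuming the same YYYY-MM-DD inputs (month_windows is a hypothetical helper, not part of this file):

import datetime
from calendar import monthrange

def month_windows(usr_start_date, usr_end_date):
    # Yield one (start, end) window per calendar month in the range.
    start = datetime.datetime.strptime(usr_start_date, "%Y-%m-%d").date()
    end = datetime.datetime.strptime(usr_end_date, "%Y-%m-%d").date()
    year, month = start.year, start.month
    while (year, month) <= (end.year, end.month):
        _, num_days = monthrange(year, month)
        # Zero-pad the month so the API sees 2023-05-01, not 2023-5-01
        yield (f"{year}-{month:02d}-01T00:00:00%2B02:00",
               f"{year}-{month:02d}-{num_days}T23:59:59%2B02:00")
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)

for s, e in month_windows("2022-12-15", "2023-02-10"):
    print(s, e)  # exactly three windows: 2022-12, 2023-01, 2023-02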
225
-
226
-
227
- # This function does the proper analysis of the data.
228
- # It takes the user, password, and host to connect to the MongoDB database and gets
229
- # the data to clean from the given database and collection.
230
- # Create a condition so that it only takes ACTIVE records for nucmonitor, and
231
- # all (INACTIVE, ACTIVE) records when photo_date is set.
232
- # nuc_monitor will always take photo_date and past_date as inputs, even when photo_date == False; in that case, past_date == 0 or None.
233
- def nuc_monitor(rte_data, mongo_json_data, usr_start_date, usr_end_date, photo_date, past_date):
234
- # # Slightly changed metadata to fit the data from the RTE API: ST-LAURENT B 2 --> ST LAURENT 2, ....
235
-
236
- # --------------------------------------------- #
237
- # photo_date = False
238
-
239
- # file_path = "/Users/diegomarroquin/HayaEnergy/data/plants_metadata.json"
240
-
241
- # with open(file_path, "r") as file:
242
- # plants_metadata = json.load(file)
243
- plants_metadata = {"BELLEVILLE 1": 1310.0, "BELLEVILLE 2": 1310.0, "BLAYAIS 1": 910.0, "BLAYAIS 2": 910.0,
244
- "BLAYAIS 3": 910.0, "BLAYAIS 4": 910.0, "BUGEY 2": 910.0, "BUGEY 3": 910.0, "BUGEY 4": 880.0,
245
- "BUGEY 5": 880.0, "CATTENOM 1": 1300.0, "CATTENOM 2": 1300.0, "CATTENOM 3": 1300.0,
246
- "CATTENOM 4": 1300.0, "CHINON 1": 905.0, "CHINON 2": 905.0, "CHINON 3": 905.0,
247
- "CHINON 4": 905.0, "CHOOZ 1": 1500.0, "CHOOZ 2": 1500.0, "CIVAUX 1": 1495.0,
248
- "CIVAUX 2": 1495.0, "CRUAS 1": 915.0, "CRUAS 2": 915.0, "CRUAS 3": 915.0, "CRUAS 4": 915.0,
249
- "DAMPIERRE 1": 890.0, "DAMPIERRE 2": 890.0, "DAMPIERRE 3": 890.0, "DAMPIERRE 4": 890.0,
250
- "FLAMANVILLE 1": 1330.0, "FLAMANVILLE 2": 1330.0, "GOLFECH 1": 1310.0, "GOLFECH 2": 1310.0,
251
- "GRAVELINES 1": 910.0, "GRAVELINES 2": 910.0, "GRAVELINES 3": 910.0, "GRAVELINES 4": 910.0,
252
- "GRAVELINES 5": 910.0, "GRAVELINES 6": 910.0, "NOGENT 1": 1310.0, "NOGENT 2": 1310.0,
253
- "PALUEL 1": 1330.0, "PALUEL 2": 1330.0, "PALUEL 3": 1330.0, "PALUEL 4": 1330.0, "PENLY 1": 1330.0,
254
- "PENLY 2": 1330.0, "ST ALBAN 1": 1335.0, "ST ALBAN 2": 1335.0, "ST LAURENT 1": 915.0,
255
- "ST LAURENT 2": 915.0, "TRICASTIN 1": 915.0, "TRICASTIN 2": 915.0, "TRICASTIN 3": 915.0,
256
- "TRICASTIN 4": 915.0, "FESSENHEIM 1": 880.0, "FESSENHEIM 2": 880.0}
257
-
258
-
259
- # Get raw data from database and the RTE
260
- # oauth = get_oauth()
261
 
262
 
263
- # --------------------- INITIAL DATA CLEANING FOR RTE DATA ------------------------ #
264
- unav_API = rte_data.json()
265
- print(unav_API)
266
- # Store the unavailabilities in a list
267
- unavailabilities = []
268
- print("Unav")
269
- for unavailabilities_API in unav_API['results']:
270
- try:
271
- unavailabilities.extend(unavailabilities_API.get('generation_unavailabilities', []))
272
- except Exception as exc:
273
- print(f'There was an error: {exc}')
274
- # print(unavailabilities_API)
275
- rte_df = pd.DataFrame(unavailabilities)
276
-
277
-
278
- def unpack_values(row):
279
- if isinstance(row["values"], list):
280
- for key, value in row["values"][0].items():
281
- row[key] = value
282
- return row
283
- # Apply the function to each row in the DataFrame
284
- rte_df = rte_df.apply(unpack_values, axis=1)
285
-
286
- # Drop the original "values" column
287
- rte_df.drop("values", axis=1, inplace=True)
288
-
289
- # Unpack the unit column
290
- rte_df2 = pd.concat([rte_df, pd.json_normalize(rte_df['unit'])], axis=1)
291
- rte_df2.drop('unit', axis=1, inplace=True)
292
-
293
-
294
- rte_nuclear_unav = rte_df2[(rte_df2["production_type"] == "NUCLEAR")]
295
-
296
- # --------------------- INITIAL DATA CLEANING FOR RTE DATA ------------------------ #
297
-
298
-
299
- # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
300
-
301
-
302
-
303
- mongo_data = mongo_json_data.json()
304
-
305
- # Specify the file path
306
- file_path = "/Users/diegomarroquin/HayaEnergy/Nucmonitor_MVP/NucPy_v0.2/testing/test_data3.txt"
307
-
308
- # Open the file in write mode
309
- with open(file_path, 'w') as file:
310
- for item in mongo_data:
311
- file.write("%s" % item)
312
-
313
- # # Create a DataFrame
314
- mongo_df = pd.DataFrame(mongo_data)
315
-
316
- # Unpack the dictionaries into separate columns
317
- mongo_df_unpacked = pd.json_normalize(mongo_df['generation_unavailabilities'])
318
-
319
- # Concatenate the unpacked columns with the original DataFrame
320
- mongo_df_result = pd.concat([mongo_df, mongo_df_unpacked], axis=1)
321
-
322
- # Drop the original column
323
- mongo_df_result.drop(columns=['generation_unavailabilities'], inplace=True)
324
- mongo_df_columns = mongo_df_result.columns
325
- # print(mongo_df_columns)
326
- # print(mongo_df_result)
327
- # print(mongo_df_result["values"])
328
- # # Unpack values column
329
- # # mongo_df2 = mongo_df_result.copy().apply(unpack_values, axis=1)
330
- # mongo_df_values_unpacked = pd.json_normalize(mongo_df_result['values'])
331
- # mongo_df2 = pd.concat([mongo_df_result, mongo_df_values_unpacked], axis=1)
332
- # print(mongo_df2.columns)
333
- # print(mongo_df2)
334
- # # mongo_df2 = pd.concat([mongo_df_result, pd.json_normalize(mongo_df_result['values'])], axis=1)
335
- # # mongo_df2 = pd.concat([mongo_df2, pd.json_normalize(mongo_df2['unit'])], axis=1)
336
- # # mongo_df2 = mongo_df.copy().apply(unpack_values, axis=1)
337
- # # mongo_df2 = mongo_df_result.copy()
338
- # mongo_df2.drop(columns=["values"], inplace=True)
339
- # mongo_df2.drop(0, axis=1, inplace=True)
340
- # Unpack values using apply() and lambda functions
341
- mongo_df_result['start_date'] = mongo_df_result['values'].apply(lambda x: x[0]['start_date'])
342
- mongo_df_result['end_date'] = mongo_df_result['values'].apply(lambda x: x[0]['end_date'])
343
- mongo_df_result['available_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['available_capacity'])
344
- mongo_df_result['unavailable_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['unavailable_capacity'])
345
- # print(mongo_df_result)
346
- # print(mongo_df_result.columns)
347
- # Drop the original 'values' column
348
- mongo_df_result.drop('values', axis=1, inplace=True)
349
- mongo_df2 = mongo_df_result
350
- mongo_df2.rename(columns=lambda col: col.replace('unit.', ''), inplace=True)
351
 
352
 
353
 
354
- # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
355
-
356
- # Make the two dataframes have the same columns
357
- mongo_unavs = mongo_df2.copy()
358
- mongo_unavs.drop(columns="type", inplace=True)
359
-
360
- rte_unavs = rte_nuclear_unav.copy()
361
- rte_unavs.drop(columns="type", inplace=True)
362
-
363
- # Merge dataframes
364
- column_order = mongo_unavs.columns
365
- # print(column_order)
366
- merged_df = pd.concat([mongo_unavs[column_order], rte_unavs[column_order]], ignore_index=True)
367
-
368
- # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
369
- # start_date_str = usr_start_date.strftime("%Y-%m-%d")
370
- start_date_str = usr_start_date
371
- # end_date_str = usr_end_date.strftime("%Y-%m-%d")
372
- end_date_str = usr_end_date
373
- current_datetime = datetime.datetime.now()
374
- current_datetime_str = current_datetime.strftime("%Y-%m-%d")
375
-
376
- if photo_date == True:
377
- nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= past_date)]
378
- photo_date = True
379
- else: # need to add updated_date as a conditional to get the newest for that day
380
- nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= end_date_str)]
381
-
382
- # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
383
-
384
- # --------------------- SECOND DATA CLEANING ------------------------ #
385
- # This filter should take only the most recent id and discard the rest
386
-
387
- # Sort by updated date
388
- sorted_df = nuclear_unav.copy().sort_values(by='updated_date')
389
-
390
- sorted_df = sorted_df.copy().reset_index(drop=True)
391
-
392
- # Filter to get identifiers
393
- filtered_id_df = sorted_df.copy()
394
- filtered_id_df.drop_duplicates(subset='identifier', keep='last', inplace=True)
395
- filtered_id_df = filtered_id_df.copy().reset_index(drop=True)
396
-
397
-
398
- # This filter should take all the dates with unavs that include days with unavs in the range of the start and end date
399
-
400
- filtered_df = filtered_id_df.copy()[(filtered_id_df.copy()['start_date'] <= end_date_str) & (filtered_id_df.copy()['end_date'] >= start_date_str)]
401
-
402
- # Standardize datetime in dataframe
403
- filtered_df2 = filtered_df.copy() # This code will just standardize datetime stuff
404
- filtered_df2['creation_date'] = pd.to_datetime(filtered_df2['creation_date'], utc=True)
405
- filtered_df2['updated_date'] = pd.to_datetime(filtered_df2['updated_date'], utc=True)
406
- filtered_df2['start_date'] = pd.to_datetime(filtered_df2['start_date'], utc=True)
407
- filtered_df2['end_date'] = pd.to_datetime(filtered_df2['end_date'], utc=True)
408
-
409
- # Drop the duplicates
410
- filtered_df3 = filtered_df2.copy().drop_duplicates()
411
-
412
- # start_date_datetime = pd.to_datetime(start_date_str, utc=True) # Remove timezone info
413
- start_date_datetime = pd.Timestamp(start_date_str, tz='UTC')
414
- # end_date_datetime = pd.to_datetime(end_date_str, utc=True)
415
- end_date_datetime = pd.Timestamp(end_date_str, tz='UTC')
416
-
417
- # Turn df into dict for json processing
418
- filtered_unavs = filtered_df3.copy().to_dict(orient='records')
419
-
420
- # file_path = "/Users/diegomarroquin/HayaEnergy/Nucmonitor_MVP/NucPy_v0.2/testing/test_data4.txt"
421
-
422
- # # Open the file in write mode
423
- # with open(file_path, 'w') as file:
424
- # for item in filtered_unavs:
425
- # file.write("%s" % item)
426
- results = {}
427
-
428
- for unav in filtered_unavs:
429
- plant_name = unav['name']
430
- if plant_name in results:
431
- # If the key is already in the dictionary, append unavailability to the list
432
- results[plant_name].append({'status': unav['status'],
433
- 'id': unav['message_id'],
434
- 'creation_date': unav['creation_date'],
435
- 'updated_date': unav['updated_date'],
436
- 'start_date': unav['start_date'],
437
- 'end_date': unav['end_date'],
438
- 'available_capacity': unav['available_capacity']})
439
- else:
440
- # if the key of the plant is not there yet, create a new element of the dictionary
441
-
442
- # Get message_id instead of identifier, easier to identify stuff with it
443
- results[plant_name] = [{'status': unav['status'],
444
- 'id': unav['message_id'],
445
- 'creation_date': unav['creation_date'],
446
- 'updated_date': unav['updated_date'],
447
- 'start_date': unav['start_date'],
448
- 'end_date': unav['end_date'],
449
- 'available_capacity': unav['available_capacity']}]
450
 
451
- # Custom encoder to handle datetime objects
452
- class DateTimeEncoder(json.JSONEncoder):
453
- def default(self, o):
454
- if isinstance(o, datetime.datetime):
455
- return o.isoformat()
456
- return super().default(o)
457
-
458
- results_holder = results
459
-
460
- # Create new dict with each plant only having start_date less than user_end_date and an end_date greater than user_start_date
461
- # should just be doing the same as above in the df, filtering only dates that include the start and end date
462
- start_date = start_date_datetime.date()
463
- end_date = end_date_datetime.date()
464
- results_filtered = results_holder
465
- for key, value in results_filtered.items():
466
- filtered_values = []
- seen_ids = set()
467
- for item in value:
468
- item_start_date = item['start_date'].date()
469
- item_end_date = item['end_date'].date()
470
- identifier = item['id']
471
- if item_start_date < end_date and item_end_date > start_date and identifier not in seen_ids:
472
- seen_ids.add(identifier)
- filtered_values.append(item)
473
- results_filtered[key] = filtered_values
474
-
475
-
476
- sorted_results = results_filtered
477
- # --------------------- SECOND DATA CLEANING ------------------------ #
478
-
479
- # --------------------------- HERE IS THE FINAL PROCESS --------------------------- #
480
-
481
- for key, value in sorted_results.items():
482
- sorted_results[key] = sorted(value, key=lambda x: x['updated_date'])
483
-
484
- results_sorted = sorted_results
485
 
486
- dates_of_interest = [start_date] # We are creating a list of dates ranging from user specified start and end dates
487
- date_plus_one = start_date
488
 
489
- while date_plus_one < end_date:
490
- date_plus_one = date_plus_one + datetime.timedelta(days=1)
491
- dates_of_interest.append(date_plus_one)
492
 
493
- # This is to standardize the datetimes. Without this, the datetime calculations for each power plant will not work
494
- results_plants = {plant_name: {date: {"available_capacity": power, "updated_date": pd.to_datetime("1970-01-01", utc=True)} for date in dates_of_interest}
495
- for plant_name, power in plants_metadata.items()}
496
 
497
 
498
- for plant, unavailabilities in results_sorted.items():
499
 
500
- original_power = plants_metadata[plant]
501
- # Get all the unavailabilities scheduled for the plant.
502
- results_current_plant = results_plants[plant]
503
 
504
- for unavailability in unavailabilities:
505
- # For each unavailability, the resulting power, start and end datetime are collected. Need to collect updated_date
506
- power_unavailability = unavailability["available_capacity"]
507
- updated_date_unav = unavailability["updated_date"]
508
- # The date comes as a string
509
- start_datetime_unav = unavailability["start_date"]
510
- end_datetime_unav = unavailability["end_date"]
511
- start_date_unav = start_datetime_unav.date() # Extract date part
512
- end_date_unav = end_datetime_unav.date() # Extract date part
513
 
514
- # For the current unavailability, we want to find which days it affects
515
- for day in dates_of_interest:
516
-
517
- start_hour = start_datetime_unav.hour
518
- start_minute = start_datetime_unav.minute
519
- end_hour = end_datetime_unav.hour
520
- end_minute = end_datetime_unav.minute
521
-
522
- if start_date_unav <= day <= end_date_unav:
523
- # Check if the day is already updated with a later update_date
524
- if day in results_current_plant and updated_date_unav <= results_current_plant[day]["updated_date"]:
525
- continue # Skip to the next loop if there is already information for a later update_date
526
-
527
- # Calculate the % of the day that the plant is under maintenance
528
- if start_date_unav == day and day == end_date_unav:
529
- # The unavailability starts and ends on the same day
530
- percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
531
- elif start_date_unav == day:
532
- # The unavailability starts on the current day but ends on a later day
533
- percentage_of_day = (24 * 60 - (start_hour * 60 + start_minute)) / (24 * 60)
534
- elif day == end_date_unav:
535
- # The unavailability starts on a previous day and ends on the current day
536
- percentage_of_day = (end_hour * 60 + end_minute) / (24 * 60)
537
- else:
538
- # The unavailability covers the entire day
539
- percentage_of_day = 1
540
-
541
- # The average power of the day is calculated
542
- power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * original_power
543
-
544
- # Update the available_capacity for the day only if it's not already updated with a later update_date
545
- if day not in results_current_plant or updated_date_unav > results_current_plant[day]["updated_date"]:
546
- results_current_plant[day] = {"available_capacity": power_of_day, "updated_date": updated_date_unav}
547
-
548
-
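To make the weighting above concrete, a worked example with illustrative numbers (not from the data): an unavailability running 06:00 to 18:00 on a single day covers half the day, so the day's average capacity lands at the midpoint between the reduced and nominal capacity.

original_power = 1310.0        # nominal capacity, e.g. BELLEVILLE 1
power_unavailability = 500.0   # available_capacity reported during the outage
start_hour, start_minute = 6, 0
end_hour, end_minute = 18, 0   # outage runs 06:00-18:00 on one day

percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * original_power
print(percentage_of_day, power_of_day)  # 0.5 905.0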
549
- output_results = {}
550
- for plant, plant_data in results_plants.items():
551
- available_capacity_per_day = {str(date): data["available_capacity"] for date, data in plant_data.items()}
552
- output_results[plant] = available_capacity_per_day
553
-
554
- # print(output_results)
555
- add_total(output_results)
556
- # print("Done")
557
- # print(results_plants)
558
- # Convert datetime key to string to store in mongodb
559
- output_results = {plant: {str(date): power for date, power in plant_data.items()} for plant, plant_data in output_results.items()}
560
- # print(output_results)
561
- # -------------------------------------------------
562
- if photo_date == False:
563
- # Store the results_plants in MongoDB
564
- database_name = "data" # Specify your database name
565
- collection_name = "filtered" # Specify your collection name
566
- # mongo_store_data(output_results, database_name, collection_name)
567
- # mongo_replace_data(results_plants_total, database_name, "filtered_excel")
568
- # print("Data stored in database")
569
- # mongo_append_data(results_plants, database_name, collection_name)
570
 
571
- # json_data = json.dumps(convert_to_json(output_results))
572
- json_data = json.dumps(output_results)
573
- # print(json_data)
574
- return json_data
575
- else:
576
- database_name = "data" # Specify your database name
577
- collection_name = "photo_date" # Specify your collection name
578
- # mongo_store_data(output_results, database_name, collection_name)
579
-
580
- # json_data = json.dumps(convert_to_json(output_results))
581
- json_data = json.dumps(output_results)
582
- # print(json_data)
583
- return json_data
584
- # -------------------------------------------------
585
- return
586
-
587
-
588
- # Namespaces
589
-
590
- # Get raw data stuff
591
-
592
- raw_ns = Namespace('raw', description='Raw Data', path='/nucpy/v1')
593
- api.add_namespace(raw_ns)
594
-
595
- @raw_ns.route('/raw', methods=["GET"])
596
- @raw_ns.doc(params= {"start_date": "Start date", "end_date": "end date", "photo_date": "True False", "past_date": "Cutoff date"})
597
- class Raw(Resource):
598
- # @auth.login_required
599
- def get(self):
600
- # raw_data = merge_gridfs_files_to_json()
601
- print("Applying request")
602
- mongo_start_date = request.args.get("start_date")
603
- mongo_end_date = request.args.get("end_date")
604
- mongo_photo_date = request.args.get("photo_date")
605
- mongo_past_date = request.args.get("past_date")
606
- print("Getting raw_data")
607
- raw_data = mongo_unavs_call(mongo_start_date, mongo_end_date, mongo_photo_date, mongo_past_date)
608
- print("Returning raw_data")
609
- print(raw_data)
610
- return raw_data
611
-
612
- # Get RTE data
613
-
614
- rte_ns = Namespace('rte', description='RTE Data', path='/nucpy/v1')
615
- api.add_namespace(rte_ns)
616
-
617
- @rte_ns.route('/rte', methods=["GET"])
618
- # @rte_ns.doc(params= {"start_date": "Start date", "end_date": "end date"})
619
- class RTEDATA(Resource):
620
- # @auth.login_required
621
- def get(self):
622
- rte_start_date = request.args.get("start_date")
623
- rte_end_date = request.args.get("end_date")
624
- print(rte_start_date)
625
- print(rte_end_date)
626
- # Process the user input and retrieve data
627
- data = get_unavailabilities(rte_start_date, rte_end_date)
628
-
629
- return data
 
 
 
 
 
630
 
631
- # Get processed data
632
 
633
- nucmonitor_ns = Namespace('nucmonitor', description='Nucmonitor', path='/nucpy/v1')
634
- api.add_namespace(nucmonitor_ns)
635
 
636
- @nucmonitor_ns.route('/nucmonitor', methods=['GET'])
637
- class Nucmonitor(Resource):
638
- # @auth.login_required
639
- def get(self):
640
- # Retrieve input parameters from request.args
641
- start_date = request.args.get("start_date")
642
- end_date = request.args.get("end_date")
643
- photo_date = request.args.get("photo_date")
644
- past_date = request.args.get("past_date")
645
-
646
- # Call the /rte endpoint to get RTE data
647
- rte_data = self.get_rte_data(start_date, end_date)
648
- print("Got RTE data")
649
- print("Getting Mongo data")
650
- mongo_data = self.get_mongo_data(start_date, end_date, photo_date, past_date)
651
- print("Got Mongo data")
652
- print(mongo_data)
653
- # Process data using nuc_monitor
654
- nucmonitor_response = nuc_monitor(rte_data, mongo_data, start_date, end_date, photo_date, past_date)
655
- # print(nucmonitor_response)
656
- return (nucmonitor_response)
657
-
658
- def get_rte_data(self, start_date, end_date):
659
- rte_url = "http://0.0.0.0:7860/nucpy/v1/rte" # RTE endpoint URL
660
- rte_params = {"start_date": start_date, "end_date": end_date}
661
- rte_response = requests.get(rte_url, params=rte_params)
662
- # rte_data = rte_response.json()
663
- return rte_response
664
-
665
- def get_mongo_data(self, start_date, end_date, photo_date, past_date):
666
- print("Getting url")
667
- mongo_url = "http://0.0.0.0:7860/nucpy/v1/raw" # Mongo endpoint URL
668
- print("Getting params")
669
- mongo_params = {"start_date": start_date, "end_date": end_date, "photo_date": photo_date, "past_date": past_date}
670
- print("Getting request")
671
- mongo_response = requests.get(mongo_url, params=mongo_params)
672
- # mongo_data = mongo_response.json()
673
- print("Returning response")
674
- return mongo_response
675
 
 
 
676
 
 
 
 
 
677
 
678
  if __name__ == '__main__':
679
  app.run(host='0.0.0.0', port=7860)
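A design note on the Nucmonitor class above: it fetches the app's own /rte and /raw endpoints over HTTP against a hardcoded http://0.0.0.0:7860, which adds a network round-trip per request and breaks if the host or port changes. A sketch of an in-process alternative (an assumption, not part of this commit): call the underlying functions directly. nuc_monitor would then need to accept the dict and list returned by get_unavailabilities and mongo_unavs_call instead of calling .json() on a Response.

def get_nucmonitor_data(start_date, end_date, photo_date, past_date):
    # Call the underlying functions directly instead of re-entering the API over HTTP
    rte_data = get_unavailabilities(start_date, end_date)  # dict, not a requests.Response
    mongo_data = mongo_unavs_call(start_date, end_date, photo_date, past_date)
    return nuc_monitor(rte_data, mongo_data, start_date, end_date, photo_date, past_date)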
 
1
+ # import pandas as pd
2
+ # import numpy as np
3
+ # from flask import Flask, jsonify, request
4
+ # from flask_restx import Api, Resource, Namespace
5
+ # # from flask_httpauth import HTTPBasicAuth
6
+ # import requests
7
+ # import base64
8
+ # import json
9
+ # import datetime
10
+ # from calendar import monthrange
11
+ # import pymongo
12
+ # from mongoengine import StringField, ListField, DateTimeField, DictField
+ # from bson import ObjectId
13
+
14
+ # """
15
+ # This script creates an API that connects to the MongoDB database. This API will
16
+ # eventually allow communication between the database and the frontend.
17
+ # """
18
+ # # Connect to MongoDB
19
+ # # For some reason none of this works when I'm connected to the VPN
20
+
21
+
22
+ # app = Flask(__name__)
23
+ # api = Api(app, version='1.0',
24
+ # title='Haya Energy NucPy API',
25
+ # description="""
26
+ # API endpoints used to communicate NucPy
27
+ # with MongoDB
28
+ # """,
29
+ # contact="Diego",
30
+ # endpoint="/nucpy/v1")
31
+
32
+
33
+ # def mongo_unavs_call(user_input_start_date, user_input_end_date, user_input_photo_date, user_input_past_date):
34
+ # # Connect to the MongoDB database
35
+ # user = "dmarroquin"
36
+ # passw = "tN9XpCCQM2MtYDme"
37
+ # host = "nucmonitordata.xxcwx9k.mongodb.net"
38
+ # client = pymongo.MongoClient(
39
+ # f"mongodb+srv://{user}:{passw}@{host}/?retryWrites=true&w=majority"
40
+ # )
41
+
42
+ # db = client["data"]
43
+ # collection = db["unavs"]
44
+
45
+ # start_date = f"{user_input_start_date}T00:00:00"
46
+ # end_date = f"{user_input_end_date}T23:59:59"
47
 
48
+ # pipeline = [
49
+ # {
50
+ # "$unwind": "$results"
51
+ # },
52
+ # {
53
+ # "$unwind": "$results.generation_unavailabilities"
54
+ # },
55
+ # {
56
+ # "$match": {
57
+ # "results.generation_unavailabilities.production_type": "NUCLEAR",
58
+ # "results.generation_unavailabilities.start_date": {"$lte": end_date},
59
+ # "results.generation_unavailabilities.end_date": {"$gte": start_date},
60
+ # "results.generation_unavailabilities.updated_date": {"$lte": end_date}
61
+ # }
62
+ # },
63
+ # {
64
+ # "$project": {
65
+ # "_id": 0,
66
+ # "generation_unavailabilities": "$results.generation_unavailabilities"
67
+ # }
68
+ # }
69
+ # ]
70
+
71
+ # result = collection.aggregate(pipeline)
72
+
73
+ # return list(result)
74
+
75
+ # # --------------------------------------------------------------------------------------- #
76
+
77
+ # # Convert the dictionary of dictionaries to JSON
78
+ # def convert_to_json(item):
79
+ # if isinstance(item, dict):
80
+ # return {str(k): convert_to_json(v) for k, v in item.items()}
81
+ # elif isinstance(item, list):
82
+ # return [convert_to_json(i) for i in item]
83
+ # elif isinstance(item, ObjectId):
84
+ # return str(item)
85
+ # else:
86
+ # return item
87
+ # # --------------------------------------------------------------------------------------- #
88
+
89
+ # # The idea of this function is to sum the total availability for each day of interest
90
+ # # This is already done in the Excel so it might be useful to check
91
+ # # The function computes the totals of the data. When printed as a dataframe/Excel sheet,
92
+ # # it will give a final row with the total for each plant and the total overall
93
+ # def add_total(data):
94
+ # total_values = {}
95
+ # for key in data:
96
+ # daily_values = data[key]
97
+ # total = sum(daily_values.values())
98
+ # daily_values["Total"] = total
99
+ # for date, value in daily_values.items():
100
+ # if date not in total_values:
101
+ # total_values[date] = value
102
+ # else:
103
+ # total_values[date] += value
104
 
105
+ # data["Total"] = total_values
106
 
107
+ # # --------------------------------------------------------------------------------------- #
108
 
109
+ # # This file will simply connect to RTE and get the data directly from there
110
 
111
+ # # Function to create an authentication token. This token is then used in the HTTP requests to the API for authentication.
112
+ # # It is necessary to receive data from RTE.
113
+ # def get_oauth():
114
+ # # ID from the user. This is encoded to base64 and sent in an HTTP request to receive the oauth token.
115
+ # # This ID is from my account (RMP). However, another account can be created in the RTE API portal to obtain another ID.
116
+ # joined_ID = '057e2984-edb3-4706-984b-9ea0176e74db:dc9df9f7-9f91-4c7a-910c-15c4832fb7bc'
117
+ # b64_ID = base64.b64encode(joined_ID.encode('utf-8'))
118
+ # b64_ID_decoded = b64_ID.decode('utf-8')
119
 
120
+ # # Headers for the HTTP request
121
+ # headers = {'Content-Type': 'application/x-www-form-urlencoded',
122
+ # 'Authorization': f'Basic {b64_ID_decoded}'}
123
+ # api_url = 'https://digital.iservices.rte-france.com/token/oauth/'
124
+ # # Call to the API and if successful, the response will be 200.
125
+ # response = requests.post(api_url, headers=headers)
126
 
127
+ # # When positive response, the token is retrieved
128
+ # data = response.json()
129
+ # oauth = data['access_token']
130
 
131
+ # return oauth
132
+
133
+ # # --------------------------------------------------------------------------------------- #
134
+
135
+ # # This function makes several calls to the RTE API (because the maximum time between start_date and end_date is 1 month).
136
+ # # The argument past_photo is a boolean (True, False) that indicates whether we want to make a photo from the past or not.
137
+ # # However, the past_photo part and past_date are not yet implemented.
138
+ # def get_unavailabilities(usr_start_date, usr_end_date):
139
+ # # This should be changed in the case of getting a past_photo because many of the rows that are relevant for that
140
+ # # past photo will not be ACTIVE anymore.
141
+ # # unav_status = ['ACTIVE', 'INACTIVE']
142
+ # # This could also be changed. Currently it means that if we call the API with start_date=01/01/2023 and end_date=01/02/2023,
143
+ # # it will return all the records of unavailabilities that have been updated between the two dates.
144
+ # # date_type = 'UPDATED_DATE'
145
+ # # date_type APPLICATION_DATE gets all unavailabilities with predictions in the defined dates, so that
146
+ # # we can get an unavailability that has updated_date outside the defined dates for start_date and end_date
147
+ # oauth = get_oauth()
148
+ # print("Get Oauth done")
149
+ # date_type = 'APPLICATION_DATE'
150
 
151
+ # # Current year/month/day/hour/minute/second is calculated for the last call to the API. For instance, if today is 05/05/2023,
152
+ # # the last call of the API will be from 01/05/2023 to 05/05/2023 (+current hour,minute,second).
153
+ # current_datetime = datetime.datetime.now()
154
+ # current_year = current_datetime.strftime('%Y')
155
+ # current_month = current_datetime.strftime('%m')
156
+ # current_day = current_datetime.strftime('%d')
157
+ # current_hour = current_datetime.strftime('%H')
158
+ # current_minute = current_datetime.strftime('%M')
159
+ # current_second = current_datetime.strftime('%S')
160
 
161
+ # # Headers for the HTTP request
162
+ # headers = {'Host': 'digital.iservices.rte-france.com',
163
+ # 'Authorization': f'Bearer {oauth}'
164
+ # }
165
 
166
+ # # The responses object is where we are going to store all the responses from the API.
167
+ # # Initially, current_datetime is included to record when we called the API, and all the
168
+ # # individual results of the API (because each call is max 1 month) are stored in responses["results"]
169
+ # responses = {"current_datetime": current_datetime.strftime("%m/%d/%Y, %H:%M:%S"),
170
+ # "results":[]
171
+ # }
172
+
173
+ # # --------------------------- HERE HAVE TO GET THE RANGE OF DATES FROM START AND END AND PUT THEM INTO LIST --------------------------- #
174
+ # # Convert start_date and end_date to datetime objects
175
+ # start_date_obj = datetime.datetime.strptime(usr_start_date, "%Y-%m-%d").date()
176
+ # end_date_obj = datetime.datetime.strptime(usr_end_date, "%Y-%m-%d").date()
177
+
178
+ # # Initialize lists to store years and months
179
+ # years = []
180
+ # months = []
181
+
182
+ # # Generate the range of years and months
183
+ # current_date = start_date_obj
184
+ # while current_date <= end_date_obj:
185
+ # years.append(current_date.year)
186
+ # months.append(current_date.month)
187
+ # current_date += datetime.timedelta(days=1)
188
+
189
+ # # Remove duplicates from the lists
190
+ # years = list(set(years))
191
+ # months = list(set(months))
192
+ # years.sort()
193
+ # months.sort()
194
+ # print(years)
195
+ # print(months)
196
+ # # --------------------------- HERE HAVE TO GET THE RANGE OF DATES FROM START AND END AND PUT THEM INTO LIST --------------------------- #
197
+
198
+ # # Loop to call the API all the necessary times.
199
+ # for i in range(len(years)):
200
+ # for j in range(len(months)):
201
+ # # start_year and start_month of the current call to the API
202
+ # start_year = years[i]
203
+ # start_month = months[j]
204
+ # # start_date is constructed. Now we only need to construct the end_date.
205
+ # start_date = f'{start_year}-{start_month}-01T00:00:00%2B02:00'
206
+
207
+ # # Calculate the number of days in the current month
209
+ # _, num_days = monthrange(int(start_year), int(start_month))
210
+ # end_date = f'{start_year}-{start_month}-{num_days}T23:59:59%2B02:00'
211
 
212
+ # print(f'start date is {start_date}')
213
+ # print(f'end date is {end_date}')
214
 
215
+ # # Call to the API
216
+ # api_url = f'https://digital.iservices.rte-france.com/open_api/unavailability_additional_information/v4/generation_unavailabilities?date_type={date_type}&start_date={start_date}&end_date={end_date}'
217
+
218
+ # response = requests.get(api_url, headers=headers)
219
+ # json_response = response.json()
220
+ # responses["results"].append(json_response)
221
+ # print(responses)
222
+ # return responses
223
+
224
+ # # --------------------------------------------------------------------------------------- #
225
+
226
+
227
+ # # This function does the proper analysis of the data.
228
+ # # It takes the user, password, and host to connect to the MongoDB database and gets
229
+ # # the data to clean from the given database and collection.
230
+ # # Create a condition so that it only takes ACTIVE records for nucmonitor, and
231
+ # # all (INACTIVE, ACTIVE) records when photo_date is set.
232
+ # # nuc_monitor will always take photo_date and past_date as inputs, even when photo_date == False; in that case, past_date == 0 or None.
233
+ # def nuc_monitor(rte_data, mongo_json_data, usr_start_date, usr_end_date, photo_date, past_date):
234
+ # # # Slightly changed metadata to fit the data from the RTE API: ST-LAURENT B 2 --> ST LAURENT 2, ....
235
+
236
+ # # --------------------------------------------- #
237
+ # # photo_date = False
238
+
239
+ # # file_path = "/Users/diegomarroquin/HayaEnergy/data/plants_metadata.json"
240
+
241
+ # # with open(file_path, "r") as file:
242
+ # # plants_metadata = json.load(file)
243
+ # plants_metadata = {"BELLEVILLE 1": 1310.0, "BELLEVILLE 2": 1310.0, "BLAYAIS 1": 910.0, "BLAYAIS 2": 910.0,
244
+ # "BLAYAIS 3": 910.0, "BLAYAIS 4": 910.0, "BUGEY 2": 910.0, "BUGEY 3": 910.0, "BUGEY 4": 880.0,
245
+ # "BUGEY 5": 880.0, "CATTENOM 1": 1300.0, "CATTENOM 2": 1300.0, "CATTENOM 3": 1300.0,
246
+ # "CATTENOM 4": 1300.0, "CHINON 1": 905.0, "CHINON 2": 905.0, "CHINON 3": 905.0,
247
+ # "CHINON 4": 905.0, "CHOOZ 1": 1500.0, "CHOOZ 2": 1500.0, "CIVAUX 1": 1495.0,
248
+ # "CIVAUX 2": 1495.0, "CRUAS 1": 915.0, "CRUAS 2": 915.0, "CRUAS 3": 915.0, "CRUAS 4": 915.0,
249
+ # "DAMPIERRE 1": 890.0, "DAMPIERRE 2": 890.0, "DAMPIERRE 3": 890.0, "DAMPIERRE 4": 890.0,
250
+ # "FLAMANVILLE 1": 1330.0, "FLAMANVILLE 2": 1330.0, "GOLFECH 1": 1310.0, "GOLFECH 2": 1310.0,
251
+ # "GRAVELINES 1": 910.0, "GRAVELINES 2": 910.0, "GRAVELINES 3": 910.0, "GRAVELINES 4": 910.0,
252
+ # "GRAVELINES 5": 910.0, "GRAVELINES 6": 910.0, "NOGENT 1": 1310.0, "NOGENT 2": 1310.0,
253
+ # "PALUEL 1": 1330.0, "PALUEL 2": 1330.0, "PALUEL 3": 1330.0, "PALUEL 4": 1330.0, "PENLY 1": 1330.0,
254
+ # "PENLY 2": 1330.0, "ST ALBAN 1": 1335.0, "ST ALBAN 2": 1335.0, "ST LAURENT 1": 915.0,
255
+ # "ST LAURENT 2": 915.0, "TRICASTIN 1": 915.0, "TRICASTIN 2": 915.0, "TRICASTIN 3": 915.0,
256
+ # "TRICASTIN 4": 915.0, "FESSENHEIM 1": 880.0, "FESSENHEIM 2": 880.0}
257
+
258
+
259
+ # # Get raw data from database and the RTE
260
+ # # oauth = get_oauth()
261
 
262
 
263
+ # # --------------------- INITIAL DATA CLEANING FOR RTE DATA ------------------------ #
264
+ # unav_API = rte_data.json()
265
+ # print(unav_API)
266
+ # # Store the unavailabilities in a list
267
+ # unavailabilities = []
268
+ # print("Unav")
269
+ # for unavailabilities_API in unav_API['results']:
270
+ # try:
271
+ # unavailabilities.extend(unavailabilities_API.get('generation_unavailabilities', []))
272
+ # except Exception as exc:
273
+ # print(f'There was an error: {exc}')
274
+ # # print(unavailabilities_API)
275
+ # rte_df = pd.DataFrame(unavailabilities)
276
+
277
+
278
+ # def unpack_values(row):
279
+ # if isinstance(row["values"], list):
280
+ # for key, value in row["values"][0].items():
281
+ # row[key] = value
282
+ # return row
283
+ # # Apply the function to each row in the DataFrame
284
+ # rte_df = rte_df.apply(unpack_values, axis=1)
285
+
286
+ # # Drop the original "values" column
287
+ # rte_df.drop("values", axis=1, inplace=True)
288
+
289
+ # # Unpack the unit column
290
+ # rte_df2 = pd.concat([rte_df, pd.json_normalize(rte_df['unit'])], axis=1)
291
+ # rte_df2.drop('unit', axis=1, inplace=True)
292
+
293
+
294
+ # rte_nuclear_unav = rte_df2[(rte_df2["production_type"] == "NUCLEAR")]
295
+
296
+ # # --------------------- INITIAL DATA CLEANING FOR RTE DATA ------------------------ #
297
+
298
+
299
+ # # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
300
+
301
+
302
+
303
+ # mongo_data = mongo_json_data.json()
304
+
305
+ # # Specify the file path
306
+ # file_path = "/Users/diegomarroquin/HayaEnergy/Nucmonitor_MVP/NucPy_v0.2/testing/test_data3.txt"
307
+
308
+ # # Open the file in write mode
309
+ # with open(file_path, 'w') as file:
310
+ # for item in mongo_data:
311
+ # file.write("%s" % item)
312
+
313
+ # # # Create a DataFrame
314
+ # mongo_df = pd.DataFrame(mongo_data)
315
+
316
+ # # Unpack the dictionaries into separate columns
317
+ # mongo_df_unpacked = pd.json_normalize(mongo_df['generation_unavailabilities'])
318
+
319
+ # # Concatenate the unpacked columns with the original DataFrame
320
+ # mongo_df_result = pd.concat([mongo_df, mongo_df_unpacked], axis=1)
321
+
322
+ # # Drop the original column
323
+ # mongo_df_result.drop(columns=['generation_unavailabilities'], inplace=True)
324
+ # mongo_df_columns = mongo_df_result.columns
325
+ # # print(mongo_df_columns)
326
+ # # print(mongo_df_result)
327
+ # # print(mongo_df_result["values"])
328
+ # # # Unpack values column
329
+ # # # mongo_df2 = mongo_df_result.copy().apply(unpack_values, axis=1)
330
+ # # mongo_df_values_unpacked = pd.json_normalize(mongo_df_result['values'])
331
+ # # mongo_df2 = pd.concat([mongo_df_result, mongo_df_values_unpacked], axis=1)
332
+ # # print(mongo_df2.columns)
333
+ # # print(mongo_df2)
334
+ # # # mongo_df2 = pd.concat([mongo_df_result, pd.json_normalize(mongo_df_result['values'])], axis=1)
335
+ # # # mongo_df2 = pd.concat([mongo_df2, pd.json_normalize(mongo_df2['unit'])], axis=1)
336
+ # # # mongo_df2 = mongo_df.copy().apply(unpack_values, axis=1)
337
+ # # # mongo_df2 = mongo_df_result.copy()
338
+ # # mongo_df2.drop(columns=["values"], inplace=True)
339
+ # # mongo_df2.drop(0, axis=1, inplace=True)
340
+ # # Unpack values using apply() and lambda functions
341
+ # mongo_df_result['start_date'] = mongo_df_result['values'].apply(lambda x: x[0]['start_date'])
342
+ # mongo_df_result['end_date'] = mongo_df_result['values'].apply(lambda x: x[0]['end_date'])
343
+ # mongo_df_result['available_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['available_capacity'])
344
+ # mongo_df_result['unavailable_capacity'] = mongo_df_result['values'].apply(lambda x: x[0]['unavailable_capacity'])
345
+ # # print(mongo_df_result)
346
+ # # print(mongo_df_result.columns)
347
+ # # Drop the original 'values' column
348
+ # mongo_df_result.drop('values', axis=1, inplace=True)
349
+ # mongo_df2 = mongo_df_result
350
+ # mongo_df2.rename(columns=lambda col: col.replace('unit.', ''), inplace=True)
351
 
352
 
353
 
354
+ # # --------------------- INITIAL DATA CLEANING FOR MONGO DATA ------------------------ #
355
+
356
+ # # Make the two dataframes have the same columns
357
+ # mongo_unavs = mongo_df2.copy()
358
+ # mongo_unavs.drop(columns="type", inplace=True)
359
+
360
+ # rte_unavs = rte_nuclear_unav.copy()
361
+ # rte_unavs.drop(columns="type", inplace=True)
362
+
363
+ # # Merge dataframes
364
+ # column_order = mongo_unavs.columns
365
+ # # print(column_order)
366
+ # merged_df = pd.concat([mongo_unavs[column_order], rte_unavs[column_order]], ignore_index=True)
367
+
368
+ # # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
369
+ # # start_date_str = usr_start_date.strftime("%Y-%m-%d")
370
+ # start_date_str = usr_start_date
371
+ # # end_date_str = usr_end_date.strftime("%Y-%m-%d")
372
+ # end_date_str = usr_end_date
373
+ # current_datetime = datetime.datetime.now()
374
+ # current_datetime_str = current_datetime.strftime("%Y-%m-%d")
375
+
376
+ # if photo_date == True:
377
+ # nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= past_date)]
378
+ # photo_date = True
379
+ # else: # need to add updated_date as a conditional to get the newest for that day
380
+ # nuclear_unav = merged_df.copy()[(merged_df.copy()["production_type"] == "NUCLEAR") & (merged_df.copy()["updated_date"] <= end_date_str)]
381
+
382
+ # # --------------------------- HERE IS THE CHANGE TO GET ONLY ACTIVE OR ACTIVE AND INACTIVE --------------------------- #
383
+
384
+ # # --------------------- SECOND DATA CLEANING ------------------------ #
385
+ # # This filter should take only the most recent id and discard the rest
386
+
387
+ # # Sort by updated date
388
+ # sorted_df = nuclear_unav.copy().sort_values(by='updated_date')
389
+
390
+ # sorted_df = sorted_df.copy().reset_index(drop=True)
391
+
392
+ # # Filter to get identifiers
393
+ # filtered_id_df = sorted_df.copy()
394
+ # filtered_id_df.drop_duplicates(subset='identifier', keep='last', inplace=True)
395
+ # filtered_id_df = filtered_id_df.copy().reset_index(drop=True)
396
+
397
+
398
+ # # This filter should take all the dates with unavs that include days with unavs in the range of the start and end date
399
+
400
+ # filtered_df = filtered_id_df.copy()[(filtered_id_df.copy()['start_date'] <= end_date_str) & (filtered_id_df.copy()['end_date'] >= start_date_str)]
401
+
402
+ # # Standardize datetime in dataframe
403
+ # filtered_df2 = filtered_df.copy() # This code will just standardize datetime stuff
404
+ # filtered_df2['creation_date'] = pd.to_datetime(filtered_df2['creation_date'], utc=True)
405
+ # filtered_df2['updated_date'] = pd.to_datetime(filtered_df2['updated_date'], utc=True)
406
+ # filtered_df2['start_date'] = pd.to_datetime(filtered_df2['start_date'], utc=True)
407
+ # filtered_df2['end_date'] = pd.to_datetime(filtered_df2['end_date'], utc=True)
408
+
409
+ # # Drop the duplicates
410
+ # filtered_df3 = filtered_df2.copy().drop_duplicates()
411
+
412
+ # # start_date_datetime = pd.to_datetime(start_date_str, utc=True) # Remove timezone info
413
+ # start_date_datetime = pd.Timestamp(start_date_str, tz='UTC')
414
+ # # end_date_datetime = pd.to_datetime(end_date_str, utc=True)
415
+ # end_date_datetime = pd.Timestamp(end_date_str, tz='UTC')
416
+
417
+ # # Turn df into dict for json processing
418
+ # filtered_unavs = filtered_df3.copy().to_dict(orient='records')
419
+
420
+ # # file_path = "/Users/diegomarroquin/HayaEnergy/Nucmonitor_MVP/NucPy_v0.2/testing/test_data4.txt"
421
+
422
+ # # # Open the file in write mode
423
+ # # with open(file_path, 'w') as file:
424
+ # # for item in filtered_unavs:
425
+ # # file.write("%s" % item)
426
+ # results = {}
427
+
428
+ # for unav in filtered_unavs:
429
+ # plant_name = unav['name']
430
+ # if plant_name in results:
431
+ # # If the key is already in the dictionary, append unavailability to the list
432
+ # results[plant_name].append({'status': unav['status'],
433
+ # 'id': unav['message_id'],
434
+ # 'creation_date': unav['creation_date'],
435
+ # 'updated_date': unav['updated_date'],
436
+ # 'start_date': unav['start_date'],
437
+ # 'end_date': unav['end_date'],
438
+ # 'available_capacity': unav['available_capacity']})
439
+ # else:
440
+ # # if the key of the plant is not there yet, create a new element of the dictionary
441
+
442
+ # # Get message_id instead of identifier, easier to identify stuff with it
443
+ # results[plant_name] = [{'status': unav['status'],
444
+ # 'id': unav['message_id'],
445
+ # 'creation_date': unav['creation_date'],
446
+ # 'updated_date': unav['updated_date'],
447
+ # 'start_date': unav['start_date'],
448
+ # 'end_date': unav['end_date'],
449
+ # 'available_capacity': unav['available_capacity']}]
450
 
451
+ # # Custom encoder to handle datetime objects
452
+ # class DateTimeEncoder(json.JSONEncoder):
453
+ # def default(self, o):
454
+ # if isinstance(o, datetime.datetime):
455
+ # return o.isoformat()
456
+ # return super().default(o)
457
+
458
+ # results_holder = results
459
+
460
+ # # Create new dict with each plant only having start_date less than user_end_date and an end_date greater than user_start_date
461
+ # # should just be doing the same as above in the df, filtering only dates that include the start and end date
462
+ # start_date = start_date_datetime.date()
463
+ # end_date = end_date_datetime.date()
464
+ # results_filtered = results_holder
465
+ # for key, value in results_filtered.items():
466
+ # filtered_values = []
+ # seen_ids = set()
467
+ # for item in value:
468
+ # item_start_date = item['start_date'].date()
469
+ # item_end_date = item['end_date'].date()
470
+ # identifier = item['id']
471
+ # if item_start_date < end_date and item_end_date > start_date and identifier not in seen_ids:
472
+ # seen_ids.add(identifier)
+ # filtered_values.append(item)
473
+ # results_filtered[key] = filtered_values
474
+
475
+
476
+ # sorted_results = results_filtered
477
+ # # --------------------- SECOND DATA CLEANING ------------------------ #
478
+
479
+ # # --------------------------- HERE IS THE FINAL PROCESS --------------------------- #
480
+
481
+ # for key, value in sorted_results.items():
482
+ # sorted_results[key] = sorted(value, key=lambda x: x['updated_date'])
483
+
484
+ # results_sorted = sorted_results
485
 
486
+ # dates_of_interest = [start_date] # We are creating a list of dates ranging from user specified start and end dates
487
+ # date_plus_one = start_date
488
 
489
+ # while date_plus_one < end_date:
490
+ # date_plus_one = date_plus_one + datetime.timedelta(days=1)
491
+ # dates_of_interest.append(date_plus_one)
492
 
493
+ # # This is to standardize the datetimes. Without this, the datetime calculations for each power plant will not work
494
+ # results_plants = {plant_name: {date: {"available_capacity": power, "updated_date": pd.to_datetime("1970-01-01", utc=True)} for date in dates_of_interest}
495
+ # for plant_name, power in plants_metadata.items()}
496
 
497
 
498
+ # for plant, unavailabilities in results_sorted.items():
499
 
500
+ # original_power = plants_metadata[plant]
501
+ # # Get all the unavailabilities scheduled for the plant.
502
+ # results_current_plant = results_plants[plant]
503
 
504
+ # for unavailability in unavailabilities:
505
+ # # For each unavailability, the resulting power, start and end datetime are collected. Need to collect updated_date
506
+ # power_unavailability = unavailability["available_capacity"]
507
+ # updated_date_unav = unavailability["updated_date"]
508
+ # # The date comes as a string
509
+ # start_datetime_unav = unavailability["start_date"]
510
+ # end_datetime_unav = unavailability["end_date"]
511
+ # start_date_unav = start_datetime_unav.date() # Extract date part
512
+ # end_date_unav = end_datetime_unav.date() # Extract date part
513
 
514
+ # # For the current unavailability, we want to find which days it affects
515
+ # for day in dates_of_interest:
516
+
517
+ # start_hour = start_datetime_unav.hour
518
+ # start_minute = start_datetime_unav.minute
519
+ # end_hour = end_datetime_unav.hour
520
+ # end_minute = end_datetime_unav.minute
521
+
522
+ # if start_date_unav <= day <= end_date_unav:
523
+ # # Check if the day is already updated with a later update_date
524
+ # if day in results_current_plant and updated_date_unav <= results_current_plant[day]["updated_date"]:
525
+ # continue # Skip to the next loop if there is already information for a later update_date
526
+
527
+ # # Calculate the % of the day that the plant is under maintenance
528
+ # if start_date_unav == day and day == end_date_unav:
529
+ # # The unavailability starts and ends on the same day
530
+ # percentage_of_day = (end_hour * 60 + end_minute - start_hour * 60 - start_minute) / (24 * 60)
531
+ # elif start_date_unav == day:
532
+ # # The unavailability starts on the current day but ends on a later day
533
+ # percentage_of_day = (24 * 60 - (start_hour * 60 + start_minute)) / (24 * 60)
534
+ # elif day == end_date_unav:
535
+ # # The unavailability starts on a previous day and ends on the current day
536
+ # percentage_of_day = (end_hour * 60 + end_minute) / (24 * 60)
537
+ # else:
538
+ # # The unavailability covers the entire day
539
+ # percentage_of_day = 1
540
+
541
+ # # The average power of the day is calculated
542
+ # power_of_day = percentage_of_day * power_unavailability + (1 - percentage_of_day) * original_power
543
+
544
+ # # Update the available_capacity for the day only if it's not already updated with a later update_date
545
+ # if day not in results_current_plant or updated_date_unav > results_current_plant[day]["updated_date"]:
546
+ # results_current_plant[day] = {"available_capacity": power_of_day, "updated_date": updated_date_unav}
547
+
548
+
549
+ # output_results = {}
550
+ # for plant, plant_data in results_plants.items():
551
+ # available_capacity_per_day = {str(date): data["available_capacity"] for date, data in plant_data.items()}
552
+ # output_results[plant] = available_capacity_per_day
553
+
554
+ # # print(output_results)
555
+ # add_total(output_results)
556
+ # # print("Done")
557
+ # # print(results_plants)
558
+ # # Convert datetime key to string to store in mongodb
559
+ # output_results = {plant: {str(date): power for date, power in plant_data.items()} for plant, plant_data in output_results.items()}
560
+ # # print(output_results)
561
+ # # -------------------------------------------------
562
+ # if photo_date == False:
563
+ # # Store the results_plants in MongoDB
564
+ # database_name = "data" # Specify your database name
565
+ # collection_name = "filtered" # Specify your collection name
566
+ # # mongo_store_data(output_results, database_name, collection_name)
567
+ # # mongo_replace_data(results_plants_total, database_name, "filtered_excel")
568
+ # # print("Data stored in database")
569
+ # # mongo_append_data(results_plants, database_name, collection_name)
570
 
571
+ # # json_data = json.dumps(convert_to_json(output_results))
572
+ # json_data = json.dumps(output_results)
573
+ # # print(json_data)
574
+ # return json_data
575
+ # else:
576
+ # database_name = "data" # Specify your database name
577
+ # collection_name = "photo_date" # Specify your collection name
578
+ # # mongo_store_data(output_results, database_name, collection_name)
579
+
580
+ # # json_data = json.dumps(convert_to_json(output_results))
581
+ # json_data = json.dumps(output_results)
582
+ # # print(json_data)
583
+ # return json_data
584
+ # # -------------------------------------------------
585
+ # return
586
+
587
+
588
+ # # Namespaces
589
+
590
+ # # Get raw data stuff
591
+
592
+ # raw_ns = Namespace('raw', description='Raw Data', path='/nucpy/v1')
593
+ # api.add_namespace(raw_ns)
594
+
595
+ # @raw_ns.route('/raw', methods=["GET"])
596
+ # @raw_ns.doc(params= {"start_date": "Start date", "end_date": "end date", "photo_date": "True False", "past_date": "Cutoff date"})
597
+ # class Raw(Resource):
598
+ # # @auth.login_required
599
+ # def get(self):
600
+ # # raw_data = merge_gridfs_files_to_json()
601
+ # print("Applying request")
602
+ # mongo_start_date = request.args.get("start_date")
603
+ # mongo_end_date = request.args.get("end_date")
604
+ # mongo_photo_date = request.args.get("photo_date")
605
+ # mongo_past_date = request.args.get("past_date")
606
+ # print("Getting raw_data")
607
+ # raw_data = mongo_unavs_call(mongo_start_date, mongo_end_date, mongo_photo_date, mongo_past_date)
608
+ # print("Returning raw_data")
609
+ # print(raw_data)
610
+ # return raw_data
611
+
612
+ # # Get RTE data
613
+
614
+ # rte_ns = Namespace('rte', description='RTE Data', path='/nucpy/v1')
615
+ # api.add_namespace(rte_ns)
616
+
617
+ # @rte_ns.route('/rte', methods=["GET"])
618
+ # # @rte_ns.doc(params= {"start_date": "Start date", "end_date": "end date"})
619
+ # class RTEDATA(Resource):
620
+ # # @auth.login_required
621
+ # def get(self):
622
+ # rte_start_date = request.args.get("start_date")
623
+ # rte_end_date = request.args.get("end_date")
624
+ # print(rte_start_date)
625
+ # print(rte_end_date)
626
+ # # Process the user input and retrieve data
627
+ # data = get_unavailabilities(rte_start_date, rte_end_date)
628
+
629
+ # return data
630
+
631
+ # # Get processed data
632
+
633
+ # nucmonitor_ns = Namespace('nucmonitor', description='Nucmonitor', path='/nucpy/v1')
634
+ # api.add_namespace(nucmonitor_ns)
635
+
636
+ # @nucmonitor_ns.route('/nucmonitor', methods=['GET'])
637
+ # class Nucmonitor(Resource):
638
+ # # @auth.login_required
639
+ # def get(self):
640
+ # # Retrieve input parameters from request.args
641
+ # start_date = request.args.get("start_date")
642
+ # end_date = request.args.get("end_date")
643
+ # photo_date = request.args.get("photo_date")
644
+ # past_date = request.args.get("past_date")
645
+
646
+ # # Call the /rte endpoint to get RTE data
647
+ # rte_data = self.get_rte_data(start_date, end_date)
648
+ # print("Got RTE data")
649
+ # print("Getting Mongo data")
650
+ # mongo_data = self.get_mongo_data(start_date, end_date, photo_date, past_date)
651
+ # print("Got Mongo data")
652
+ # print(mongo_data)
653
+ # # Process data using nuc_monitor
654
+ # nucmonitor_response = nuc_monitor(rte_data, mongo_data, start_date, end_date, photo_date, past_date)
655
+ # # print(nucmonitor_response)
656
+ # return (nucmonitor_response)
657
+
658
+ # def get_rte_data(self, start_date, end_date):
659
+ # rte_url = "http://0.0.0.0:7860/nucpy/v1/rte" # RTE endpoint URL
660
+ # rte_params = {"start_date": start_date, "end_date": end_date}
661
+ # rte_response = requests.get(rte_url, params=rte_params)
662
+ # # rte_data = rte_response.json()
663
+ # return rte_response
664
+
665
+ # def get_mongo_data(self, start_date, end_date, photo_date, past_date):
666
+ # print("Getting url")
667
+ # mongo_url = "http://0.0.0.0:7860/nucpy/v1/raw" # Mongo endpoint URL
668
+ # print("Getting params")
669
+ # mongo_params = {"start_date": start_date, "end_date": end_date, "photo_date": photo_date, "past_date": past_date}
670
+ # print("Getting request")
671
+ # mongo_response = requests.get(mongo_url, params=mongo_params)
672
+ # # mongo_data = mongo_response.json()
673
+ # print("Returning response")
674
+ # return mongo_response
675
 
 
676
 
 
 
677
 
678
+ # if __name__ == '__main__':
679
+ # app.run(host='0.0.0.0', port=7860)
680
+ from flask import Flask
681
+ from flask_restx import Api, Resource
 
 
 
 
682
 
683
+ app = Flask(__name__)
684
+ api = Api(app)
685
 
686
+ @api.route('/hello')
687
+ class HelloWorld(Resource):
688
+ def get(self):
689
+ return {'message': 'Hello, World!'}
690
 
691
  if __name__ == '__main__':
692
  app.run(host='0.0.0.0', port=7860)
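With everything else commented out, the new file serves a single endpoint, which is enough to check that the container itself starts and responds. A quick smoke test once the app is running (assuming local access to port 7860):

import requests

resp = requests.get("http://localhost:7860/hello")
print(resp.status_code)  # expect 200
print(resp.json())       # expect {'message': 'Hello, World!'}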