Update for v0.7 RTE
Browse files
app.py
CHANGED
|
@@ -157,49 +157,97 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
|
|
| 157 |
# print(mongo_db_data)
|
| 158 |
mongo_df = pd.DataFrame(mongo_db_data)
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
#
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
# Convert the date columns to datetime objects
|
| 205 |
for col in ["publication_date", "start_date", "end_date"]:
|
|
@@ -210,7 +258,7 @@ def nuc_monitor(usr_start_date, usr_end_date, past_date, mongo_db_data):
|
|
| 210 |
# mongo_df_2[col] = mongo_df_2[col].dt.tz_convert("Europe/Paris")
|
| 211 |
|
| 212 |
# mongo_df_2 = mongo_df_2.drop_duplicates(subset='identifier', keep='first')
|
| 213 |
-
mongo_df_2['version'] = mongo_df_2['version'].astype(
|
| 214 |
# Sort by identifier and version to ensure the latest version is at the top
|
| 215 |
# Method 1: Use groupby + idxmax to pick the row with the largest version per identifier
|
| 216 |
idx = mongo_df_2.groupby("identifier")["version"].idxmax()
|
|
|
|
| 157 |
# print(mongo_db_data)
|
| 158 |
mongo_df = pd.DataFrame(mongo_db_data)
|
| 159 |
|
| 160 |
+
# the five columns you care about
|
| 161 |
+
_optional = {
|
| 162 |
+
"updated_date",
|
| 163 |
+
"type",
|
| 164 |
+
"production_type",
|
| 165 |
+
"unit",
|
| 166 |
+
"status",
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
# see which of those actually exist
|
| 170 |
+
present = set(mongo_df.columns) & _optional
|
| 171 |
+
|
| 172 |
+
if _optional.issubset(mongo_df.columns):
|
| 173 |
+
# 0. first, pick the columns you know exist, plus the always‑present ones
|
| 174 |
+
cols = [
|
| 175 |
+
"identifier", "version", "message_id",
|
| 176 |
+
"values", "publication_date", "unavailability_type", "fuel_type",
|
| 177 |
+
"affected_asset_or_unit_name", "affected_asset_or_unit_installed_capacity",
|
| 178 |
+
"event_status"
|
| 179 |
+
] + list(_optional)
|
| 180 |
+
mongo_df = mongo_df[cols]
|
| 181 |
+
|
| 182 |
+
# 1. normalize “unit”
|
| 183 |
+
unit_expanded = pd.json_normalize(mongo_df["unit"])
|
| 184 |
+
mongo_df_2 = pd.concat([mongo_df.drop(columns=["unit"]), unit_expanded], axis=1)
|
| 185 |
+
|
| 186 |
+
# 2. normalize the first element of “values”
|
| 187 |
+
mongo_df_2["values_first"] = (
|
| 188 |
+
mongo_df_2["values"]
|
| 189 |
+
.apply(lambda lst: lst[0] if isinstance(lst, list) and lst else {})
|
| 190 |
+
)
|
| 191 |
+
values_expanded = pd.json_normalize(mongo_df_2["values_first"])
|
| 192 |
+
mongo_df_2 = pd.concat(
|
| 193 |
+
[mongo_df_2.drop(columns=["values", "values_first"]), values_expanded],
|
| 194 |
+
axis=1
|
| 195 |
+
)
|
| 196 |
|
| 197 |
+
# 3. coalesce and drop old cols
|
| 198 |
+
mongo_df_2["fuel_type"] = mongo_df_2["fuel_type"].combine_first(mongo_df_2["production_type"])
|
| 199 |
+
mongo_df_2["publication_date"] = mongo_df_2["publication_date"].combine_first(mongo_df_2["updated_date"])
|
| 200 |
+
mongo_df_2["event_status"] = mongo_df_2["event_status"].combine_first(mongo_df_2["status"])
|
| 201 |
+
mongo_df_2["affected_asset_or_unit_installed_capacity"] = (
|
| 202 |
+
mongo_df_2["affected_asset_or_unit_installed_capacity"]
|
| 203 |
+
.combine_first(mongo_df_2["installed_capacity"])
|
| 204 |
+
)
|
| 205 |
+
mongo_df_2["affected_asset_or_unit_name"] = (
|
| 206 |
+
mongo_df_2["affected_asset_or_unit_name"]
|
| 207 |
+
.combine_first(mongo_df_2["name"])
|
| 208 |
+
)
|
| 209 |
+
mongo_df_2["unavailability_type"] = (
|
| 210 |
+
mongo_df_2["unavailability_type"]
|
| 211 |
+
.combine_first(mongo_df_2["type"].iloc[:, 0])
|
| 212 |
+
)
|
| 213 |
|
| 214 |
+
drop_cols = [
|
| 215 |
+
"production_type",
|
| 216 |
+
"updated_date",
|
| 217 |
+
"status",
|
| 218 |
+
"installed_capacity",
|
| 219 |
+
"name",
|
| 220 |
+
"type",
|
| 221 |
+
"eic_code",
|
| 222 |
+
]
|
| 223 |
+
# only drop those that are actually there
|
| 224 |
+
drop_cols = [c for c in drop_cols if c in mongo_df_2.columns]
|
| 225 |
+
mongo_df_2 = mongo_df_2.drop(columns=drop_cols)
|
| 226 |
+
|
| 227 |
+
# now mongo_df_2 is your final
|
| 228 |
+
else:
|
| 229 |
+
# at least one of the required columns is missing:
|
| 230 |
+
# present contains the ones you did find
|
| 231 |
+
missing = _optional - present
|
| 232 |
+
print(f"Skipping normalize process because these columns are missing: {missing}")
|
| 233 |
+
mongo_df_2 = mongo_df.copy() # or however you want to proceed
|
| 234 |
+
|
| 235 |
+
mongo_df_2["values_first"] = mongo_df_2["values"].apply(
|
| 236 |
+
lambda lst: lst[0] if isinstance(lst, list) and len(lst) > 0 else {}
|
| 237 |
+
)
|
| 238 |
|
| 239 |
+
# 2. Normalize that dict into separate columns
|
| 240 |
+
values_expanded = pd.json_normalize(mongo_df_2["values_first"])
|
| 241 |
+
# e.g. this produces columns like “start_date”, “end_date”, etc.
|
| 242 |
+
|
| 243 |
+
# 3. Concatenate back and drop the originals
|
| 244 |
+
mongo_df_2 = pd.concat(
|
| 245 |
+
[
|
| 246 |
+
mongo_df_2.drop(columns=["values", "values_first", "start_date", "end_date"]),
|
| 247 |
+
values_expanded
|
| 248 |
+
],
|
| 249 |
+
axis=1
|
| 250 |
+
)
|
| 251 |
|
| 252 |
# Convert the date columns to datetime objects
|
| 253 |
for col in ["publication_date", "start_date", "end_date"]:
|
|
|
|
| 258 |
# mongo_df_2[col] = mongo_df_2[col].dt.tz_convert("Europe/Paris")
|
| 259 |
|
| 260 |
# mongo_df_2 = mongo_df_2.drop_duplicates(subset='identifier', keep='first')
|
| 261 |
+
mongo_df_2['version'] = mongo_df_2['version'].astype(float)
|
| 262 |
# Sort by identifier and version to ensure the latest version is at the top
|
| 263 |
# Method 1: Use groupby + idxmax to pick the row with the largest version per identifier
|
| 264 |
idx = mongo_df_2.groupby("identifier")["version"].idxmax()
|