mabuseif committed on
Commit
e0ab830
·
verified ·
1 Parent(s): 1499f62

Update app/climate_data.py

Browse files
Files changed (1) hide show
  1. app/climate_data.py +242 -378
app/climate_data.py CHANGED
@@ -77,234 +77,206 @@ class ClimateDataManager:
77
  """Initialize climate data manager."""
78
  pass
79
 
80
- def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, year: str = None) -> Dict[str, Any]:
81
- """
82
- Parse an EPW file and extract climate data.
83
-
84
- Args:
85
- uploaded_file: The uploaded EPW file object or file content as string
86
- location_num: Location number for climate projection (optional)
87
- rcp: RCP scenario for climate projection (optional)
88
- year: Year for climate projection (optional)
89
-
90
- Returns:
91
- Dict containing parsed climate data
92
- """
93
- try:
94
- # Read the EPW file
95
- if isinstance(uploaded_file, str):
96
- content = uploaded_file
97
- epw_filename = f"{location_num}_{rcp}_{year}.epw"
98
- else:
99
- content = uploaded_file.getvalue().decode('utf-8')
100
- epw_filename = uploaded_file.name
101
-
102
- lines = content.split('\n')
103
-
104
- # Extract header information (first 8 lines)
105
- header_lines = lines[:8]
106
-
107
- # Parse location data from line 1
108
- location_data = header_lines[0].split(',')
109
-
110
- # Extract location information
111
- location = {
112
- "city": location_data[1].strip(),
113
- "state_province": location_data[2].strip(),
114
- "country": location_data[3].strip(),
115
- "source": location_data[4].strip(),
116
- "wmo": location_data[5].strip(),
117
- "latitude": float(location_data[6]),
118
- "longitude": float(location_data[7]),
119
- "timezone": float(location_data[8]),
120
- "elevation": float(location_data[9])
121
- }
122
-
123
- # Override city and state from LOCATION_MAPPING if provided
124
- if location_num in LOCATION_MAPPING:
125
- location["city"] = LOCATION_MAPPING[location_num]["city"]
126
- location["state_province"] = LOCATION_MAPPING[location_num]["state"]
127
 
128
- # Parse TYPICAL/EXTREME PERIODS
129
- typical_extreme_periods = {}
130
- date_pattern = r'^\d{1,2}\s*/\s*\d{1,2}$'
131
- for line in lines:
132
- if line.startswith("TYPICAL/EXTREME PERIODS"):
133
- parts = line.strip().split(',')
134
- try:
135
- num_periods = int(parts[1])
136
- except ValueError:
137
- logger.warning("Invalid number of periods in TYPICAL/EXTREME PERIODS, skipping parsing.")
138
- break
139
- for i in range(num_periods):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  try:
141
- if len(parts) < 2 + i*4 + 4:
142
- logger.warning(f"Insufficient fields for period {i+1}, skipping.")
143
- continue
144
- period_name = parts[2 + i*4]
145
- period_type = parts[3 + i*4]
146
- start_date = parts[4 + i*4].strip()
147
- end_date = parts[5 + i*4].strip()
148
- if period_name in [
149
- "Summer - Week Nearest Max Temperature For Period",
150
- "Summer - Week Nearest Average Temperature For Period",
151
- "Winter - Week Nearest Min Temperature For Period",
152
- "Winter - Week Nearest Average Temperature For Period"
153
- ]:
154
- season = 'summer' if 'Summer' in period_name else 'winter'
155
- period_type = 'extreme' if 'Max' in period_name or 'Min' in period_name else 'typical'
156
- key = f"{season}_{period_type}"
157
- start_date_clean = re.sub(r'\s+', '', start_date)
158
- end_date_clean = re.sub(r'\s+', '', end_date)
159
- if not re.match(date_pattern, start_date) or not re.match(date_pattern, end_date):
160
- logger.warning(f"Invalid date format for period {period_name}: {start_date} to {end_date}, skipping.")
161
  continue
162
- start_month, start_day = map(int, start_date_clean.split('/'))
163
- end_month, end_day = map(int, end_date_clean.split('/'))
164
- typical_extreme_periods[key] = {
165
- "start": {"month": start_month, "day": start_day},
166
- "end": {"month": end_month, "day": end_day}
167
- }
168
- except (IndexError, ValueError) as e:
169
- logger.warning(f"Error parsing period {i+1}: {str(e)}, skipping.")
170
- continue
171
- break
172
-
173
- # Parse GROUND TEMPERATURES
174
- ground_temperatures = {}
175
- for line in lines:
176
- if line.startswith("GROUND TEMPERATURES"):
177
- parts = line.strip().split(',')
178
- try:
179
- num_depths = int(parts[1])
180
- except ValueError:
181
- logger.warning("Invalid number of depths in GROUND TEMPERATURES, skipping parsing.")
 
 
 
 
 
 
 
182
  break
183
- for i in range(num_depths):
 
 
 
 
 
184
  try:
185
- if len(parts) < 2 + i*16 + 16:
186
- logger.warning(f"Insufficient fields for ground temperature depth {i+1}, skipping.")
187
- continue
188
- depth = parts[2 + i*16]
189
- temps = [float(t) for t in parts[6 + i*16:18 + i*16] if t.strip()]
190
- if len(temps) != 12:
191
- logger.warning(f"Invalid number of temperatures for depth {depth}m, expected 12, got {len(temps)}, skipping.")
 
 
 
 
 
 
 
 
 
 
192
  continue
193
- ground_temperatures[depth] = temps
194
- except (ValueError, IndexError) as e:
195
- logger.warning(f"Error parsing ground temperatures for depth {i+1}: {str(e)}, skipping.")
196
- continue
197
- break
198
-
199
- # Parse data rows (starting from line 9)
200
- data_lines = lines[8:]
201
-
202
- # Create a DataFrame from the data rows
203
- data = []
204
- for line in data_lines:
205
- if line.strip(): # Skip empty lines
206
- data.append(line.split(','))
207
-
208
- # Define core columns (common to both 32 and 35 column formats)
209
- core_columns = [
210
- "year", "month", "day", "hour", "minute", "data_source", "dry_bulb_temp",
211
- "dew_point_temp", "relative_humidity", "atmospheric_pressure", "extraterrestrial_radiation",
212
- "extraterrestrial_radiation_normal", "horizontal_infrared_radiation", "global_horizontal_radiation",
213
- "direct_normal_radiation", "diffuse_horizontal_radiation", "global_horizontal_illuminance",
214
- "direct_normal_illuminance", "diffuse_horizontal_illuminance", "zenith_luminance",
215
- "wind_direction", "wind_speed", "total_sky_cover", "opaque_sky_cover", "visibility",
216
- "ceiling_height", "present_weather_observation", "present_weather_codes",
217
- "precipitable_water", "aerosol_optical_depth", "snow_depth", "days_since_last_snowfall"
218
- ]
219
-
220
- # Additional columns for 35-column format
221
- additional_columns = ["albedo", "liquid_precipitation_depth", "liquid_precipitation_quantity"]
222
-
223
- # Determine number of columns in data
224
- num_columns = len(data[0]) if data else 0
225
- if num_columns not in [32, 35]:
226
- raise ValueError(f"Invalid number of columns in EPW file: {num_columns}. Expected 32 or 35 columns.")
227
-
228
- # Select appropriate columns based on file format
229
- columns = core_columns if num_columns == 32 else core_columns + additional_columns
230
-
231
- # Create DataFrame
232
- df = pd.DataFrame(data, columns=columns[:num_columns])
233
-
234
- # Ensure critical columns exist and are properly typed
235
- for col in ["year", "month", "day", "hour"]:
236
- if col not in df.columns:
237
- logger.warning(f"Missing '{col}' column in EPW data; initializing with default values")
238
- if col == "year":
239
- df[col] = 2025 # Default year
240
- elif col == "month":
241
- df[col] = pd.Series(range(1, len(df) + 1)).mod(12).replace(0, 12).astype('Int64')
242
- elif col == "day":
243
- df[col] = pd.Series(range(1, len(df) + 1)).mod(31).replace(0, 31).astype('Int64')
244
- elif col == "hour":
245
- df[col] = pd.Series(range(len(df))).mod(24).add(1).astype('Int64')
246
-
247
- # Convert numeric columns
248
- numeric_columns = [
249
- "year", "month", "day", "hour", "dry_bulb_temp", "dew_point_temp", "relative_humidity",
250
- "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
251
- "diffuse_horizontal_radiation", "wind_direction", "wind_speed", "total_sky_cover"
252
- ]
253
-
254
- for col in numeric_columns:
255
- if col in df.columns:
256
- df[col] = pd.to_numeric(df[col], errors='coerce')
257
- if col in ["year", "month", "day", "hour"]:
258
- df[col] = df[col].astype('Int64')
259
-
260
- # Validate and correct date columns
261
- if not df["month"].between(1, 12).all() or df["month"].isna().any():
262
- logger.warning("Invalid or missing 'month' values detected; attempting to infer from index")
263
- df["month"] = pd.Series(range(1, len(df) + 1)).mod(12).replace(0, 12).astype('Int64')
264
- if not df["day"].between(1, 31).all() or df["day"].isna().any():
265
- logger.warning("Invalid or missing 'day' values detected; assigning sequential days")
266
- df["day"] = pd.Series(range(1, len(df) + 1)).mod(31).replace(0, 31).astype('Int64')
267
- if not df["hour"].between(1, 24).all() or df["hour"].isna().any():
268
- logger.warning("Invalid or missing 'hour' values detected; assigning sequential hours")
269
- df["hour"] = pd.Series(range(len(df))).mod(24).add(1).astype('Int64')
270
-
271
- # Calculate diffuse fraction
272
- df['diffuse_fraction'] = df.apply(
273
- lambda row: row['diffuse_horizontal_radiation'] / row['global_horizontal_radiation']
274
- if pd.notna(row['global_horizontal_radiation']) and row['global_horizontal_radiation'] > 0 else 0.0,
275
- axis=1
276
- )
277
-
278
- # Calculate design conditions
279
- design_conditions = self._calculate_design_conditions(df)
280
-
281
- # Process hourly data
282
- hourly_data = self._process_hourly_data(df)
283
-
284
- # Determine climate zone based on HDD and CDD
285
- climate_zone = self._determine_climate_zone(
286
- design_conditions["heating_degree_days"],
287
- design_conditions["cooling_degree_days"]
288
- )
289
-
290
- # Create climate data dictionary
291
- climate_data = {
292
- "id": f"{location['city']}_{location['country']}_{rcp}_{year}".replace(" ", "_") if rcp and year else f"{location['city']}_{location['country']}".replace(" ", "_"),
293
- "location": location,
294
- "design_conditions": design_conditions,
295
- "climate_zone": climate_zone,
296
- "hourly_data": hourly_data,
297
- "epw_filename": epw_filename,
298
- "typical_extreme_periods": typical_extreme_periods,
299
- "ground_temperatures": ground_temperatures
300
- }
301
-
302
- logger.info(f"EPW file processed successfully: {epw_filename}")
303
- return climate_data
304
-
305
- except Exception as e:
306
- logger.error(f"Error processing EPW file: {str(e)}")
307
- raise ValueError(f"Error processing EPW file: {str(e)}")
308
 
309
  def _calculate_design_conditions(self, df: pd.DataFrame) -> Dict[str, Any]:
310
  """
@@ -461,7 +433,7 @@ def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, yea
461
  except Exception as e:
462
  logger.warning(f"Error calculating Sky Clearness Index: {str(e)}, returning None")
463
  return None
464
-
465
  def _process_hourly_data(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
466
  """
467
  Process hourly data from EPW DataFrame, including dew point, Sky Clearness Index, diffuse fraction, and total sky cover.
@@ -475,30 +447,6 @@ def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, yea
475
  hourly_data = []
476
 
477
  try:
478
- # Log DataFrame structure for debugging
479
- logger.debug(f"Input DataFrame columns: {list(df.columns)}")
480
- logger.debug(f"Input DataFrame sample: {df.head().to_dict()}")
481
-
482
- # Validate presence of critical columns
483
- required_columns = ["month", "day", "hour", "dry_bulb_temp"]
484
- missing_columns = [col for col in required_columns if col not in df.columns]
485
- if missing_columns:
486
- logger.warning(f"Missing required columns in DataFrame: {missing_columns}")
487
- # Initialize missing columns with defaults
488
- for col in missing_columns:
489
- if col == "month":
490
- df["month"] = pd.Series(range(1, len(df) + 1)).mod(12).replace(0, 12).astype('Int64')
491
- logger.info("Initialized 'month' column with sequential values")
492
- elif col == "day":
493
- df["day"] = pd.Series(range(1, len(df) + 1)).mod(31).replace(0, 31).astype('Int64')
494
- logger.info("Initialized 'day' column with sequential values")
495
- elif col == "hour":
496
- df["hour"] = pd.Series(range(len(df))).mod(24).add(1).astype('Int64')
497
- logger.info("Initialized 'hour' column with sequential values")
498
- elif col == "dry_bulb_temp":
499
- df["dry_bulb_temp"] = 20.0
500
- logger.info("Initialized 'dry_bulb_temp' column with default value 20.0")
501
-
502
  # Ensure numeric columns
503
  numeric_columns = [
504
  "dry_bulb_temp", "dew_point_temp", "relative_humidity", "atmospheric_pressure",
@@ -511,22 +459,14 @@ def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, yea
511
  if col in df.columns:
512
  df[col] = pd.to_numeric(df[col], errors='coerce')
513
 
514
- # Ensure date columns are integers
515
- for col in ["month", "day", "hour"]:
516
- if col in df.columns:
517
- df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')
518
- # Replace any remaining NaN values
519
- if col == "month" and df[col].isna().any():
520
- df[col] = pd.Series(range(1, len(df) + 1)).mod(12).replace(0, 12).astype('Int64')
521
- elif col == "day" and df[col].isna().any():
522
- df[col] = pd.Series(range(1, len(df) + 1)).mod(31).replace(0, 31).astype('Int64')
523
- elif col == "hour" and df[col].isna().any():
524
- df[col] = pd.Series(range(len(df))).mod(24).add(1).astype('Int64')
525
 
526
  # Process each row
527
  for _, row in df.iterrows():
528
  if pd.isna(row["month"]) or pd.isna(row["day"]) or pd.isna(row["hour"]) or pd.isna(row["dry_bulb_temp"]):
529
- logger.warning(f"Skipping row with missing critical data: month={row.get('month')}, day={row.get('day')}, hour={row.get('hour')}, dry_bulb_temp={row.get('dry_bulb_temp')}")
530
  continue # Skip rows with missing critical data
531
 
532
  # Calculate dew point temperature
@@ -570,13 +510,12 @@ def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, yea
570
  if len(hourly_data) < 8700: # Allow for some missing data
571
  logger.warning(f"Hourly data has {len(hourly_data)} records instead of 8760. Some records may be missing.")
572
 
573
- logger.debug(f"Processed hourly_data sample: {hourly_data[:5]}")
574
  return hourly_data
575
 
576
  except Exception as e:
577
  logger.error(f"Error processing hourly data: {str(e)}")
578
  return []
579
-
580
  def _determine_climate_zone(self, hdd: float, cdd: float) -> str:
581
  """
582
  Determine ASHRAE climate zone based on heating and cooling degree days.
@@ -825,6 +764,7 @@ def display_climate_summary(climate_data: Dict[str, Any]):
825
 
826
  # Overview Tab (unchanged)
827
  with tab1:
 
828
  design = climate_data["design_conditions"]
829
  location = climate_data["location"]
830
 
@@ -969,7 +909,7 @@ def display_climate_summary(climate_data: Dict[str, Any]):
969
  else:
970
  st.warning("No hourly data available.")
971
 
972
- # Sun Path Tab (unchanged)
973
  with tab2:
974
  st.subheader("Sun Path Diagram")
975
  st.markdown("Visualize the sun's path for the selected location and date.")
@@ -1078,41 +1018,21 @@ def display_climate_summary(climate_data: Dict[str, Any]):
1078
  end_month = st.selectbox("End Month", list(range(1, 13)), index=11, key="wind_rose_end_month")
1079
 
1080
  try:
1081
- # Validate hourly_data
1082
- if not climate_data.get("hourly_data"):
1083
- st.warning("No hourly data available for wind rose visualization.")
1084
- logger.warning("hourly_data is empty or missing")
1085
- raise ValueError("hourly_data is empty or missing")
1086
-
1087
- # Create DataFrame with explicit column mapping
1088
- hourly_df = pd.DataFrame(climate_data["hourly_data"], columns=[
1089
- "month", "day", "hour", "dry_bulb", "dew_point", "relative_humidity",
1090
- "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
1091
- "diffuse_horizontal_radiation", "wind_speed", "wind_direction",
1092
- "sky_clearness_index", "total_sky_cover", "diffuse_fraction"
1093
- ])
1094
-
1095
- # Log DataFrame structure
1096
- logger.debug(f"Wind Rose hourly_df columns: {list(hourly_df.columns)}")
1097
- logger.debug(f"Wind Rose hourly_df sample: {hourly_df.head().to_dict()}")
1098
-
1099
- # Check for month column
1100
- if "month" not in hourly_df.columns:
1101
- logger.warning("Missing 'month' column in hourly_df; initializing with sequential values")
1102
- hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
1103
-
1104
- # Convert month to integer and handle NaN
1105
- hourly_df["month"] = pd.to_numeric(hourly_df["month"], errors='coerce').astype('Int64')
1106
- if hourly_df["month"].isna().any():
1107
- logger.warning("NaN values in 'month' column; replacing with sequential values")
1108
- hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
1109
-
1110
  # Create AnalysisPeriod for filtering
1111
  analysis_period = AnalysisPeriod(st_month=start_month, end_month=end_month)
1112
 
 
 
 
 
 
 
 
 
 
1113
  # Filter data
1114
  filtered_df = hourly_df[
1115
- (hourly_df["month"] >= start_month) & (hourly_df["month"] <= end_month)
1116
  ]
1117
 
1118
  if filtered_df.empty:
@@ -1167,38 +1087,18 @@ def display_climate_summary(climate_data: Dict[str, Any]):
1167
  show_comfort = st.checkbox("Show Comfort Polygon", value=True, key="psychro_comfort")
1168
 
1169
  try:
1170
- # Validate hourly_data
1171
- if not climate_data.get("hourly_data"):
1172
- st.warning("No hourly data available for psychrometric chart visualization.")
1173
- logger.warning("hourly_data is empty or missing")
1174
- raise ValueError("hourly_data is empty or missing")
1175
-
1176
- # Create DataFrame with explicit column mapping
1177
- hourly_df = pd.DataFrame(climate_data["hourly_data"], columns=[
1178
- "month", "day", "hour", "dry_bulb", "dew_point", "relative_humidity",
1179
- "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
1180
- "diffuse_horizontal_radiation", "wind_speed", "wind_direction",
1181
- "sky_clearness_index", "total_sky_cover", "diffuse_fraction"
1182
- ])
1183
-
1184
- # Log DataFrame structure
1185
- logger.debug(f"Psychrometric Chart hourly_df columns: {list(hourly_df.columns)}")
1186
- logger.debug(f"Psychrometric Chart hourly_df sample: {hourly_df.head().to_dict()}")
1187
-
1188
- # Check for month column
1189
  if "month" not in hourly_df.columns:
1190
- logger.warning("Missing 'month' column in hourly_df; initializing with sequential values")
1191
- hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
1192
-
1193
- # Convert month to integer and handle NaN
1194
- hourly_df["month"] = pd.to_numeric(hourly_df["month"], errors='coerce').astype('Int64')
1195
- if hourly_df["month"].isna().any():
1196
- logger.warning("NaN values in 'month' column; replacing with sequential values")
1197
- hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
1198
 
1199
  # Filter data
1200
  filtered_df = hourly_df[
1201
- (hourly_df["month"] >= start_month) & (hourly_df["month"] <= end_month)
1202
  ]
1203
 
1204
  if filtered_df.empty:
@@ -1251,6 +1151,9 @@ def display_climate_summary(climate_data: Dict[str, Any]):
1251
  for wb_temp_target in [10, 15, 20, 25]:
1252
  wb_temps = np.linspace(min(dry_bulb_temps), max(dry_bulb_temps), 100)
1253
  wb_humid_ratios = []
 
 
 
1254
  for t in wb_temps:
1255
  es = 610.78 * 10 ** ((7.5 * t) / (237.3 + t))
1256
  e = es
@@ -1294,38 +1197,18 @@ def display_climate_summary(climate_data: Dict[str, Any]):
1294
  end_month = st.selectbox("End Month", list(range(1, 13)), index=11, key="radiation_end_month")
1295
 
1296
  try:
1297
- # Validate hourly_data
1298
- if not climate_data.get("hourly_data"):
1299
- st.warning("No hourly data available for radiation diagram visualization.")
1300
- logger.warning("hourly_data is empty or missing")
1301
- raise ValueError("hourly_data is empty or missing")
1302
-
1303
- # Create DataFrame with explicit column mapping
1304
- hourly_df = pd.DataFrame(climate_data["hourly_data"], columns=[
1305
- "month", "day", "hour", "dry_bulb", "dew_point", "relative_humidity",
1306
- "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
1307
- "diffuse_horizontal_radiation", "wind_speed", "wind_direction",
1308
- "sky_clearness_index", "total_sky_cover", "diffuse_fraction"
1309
- ])
1310
-
1311
- # Log DataFrame structure
1312
- logger.debug(f"Radiation Diagram hourly_df columns: {list(hourly_df.columns)}")
1313
- logger.debug(f"Radiation Diagram hourly_df sample: {hourly_df.head().to_dict()}")
1314
-
1315
- # Check for month column
1316
  if "month" not in hourly_df.columns:
1317
- logger.warning("Missing 'month' column in hourly_df; initializing with sequential values")
1318
- hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
1319
-
1320
- # Convert month to integer and handle NaN
1321
- hourly_df["month"] = pd.to_numeric(hourly_df["month"], errors='coerce').astype('Int64')
1322
- if hourly_df["month"].isna().any():
1323
- logger.warning("NaN values in 'month' column; replacing with sequential values")
1324
- hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
1325
 
1326
  # Filter data
1327
  filtered_df = hourly_df[
1328
- (hourly_df["month"] >= start_month) & (hourly_df["month"] <= end_month)
1329
  ]
1330
 
1331
  if filtered_df.empty:
@@ -1366,6 +1249,7 @@ def display_climate_summary(climate_data: Dict[str, Any]):
1366
 
1367
  # Sunlight Hours Tab (unchanged)
1368
  with tab6:
 
1369
  st.subheader("Sunlight Hours and Shadow Study")
1370
  st.markdown("Visualize sunlight hours on a simple building surface.")
1371
 
@@ -1470,35 +1354,6 @@ def display_climate_summary(climate_data: Dict[str, Any]):
1470
  plot_type = st.selectbox("Plot Type", ["Line", "Scatter", "Histogram"], key="custom_plot_type")
1471
 
1472
  try:
1473
- # Validate hourly_data
1474
- if not climate_data.get("hourly_data"):
1475
- st.warning("No hourly data available for custom plot visualization.")
1476
- logger.warning("hourly_data is empty or missing")
1477
- raise ValueError("hourly_data is empty or missing")
1478
-
1479
- # Create DataFrame with explicit column mapping
1480
- hourly_df = pd.DataFrame(climate_data["hourly_data"], columns=[
1481
- "month", "day", "hour", "dry_bulb", "dew_point", "relative_humidity",
1482
- "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
1483
- "diffuse_horizontal_radiation", "wind_speed", "wind_direction",
1484
- "sky_clearness_index", "total_sky_cover", "diffuse_fraction"
1485
- ])
1486
-
1487
- # Log DataFrame structure
1488
- logger.debug(f"Custom Plot hourly_df columns: {list(hourly_df.columns)}")
1489
- logger.debug(f"Custom Plot hourly_df sample: {hourly_df.head().to_dict()}")
1490
-
1491
- # Check for month column
1492
- if "month" not in hourly_df.columns:
1493
- logger.warning("Missing 'month' column in hourly_df; initializing with sequential values")
1494
- hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
1495
-
1496
- # Convert month to integer and handle NaN
1497
- hourly_df["month"] = pd.to_numeric(hourly_df["month"], errors='coerce').astype('Int64')
1498
- if hourly_df["month"].isna().any():
1499
- logger.warning("NaN values in 'month' column; replacing with sequential values")
1500
- hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
1501
-
1502
  # Map variable to data key
1503
  variable_map = {
1504
  "Dry Bulb Temperature (°C)": "dry_bulb",
@@ -1516,9 +1371,18 @@ def display_climate_summary(climate_data: Dict[str, Any]):
1516
 
1517
  data_key = variable_map[variable]
1518
 
 
 
 
 
 
 
 
 
 
1519
  # Filter data
1520
  filtered_df = hourly_df[
1521
- (hourly_df["month"] >= start_month) & (hourly_df["month"] <= end_month)
1522
  ]
1523
 
1524
  if filtered_df.empty:
 
77
  """Initialize climate data manager."""
78
  pass
79
 
80
def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, year: str = None) -> Dict[str, Any]:
    """
    Parse an EPW weather file and extract climate data.

    The file is read in four steps: the LOCATION header (first line), the
    optional TYPICAL/EXTREME PERIODS and GROUND TEMPERATURES header lines,
    and the hourly data rows that follow the 8 header lines. Design
    conditions, processed hourly records and the climate zone are then
    derived through the manager's helper methods.

    Args:
        uploaded_file: Either the EPW content as a string, or a file-like
            object exposing ``getvalue()`` and ``name``.
        location_num: Location number for climate projection (optional).
            When present in LOCATION_MAPPING it overrides the city and
            state read from the file.
        rcp: RCP scenario for climate projection (optional).
        year: Year for climate projection (optional).

    Returns:
        Dict with the keys ``id``, ``location``, ``design_conditions``,
        ``climate_zone``, ``hourly_data``, ``epw_filename``,
        ``typical_extreme_periods`` and ``ground_temperatures``.

    Raises:
        ValueError: If the file cannot be processed for any reason; the
            underlying exception is attached as ``__cause__``.
    """
    try:
        # Read the EPW file
        if isinstance(uploaded_file, str):
            content = uploaded_file
            epw_filename = f"{location_num}_{rcp}_{year}.epw"
        else:
            raw = uploaded_file.getvalue()
            if isinstance(raw, str):
                # Text buffers already hold decoded content.
                content = raw
            else:
                try:
                    content = raw.decode('utf-8')
                except UnicodeDecodeError:
                    # Fall back so a single non-UTF-8 byte (e.g. an accented
                    # city name) does not reject the whole file.
                    logger.warning("EPW file is not valid UTF-8; decoding as latin-1 instead.")
                    content = raw.decode('latin-1')
            epw_filename = uploaded_file.name

        lines = content.split('\n')

        # Extract header information (first 8 lines)
        header_lines = lines[:8]

        # Parse location data from line 1
        location_data = header_lines[0].split(',')
        if len(location_data) < 10:
            # Report the real problem instead of "list index out of range".
            raise ValueError(
                f"LOCATION header has {len(location_data)} fields; expected at least 10."
            )

        # Extract location information
        location = {
            "city": location_data[1].strip(),
            "state_province": location_data[2].strip(),
            "country": location_data[3].strip(),
            "source": location_data[4].strip(),
            "wmo": location_data[5].strip(),
            "latitude": float(location_data[6]),
            "longitude": float(location_data[7]),
            "timezone": float(location_data[8]),
            "elevation": float(location_data[9])
        }

        # Override city and state from LOCATION_MAPPING if provided
        if location_num in LOCATION_MAPPING:
            location["city"] = LOCATION_MAPPING[location_num]["city"]
            location["state_province"] = LOCATION_MAPPING[location_num]["state"]

        # Parse TYPICAL/EXTREME PERIODS
        typical_extreme_periods = {}
        date_pattern = r'^\d{1,2}\s*/\s*\d{1,2}$'
        recognised_periods = (
            "Summer - Week Nearest Max Temperature For Period",
            "Summer - Week Nearest Average Temperature For Period",
            "Winter - Week Nearest Min Temperature For Period",
            "Winter - Week Nearest Average Temperature For Period",
        )
        for line in lines:
            if not line.startswith("TYPICAL/EXTREME PERIODS"):
                continue
            parts = line.strip().split(',')
            try:
                num_periods = int(parts[1])
            except (ValueError, IndexError):
                # IndexError: the line carries no count field at all. This
                # section is optional, so skip it rather than fail the file.
                logger.warning("Invalid number of periods in TYPICAL/EXTREME PERIODS, skipping parsing.")
                break
            for i in range(num_periods):
                try:
                    # Each period occupies 4 fields: name, type, start, end.
                    if len(parts) < 2 + i*4 + 4:
                        logger.warning(f"Insufficient fields for period {i+1}, skipping.")
                        continue
                    period_name = parts[2 + i*4]
                    start_date = parts[4 + i*4].strip()
                    end_date = parts[5 + i*4].strip()
                    if period_name not in recognised_periods:
                        continue
                    season = 'summer' if 'Summer' in period_name else 'winter'
                    period_type = 'extreme' if 'Max' in period_name or 'Min' in period_name else 'typical'
                    key = f"{season}_{period_type}"
                    if not re.match(date_pattern, start_date) or not re.match(date_pattern, end_date):
                        logger.warning(f"Invalid date format for period {period_name}: {start_date} to {end_date}, skipping.")
                        continue
                    # The pattern tolerates blanks around "/", so drop them before splitting.
                    start_date_clean = re.sub(r'\s+', '', start_date)
                    end_date_clean = re.sub(r'\s+', '', end_date)
                    start_month, start_day = map(int, start_date_clean.split('/'))
                    end_month, end_day = map(int, end_date_clean.split('/'))
                    typical_extreme_periods[key] = {
                        "start": {"month": start_month, "day": start_day},
                        "end": {"month": end_month, "day": end_day}
                    }
                except (IndexError, ValueError) as e:
                    logger.warning(f"Error parsing period {i+1}: {str(e)}, skipping.")
                    continue
            # Only the first matching header line is considered.
            break

        # Parse GROUND TEMPERATURES
        ground_temperatures = {}
        for line in lines:
            if not line.startswith("GROUND TEMPERATURES"):
                continue
            parts = line.strip().split(',')
            try:
                num_depths = int(parts[1])
            except (ValueError, IndexError):
                # Same reasoning as above: a missing count skips the section.
                logger.warning("Invalid number of depths in GROUND TEMPERATURES, skipping parsing.")
                break
            for i in range(num_depths):
                try:
                    # Each depth occupies 16 fields: the depth, three further
                    # fields that are not read here, then 12 temperatures.
                    if len(parts) < 2 + i*16 + 16:
                        logger.warning(f"Insufficient fields for ground temperature depth {i+1}, skipping.")
                        continue
                    depth = parts[2 + i*16]
                    temps = [float(t) for t in parts[6 + i*16:18 + i*16] if t.strip()]
                    if len(temps) != 12:
                        logger.warning(f"Invalid number of temperatures for depth {depth}m, expected 12, got {len(temps)}, skipping.")
                        continue
                    ground_temperatures[depth] = temps
                except (ValueError, IndexError) as e:
                    logger.warning(f"Error parsing ground temperatures for depth {i+1}: {str(e)}, skipping.")
                    continue
            break

        # Parse data rows (starting from line 9), skipping empty lines
        data = [line.split(',') for line in lines[8:] if line.strip()]

        # Define core columns (common to both 32 and 35 column formats)
        core_columns = [
            "year", "month", "day", "hour", "minute", "data_source", "dry_bulb_temp",
            "dew_point_temp", "relative_humidity", "atmospheric_pressure", "extraterrestrial_radiation",
            "extraterrestrial_radiation_normal", "horizontal_infrared_radiation", "global_horizontal_radiation",
            "direct_normal_radiation", "diffuse_horizontal_radiation", "global_horizontal_illuminance",
            "direct_normal_illuminance", "diffuse_horizontal_illuminance", "zenith_luminance",
            "wind_direction", "wind_speed", "total_sky_cover", "opaque_sky_cover", "visibility",
            "ceiling_height", "present_weather_observation", "present_weather_codes",
            "precipitable_water", "aerosol_optical_depth", "snow_depth", "days_since_last_snowfall"
        ]

        # Additional columns for 35-column format
        additional_columns = ["albedo", "liquid_precipitation_depth", "liquid_precipitation_quantity"]

        # Determine number of columns in data (taken from the first row)
        num_columns = len(data[0]) if data else 0
        if num_columns not in [32, 35]:
            raise ValueError(f"Invalid number of columns in EPW file: {num_columns}. Expected 32 or 35 columns.")

        # Select appropriate columns based on file format
        columns = core_columns if num_columns == 32 else core_columns + additional_columns

        # Create DataFrame
        df = pd.DataFrame(data, columns=columns[:num_columns])

        # Convert numeric columns; unparseable cells become NaN
        numeric_columns = [
            "dry_bulb_temp", "dew_point_temp", "relative_humidity", "atmospheric_pressure",
            "global_horizontal_radiation", "direct_normal_radiation", "diffuse_horizontal_radiation",
            "wind_direction", "wind_speed"
        ]

        for col in numeric_columns:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')

        # Calculate diffuse fraction. Rows whose global horizontal radiation
        # is zero, negative or NaN get 0.0 instead of a division result.
        global_radiation = df['global_horizontal_radiation']
        df['diffuse_fraction'] = (
            df['diffuse_horizontal_radiation'] / global_radiation
        ).where(global_radiation > 0, 0.0)

        # Calculate design conditions
        design_conditions = self._calculate_design_conditions(df)

        # Process hourly data
        hourly_data = self._process_hourly_data(df)

        # Determine climate zone based on HDD and CDD
        climate_zone = self._determine_climate_zone(
            design_conditions["heating_degree_days"],
            design_conditions["cooling_degree_days"]
        )

        # Create climate data dictionary
        climate_data = {
            "id": f"{location['city']}_{location['country']}_{rcp}_{year}".replace(" ", "_") if rcp and year else f"{location['city']}_{location['country']}".replace(" ", "_"),
            "location": location,
            "design_conditions": design_conditions,
            "climate_zone": climate_zone,
            "hourly_data": hourly_data,
            "epw_filename": epw_filename,
            "typical_extreme_periods": typical_extreme_periods,
            "ground_temperatures": ground_temperatures
        }

        logger.info(f"EPW file processed successfully: {epw_filename}")
        return climate_data

    except Exception as e:
        logger.error(f"Error processing EPW file: {str(e)}")
        # Keep the original exception chained so the root cause stays visible.
        raise ValueError(f"Error processing EPW file: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
  def _calculate_design_conditions(self, df: pd.DataFrame) -> Dict[str, Any]:
282
  """
 
433
  except Exception as e:
434
  logger.warning(f"Error calculating Sky Clearness Index: {str(e)}, returning None")
435
  return None
436
+
437
  def _process_hourly_data(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
438
  """
439
  Process hourly data from EPW DataFrame, including dew point, Sky Clearness Index, diffuse fraction, and total sky cover.
 
447
  hourly_data = []
448
 
449
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  # Ensure numeric columns
451
  numeric_columns = [
452
  "dry_bulb_temp", "dew_point_temp", "relative_humidity", "atmospheric_pressure",
 
459
  if col in df.columns:
460
  df[col] = pd.to_numeric(df[col], errors='coerce')
461
 
462
+ # Convert to integers for month, day, hour
463
+ df["month"] = pd.to_numeric(df["month"], errors='coerce').astype('Int64')
464
+ df["day"] = pd.to_numeric(df["day"], errors='coerce').astype('Int64')
465
+ df["hour"] = pd.to_numeric(df["hour"], errors='coerce').astype('Int64')
 
 
 
 
 
 
 
466
 
467
  # Process each row
468
  for _, row in df.iterrows():
469
  if pd.isna(row["month"]) or pd.isna(row["day"]) or pd.isna(row["hour"]) or pd.isna(row["dry_bulb_temp"]):
 
470
  continue # Skip rows with missing critical data
471
 
472
  # Calculate dew point temperature
 
510
  if len(hourly_data) < 8700: # Allow for some missing data
511
  logger.warning(f"Hourly data has {len(hourly_data)} records instead of 8760. Some records may be missing.")
512
 
 
513
  return hourly_data
514
 
515
  except Exception as e:
516
  logger.error(f"Error processing hourly data: {str(e)}")
517
  return []
518
+
519
  def _determine_climate_zone(self, hdd: float, cdd: float) -> str:
520
  """
521
  Determine ASHRAE climate zone based on heating and cooling degree days.
 
764
 
765
  # Overview Tab (unchanged)
766
  with tab1:
767
+ # ... (original code unchanged)
768
  design = climate_data["design_conditions"]
769
  location = climate_data["location"]
770
 
 
909
  else:
910
  st.warning("No hourly data available.")
911
 
912
+ # Sun Path Tab
913
  with tab2:
914
  st.subheader("Sun Path Diagram")
915
  st.markdown("Visualize the sun's path for the selected location and date.")
 
1018
  end_month = st.selectbox("End Month", list(range(1, 13)), index=11, key="wind_rose_end_month")
1019
 
1020
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1021
  # Create AnalysisPeriod for filtering
1022
  analysis_period = AnalysisPeriod(st_month=start_month, end_month=end_month)
1023
 
1024
+ # Create DataFrame and validate
1025
+ hourly_df = pd.DataFrame(climate_data["hourly_data"])
1026
+ logger.debug(f"hourly_data columns: {list(hourly_df.columns)}")
1027
+ logger.debug(f"hourly_data sample: {hourly_df.head().to_dict()}")
1028
+ if "month" not in hourly_df.columns:
1029
+ logger.error("Missing 'month' column in hourly_data")
1030
+ st.warning("Hourly data is missing required 'month' column. Visualizations may be limited.")
1031
+ raise KeyError("'month' column missing in hourly_data")
1032
+
1033
  # Filter data
1034
  filtered_df = hourly_df[
1035
+ (hourly_df["month"].astype(int) >= start_month) & (hourly_df["month"].astype(int) <= end_month)
1036
  ]
1037
 
1038
  if filtered_df.empty:
 
1087
  show_comfort = st.checkbox("Show Comfort Polygon", value=True, key="psychro_comfort")
1088
 
1089
  try:
1090
+ # Create DataFrame and validate
1091
+ hourly_df = pd.DataFrame(climate_data["hourly_data"])
1092
+ logger.debug(f"hourly_data columns: {list(hourly_df.columns)}")
1093
+ logger.debug(f"hourly_data sample: {hourly_df.head().to_dict()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1094
  if "month" not in hourly_df.columns:
1095
+ logger.error("Missing 'month' column in hourly_data")
1096
+ st.warning("Hourly data is missing required 'month' column. Visualizations may be limited.")
1097
+ raise KeyError("'month' column missing in hourly_data")
 
 
 
 
 
1098
 
1099
  # Filter data
1100
  filtered_df = hourly_df[
1101
+ (hourly_df["month"].astype(int) >= start_month) & (hourly_df["month"].astype(int) <= end_month)
1102
  ]
1103
 
1104
  if filtered_df.empty:
 
1151
  for wb_temp_target in [10, 15, 20, 25]:
1152
  wb_temps = np.linspace(min(dry_bulb_temps), max(dry_bulb_temps), 100)
1153
  wb_humid_ratios = []
1154
+ wb_temps_calc = ClimateDataManager._calculate_wet_bulb(
1155
+ np.array([t] * 100), np.array([100] * 100)
1156
+ )
1157
  for t in wb_temps:
1158
  es = 610.78 * 10 ** ((7.5 * t) / (237.3 + t))
1159
  e = es
 
1197
  end_month = st.selectbox("End Month", list(range(1, 13)), index=11, key="radiation_end_month")
1198
 
1199
  try:
1200
+ # Create DataFrame and validate
1201
+ hourly_df = pd.DataFrame(climate_data["hourly_data"])
1202
+ logger.debug(f"hourly_data columns: {list(hourly_df.columns)}")
1203
+ logger.debug(f"hourly_data sample: {hourly_df.head().to_dict()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1204
  if "month" not in hourly_df.columns:
1205
+ logger.error("Missing 'month' column in hourly_data")
1206
+ st.warning("Hourly data is missing required 'month' column. Visualizations may be limited.")
1207
+ raise KeyError("'month' column missing in hourly_data")
 
 
 
 
 
1208
 
1209
  # Filter data
1210
  filtered_df = hourly_df[
1211
+ (hourly_df["month"].astype(int) >= start_month) & (hourly_df["month"].astype(int) <= end_month)
1212
  ]
1213
 
1214
  if filtered_df.empty:
 
1249
 
1250
  # Sunlight Hours Tab (unchanged)
1251
  with tab6:
1252
+ # ... (original code unchanged)
1253
  st.subheader("Sunlight Hours and Shadow Study")
1254
  st.markdown("Visualize sunlight hours on a simple building surface.")
1255
 
 
1354
  plot_type = st.selectbox("Plot Type", ["Line", "Scatter", "Histogram"], key="custom_plot_type")
1355
 
1356
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1357
  # Map variable to data key
1358
  variable_map = {
1359
  "Dry Bulb Temperature (°C)": "dry_bulb",
 
1371
 
1372
  data_key = variable_map[variable]
1373
 
1374
+ # Create DataFrame and validate
1375
+ hourly_df = pd.DataFrame(climate_data["hourly_data"])
1376
+ logger.debug(f"hourly_data columns: {list(hourly_df.columns)}")
1377
+ logger.debug(f"hourly_data sample: {hourly_df.head().to_dict()}")
1378
+ if "month" not in hourly_df.columns:
1379
+ logger.error("Missing 'month' column in hourly_data")
1380
+ st.warning("Hourly data is missing required 'month' column. Visualizations may be limited.")
1381
+ raise KeyError("'month' column missing in hourly_data")
1382
+
1383
  # Filter data
1384
  filtered_df = hourly_df[
1385
+ (hourly_df["month"].astype(int) >= start_month) & (hourly_df["month"].astype(int) <= end_month)
1386
  ]
1387
 
1388
  if filtered_df.empty: