Spaces:

mabuseif
/

BuildSustain-03

Build error

App Files Community

mabuseif committed on Jun 18, 2025

Commit

e0ab830

verified ·

1 Parent(s): 1499f62

Update app/climate_data.py

Browse files

Files changed (1) hide show

app/climate_data.py +242 -378

app/climate_data.py CHANGED Viewed

@@ -77,234 +77,206 @@ class ClimateDataManager:
         """Initialize climate data manager."""
         pass
-def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, year: str = None) -> Dict[str, Any]:
-    """
-    Parse an EPW file and extract climate data.
-    Args:
-        uploaded_file: The uploaded EPW file object or file content as string
-        location_num: Location number for climate projection (optional)
-        rcp: RCP scenario for climate projection (optional)
-        year: Year for climate projection (optional)
-    Returns:
-        Dict containing parsed climate data
-    """
-    try:
-        # Read the EPW file
-        if isinstance(uploaded_file, str):
-            content = uploaded_file
-            epw_filename = f"{location_num}_{rcp}_{year}.epw"
-        else:
-            content = uploaded_file.getvalue().decode('utf-8')
-            epw_filename = uploaded_file.name
-        lines = content.split('\n')
-        # Extract header information (first 8 lines)
-        header_lines = lines[:8]
-        # Parse location data from line 1
-        location_data = header_lines[0].split(',')
-        # Extract location information
-        location = {
-            "city": location_data[1].strip(),
-            "state_province": location_data[2].strip(),
-            "country": location_data[3].strip(),
-            "source": location_data[4].strip(),
-            "wmo": location_data[5].strip(),
-            "latitude": float(location_data[6]),
-            "longitude": float(location_data[7]),
-            "timezone": float(location_data[8]),
-            "elevation": float(location_data[9])
-        }
-        # Override city and state from LOCATION_MAPPING if provided
-        if location_num in LOCATION_MAPPING:
-            location["city"] = LOCATION_MAPPING[location_num]["city"]
-            location["state_province"] = LOCATION_MAPPING[location_num]["state"]
-        # Parse TYPICAL/EXTREME PERIODS
-        typical_extreme_periods = {}
-        date_pattern = r'^\d{1,2}\s*/\s*\d{1,2}$'
-        for line in lines:
-            if line.startswith("TYPICAL/EXTREME PERIODS"):
-                parts = line.strip().split(',')
-                try:
-                    num_periods = int(parts[1])
-                except ValueError:
-                    logger.warning("Invalid number of periods in TYPICAL/EXTREME PERIODS, skipping parsing.")
-                    break
-                for i in range(num_periods):
                     try:
-                        if len(parts) < 2 + i*4 + 4:
-                            logger.warning(f"Insufficient fields for period {i+1}, skipping.")
-                            continue
-                        period_name = parts[2 + i*4]
-                        period_type = parts[3 + i*4]
-                        start_date = parts[4 + i*4].strip()
-                        end_date = parts[5 + i*4].strip()
-                        if period_name in [
-                            "Summer - Week Nearest Max Temperature For Period",
-                            "Summer - Week Nearest Average Temperature For Period",
-                            "Winter - Week Nearest Min Temperature For Period",
-                            "Winter - Week Nearest Average Temperature For Period"
-                        ]:
-                            season = 'summer' if 'Summer' in period_name else 'winter'
-                            period_type = 'extreme' if 'Max' in period_name or 'Min' in period_name else 'typical'
-                            key = f"{season}_{period_type}"
-                            start_date_clean = re.sub(r'\s+', '', start_date)
-                            end_date_clean = re.sub(r'\s+', '', end_date)
-                            if not re.match(date_pattern, start_date) or not re.match(date_pattern, end_date):
-                                logger.warning(f"Invalid date format for period {period_name}: {start_date} to {end_date}, skipping.")
                                 continue
-                            start_month, start_day = map(int, start_date_clean.split('/'))
-                            end_month, end_day = map(int, end_date_clean.split('/'))
-                            typical_extreme_periods[key] = {
-                                "start": {"month": start_month, "day": start_day},
-                                "end": {"month": end_month, "day": end_day}
-                            }
-                    except (IndexError, ValueError) as e:
-                        logger.warning(f"Error parsing period {i+1}: {str(e)}, skipping.")
-                        continue
-                break
-        # Parse GROUND TEMPERATURES
-        ground_temperatures = {}
-        for line in lines:
-            if line.startswith("GROUND TEMPERATURES"):
-                parts = line.strip().split(',')
-                try:
-                    num_depths = int(parts[1])
-                except ValueError:
-                    logger.warning("Invalid number of depths in GROUND TEMPERATURES, skipping parsing.")
                     break
-                for i in range(num_depths):
                     try:
-                        if len(parts) < 2 + i*16 + 16:
-                            logger.warning(f"Insufficient fields for ground temperature depth {i+1}, skipping.")
-                            continue
-                        depth = parts[2 + i*16]
-                        temps = [float(t) for t in parts[6 + i*16:18 + i*16] if t.strip()]
-                        if len(temps) != 12:
-                            logger.warning(f"Invalid number of temperatures for depth {depth}m, expected 12, got {len(temps)}, skipping.")
                             continue
-                        ground_temperatures[depth] = temps
-                    except (ValueError, IndexError) as e:
-                        logger.warning(f"Error parsing ground temperatures for depth {i+1}: {str(e)}, skipping.")
-                        continue
-                break
-        # Parse data rows (starting from line 9)
-        data_lines = lines[8:]
-        # Create a DataFrame from the data rows
-        data = []
-        for line in data_lines:
-            if line.strip():  # Skip empty lines
-                data.append(line.split(','))
-        # Define core columns (common to both 32 and 35 column formats)
-        core_columns = [
-            "year", "month", "day", "hour", "minute", "data_source", "dry_bulb_temp",
-            "dew_point_temp", "relative_humidity", "atmospheric_pressure", "extraterrestrial_radiation",
-            "extraterrestrial_radiation_normal", "horizontal_infrared_radiation", "global_horizontal_radiation",
-            "direct_normal_radiation", "diffuse_horizontal_radiation", "global_horizontal_illuminance",
-            "direct_normal_illuminance", "diffuse_horizontal_illuminance", "zenith_luminance",
-            "wind_direction", "wind_speed", "total_sky_cover", "opaque_sky_cover", "visibility",
-            "ceiling_height", "present_weather_observation", "present_weather_codes",
-            "precipitable_water", "aerosol_optical_depth", "snow_depth", "days_since_last_snowfall"
-        ]
-        # Additional columns for 35-column format
-        additional_columns = ["albedo", "liquid_precipitation_depth", "liquid_precipitation_quantity"]
-        # Determine number of columns in data
-        num_columns = len(data[0]) if data else 0
-        if num_columns not in [32, 35]:
-            raise ValueError(f"Invalid number of columns in EPW file: {num_columns}. Expected 32 or 35 columns.")
-        # Select appropriate columns based on file format
-        columns = core_columns if num_columns == 32 else core_columns + additional_columns
-        # Create DataFrame
-        df = pd.DataFrame(data, columns=columns[:num_columns])
-        # Ensure critical columns exist and are properly typed
-        for col in ["year", "month", "day", "hour"]:
-            if col not in df.columns:
-                logger.warning(f"Missing '{col}' column in EPW data; initializing with default values")
-                if col == "year":
-                    df[col] = 2025  # Default year
-                elif col == "month":
-                    df[col] = pd.Series(range(1, len(df) + 1)).mod(12).replace(0, 12).astype('Int64')
-                elif col == "day":
-                    df[col] = pd.Series(range(1, len(df) + 1)).mod(31).replace(0, 31).astype('Int64')
-                elif col == "hour":
-                    df[col] = pd.Series(range(len(df))).mod(24).add(1).astype('Int64')
-        # Convert numeric columns
-        numeric_columns = [
-            "year", "month", "day", "hour", "dry_bulb_temp", "dew_point_temp", "relative_humidity",
-            "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
-            "diffuse_horizontal_radiation", "wind_direction", "wind_speed", "total_sky_cover"
-        ]
-        for col in numeric_columns:
-            if col in df.columns:
-                df[col] = pd.to_numeric(df[col], errors='coerce')
-                if col in ["year", "month", "day", "hour"]:
-                    df[col] = df[col].astype('Int64')
-        # Validate and correct date columns
-        if not df["month"].between(1, 12).all() or df["month"].isna().any():
-            logger.warning("Invalid or missing 'month' values detected; attempting to infer from index")
-            df["month"] = pd.Series(range(1, len(df) + 1)).mod(12).replace(0, 12).astype('Int64')
-        if not df["day"].between(1, 31).all() or df["day"].isna().any():
-            logger.warning("Invalid or missing 'day' values detected; assigning sequential days")
-            df["day"] = pd.Series(range(1, len(df) + 1)).mod(31).replace(0, 31).astype('Int64')
-        if not df["hour"].between(1, 24).all() or df["hour"].isna().any():
-            logger.warning("Invalid or missing 'hour' values detected; assigning sequential hours")
-            df["hour"] = pd.Series(range(len(df))).mod(24).add(1).astype('Int64')
-        # Calculate diffuse fraction
-        df['diffuse_fraction'] = df.apply(
-            lambda row: row['diffuse_horizontal_radiation'] / row['global_horizontal_radiation']
-            if pd.notna(row['global_horizontal_radiation']) and row['global_horizontal_radiation'] > 0 else 0.0,
-            axis=1
-        )
-        # Calculate design conditions
-        design_conditions = self._calculate_design_conditions(df)
-        # Process hourly data
-        hourly_data = self._process_hourly_data(df)
-        # Determine climate zone based on HDD and CDD
-        climate_zone = self._determine_climate_zone(
-            design_conditions["heating_degree_days"],
-            design_conditions["cooling_degree_days"]
-        )
-        # Create climate data dictionary
-        climate_data = {
-            "id": f"{location['city']}_{location['country']}_{rcp}_{year}".replace(" ", "_") if rcp and year else f"{location['city']}_{location['country']}".replace(" ", "_"),
-            "location": location,
-            "design_conditions": design_conditions,
-            "climate_zone": climate_zone,
-            "hourly_data": hourly_data,
-            "epw_filename": epw_filename,
-            "typical_extreme_periods": typical_extreme_periods,
-            "ground_temperatures": ground_temperatures
-        }
-        logger.info(f"EPW file processed successfully: {epw_filename}")
-        return climate_data
-    except Exception as e:
-        logger.error(f"Error processing EPW file: {str(e)}")
-        raise ValueError(f"Error processing EPW file: {str(e)}")
     def _calculate_design_conditions(self, df: pd.DataFrame) -> Dict[str, Any]:
         """
@@ -461,7 +433,7 @@ def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, yea
         except Exception as e:
             logger.warning(f"Error calculating Sky Clearness Index: {str(e)}, returning None")
             return None
     def _process_hourly_data(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
         """
         Process hourly data from EPW DataFrame, including dew point, Sky Clearness Index, diffuse fraction, and total sky cover.
@@ -475,30 +447,6 @@ def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, yea
         hourly_data = []
         try:
-            # Log DataFrame structure for debugging
-            logger.debug(f"Input DataFrame columns: {list(df.columns)}")
-            logger.debug(f"Input DataFrame sample: {df.head().to_dict()}")
-            # Validate presence of critical columns
-            required_columns = ["month", "day", "hour", "dry_bulb_temp"]
-            missing_columns = [col for col in required_columns if col not in df.columns]
-            if missing_columns:
-                logger.warning(f"Missing required columns in DataFrame: {missing_columns}")
-                # Initialize missing columns with defaults
-                for col in missing_columns:
-                    if col == "month":
-                        df["month"] = pd.Series(range(1, len(df) + 1)).mod(12).replace(0, 12).astype('Int64')
-                        logger.info("Initialized 'month' column with sequential values")
-                    elif col == "day":
-                        df["day"] = pd.Series(range(1, len(df) + 1)).mod(31).replace(0, 31).astype('Int64')
-                        logger.info("Initialized 'day' column with sequential values")
-                    elif col == "hour":
-                        df["hour"] = pd.Series(range(len(df))).mod(24).add(1).astype('Int64')
-                        logger.info("Initialized 'hour' column with sequential values")
-                    elif col == "dry_bulb_temp":
-                        df["dry_bulb_temp"] = 20.0
-                        logger.info("Initialized 'dry_bulb_temp' column with default value 20.0")
             # Ensure numeric columns
             numeric_columns = [
                 "dry_bulb_temp", "dew_point_temp", "relative_humidity", "atmospheric_pressure",
@@ -511,22 +459,14 @@ def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, yea
                 if col in df.columns:
                     df[col] = pd.to_numeric(df[col], errors='coerce')
-            # Ensure date columns are integers
-            for col in ["month", "day", "hour"]:
-                if col in df.columns:
-                    df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')
-                    # Replace any remaining NaN values
-                    if col == "month" and df[col].isna().any():
-                        df[col] = pd.Series(range(1, len(df) + 1)).mod(12).replace(0, 12).astype('Int64')
-                    elif col == "day" and df[col].isna().any():
-                        df[col] = pd.Series(range(1, len(df) + 1)).mod(31).replace(0, 31).astype('Int64')
-                    elif col == "hour" and df[col].isna().any():
-                        df[col] = pd.Series(range(len(df))).mod(24).add(1).astype('Int64')
             # Process each row
             for _, row in df.iterrows():
                 if pd.isna(row["month"]) or pd.isna(row["day"]) or pd.isna(row["hour"]) or pd.isna(row["dry_bulb_temp"]):
-                    logger.warning(f"Skipping row with missing critical data: month={row.get('month')}, day={row.get('day')}, hour={row.get('hour')}, dry_bulb_temp={row.get('dry_bulb_temp')}")
                     continue  # Skip rows with missing critical data
                 # Calculate dew point temperature
@@ -570,13 +510,12 @@ def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, yea
             if len(hourly_data) < 8700:  # Allow for some missing data
                 logger.warning(f"Hourly data has {len(hourly_data)} records instead of 8760. Some records may be missing.")
-            logger.debug(f"Processed hourly_data sample: {hourly_data[:5]}")
             return hourly_data
         except Exception as e:
             logger.error(f"Error processing hourly data: {str(e)}")
             return []
     def _determine_climate_zone(self, hdd: float, cdd: float) -> str:
         """
         Determine ASHRAE climate zone based on heating and cooling degree days.
@@ -825,6 +764,7 @@ def display_climate_summary(climate_data: Dict[str, Any]):
     # Overview Tab (unchanged)
     with tab1:
         design = climate_data["design_conditions"]
         location = climate_data["location"]
@@ -969,7 +909,7 @@ def display_climate_summary(climate_data: Dict[str, Any]):
         else:
             st.warning("No hourly data available.")
-    # Sun Path Tab (unchanged)
     with tab2:
         st.subheader("Sun Path Diagram")
         st.markdown("Visualize the sun's path for the selected location and date.")
@@ -1078,41 +1018,21 @@ def display_climate_summary(climate_data: Dict[str, Any]):
             end_month = st.selectbox("End Month", list(range(1, 13)), index=11, key="wind_rose_end_month")
         try:
-            # Validate hourly_data
-            if not climate_data.get("hourly_data"):
-                st.warning("No hourly data available for wind rose visualization.")
-                logger.warning("hourly_data is empty or missing")
-                raise ValueError("hourly_data is empty or missing")
-            # Create DataFrame with explicit column mapping
-            hourly_df = pd.DataFrame(climate_data["hourly_data"], columns=[
-                "month", "day", "hour", "dry_bulb", "dew_point", "relative_humidity",
-                "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
-                "diffuse_horizontal_radiation", "wind_speed", "wind_direction",
-                "sky_clearness_index", "total_sky_cover", "diffuse_fraction"
-            ])
-            # Log DataFrame structure
-            logger.debug(f"Wind Rose hourly_df columns: {list(hourly_df.columns)}")
-            logger.debug(f"Wind Rose hourly_df sample: {hourly_df.head().to_dict()}")
-            # Check for month column
-            if "month" not in hourly_df.columns:
-                logger.warning("Missing 'month' column in hourly_df; initializing with sequential values")
-                hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
-            # Convert month to integer and handle NaN
-            hourly_df["month"] = pd.to_numeric(hourly_df["month"], errors='coerce').astype('Int64')
-            if hourly_df["month"].isna().any():
-                logger.warning("NaN values in 'month' column; replacing with sequential values")
-                hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
             # Create AnalysisPeriod for filtering
             analysis_period = AnalysisPeriod(st_month=start_month, end_month=end_month)
             # Filter data
             filtered_df = hourly_df[
-                (hourly_df["month"] >= start_month) & (hourly_df["month"] <= end_month)
             ]
             if filtered_df.empty:
@@ -1167,38 +1087,18 @@ def display_climate_summary(climate_data: Dict[str, Any]):
             show_comfort = st.checkbox("Show Comfort Polygon", value=True, key="psychro_comfort")
         try:
-            # Validate hourly_data
-            if not climate_data.get("hourly_data"):
-                st.warning("No hourly data available for psychrometric chart visualization.")
-                logger.warning("hourly_data is empty or missing")
-                raise ValueError("hourly_data is empty or missing")
-            # Create DataFrame with explicit column mapping
-            hourly_df = pd.DataFrame(climate_data["hourly_data"], columns=[
-                "month", "day", "hour", "dry_bulb", "dew_point", "relative_humidity",
-                "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
-                "diffuse_horizontal_radiation", "wind_speed", "wind_direction",
-                "sky_clearness_index", "total_sky_cover", "diffuse_fraction"
-            ])
-            # Log DataFrame structure
-            logger.debug(f"Psychrometric Chart hourly_df columns: {list(hourly_df.columns)}")
-            logger.debug(f"Psychrometric Chart hourly_df sample: {hourly_df.head().to_dict()}")
-            # Check for month column
             if "month" not in hourly_df.columns:
-                logger.warning("Missing 'month' column in hourly_df; initializing with sequential values")
-                hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
-            # Convert month to integer and handle NaN
-            hourly_df["month"] = pd.to_numeric(hourly_df["month"], errors='coerce').astype('Int64')
-            if hourly_df["month"].isna().any():
-                logger.warning("NaN values in 'month' column; replacing with sequential values")
-                hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
             # Filter data
             filtered_df = hourly_df[
-                (hourly_df["month"] >= start_month) & (hourly_df["month"] <= end_month)
             ]
             if filtered_df.empty:
@@ -1251,6 +1151,9 @@ def display_climate_summary(climate_data: Dict[str, Any]):
                 for wb_temp_target in [10, 15, 20, 25]:
                     wb_temps = np.linspace(min(dry_bulb_temps), max(dry_bulb_temps), 100)
                     wb_humid_ratios = []
                     for t in wb_temps:
                         es = 610.78 * 10 ** ((7.5 * t) / (237.3 + t))
                         e = es
@@ -1294,38 +1197,18 @@ def display_climate_summary(climate_data: Dict[str, Any]):
             end_month = st.selectbox("End Month", list(range(1, 13)), index=11, key="radiation_end_month")
         try:
-            # Validate hourly_data
-            if not climate_data.get("hourly_data"):
-                st.warning("No hourly data available for radiation diagram visualization.")
-                logger.warning("hourly_data is empty or missing")
-                raise ValueError("hourly_data is empty or missing")
-            # Create DataFrame with explicit column mapping
-            hourly_df = pd.DataFrame(climate_data["hourly_data"], columns=[
-                "month", "day", "hour", "dry_bulb", "dew_point", "relative_humidity",
-                "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
-                "diffuse_horizontal_radiation", "wind_speed", "wind_direction",
-                "sky_clearness_index", "total_sky_cover", "diffuse_fraction"
-            ])
-            # Log DataFrame structure
-            logger.debug(f"Radiation Diagram hourly_df columns: {list(hourly_df.columns)}")
-            logger.debug(f"Radiation Diagram hourly_df sample: {hourly_df.head().to_dict()}")
-            # Check for month column
             if "month" not in hourly_df.columns:
-                logger.warning("Missing 'month' column in hourly_df; initializing with sequential values")
-                hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
-            # Convert month to integer and handle NaN
-            hourly_df["month"] = pd.to_numeric(hourly_df["month"], errors='coerce').astype('Int64')
-            if hourly_df["month"].isna().any():
-                logger.warning("NaN values in 'month' column; replacing with sequential values")
-                hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
             # Filter data
             filtered_df = hourly_df[
-                (hourly_df["month"] >= start_month) & (hourly_df["month"] <= end_month)
             ]
             if filtered_df.empty:
@@ -1366,6 +1249,7 @@ def display_climate_summary(climate_data: Dict[str, Any]):
     # Sunlight Hours Tab (unchanged)
     with tab6:
         st.subheader("Sunlight Hours and Shadow Study")
         st.markdown("Visualize sunlight hours on a simple building surface.")
@@ -1470,35 +1354,6 @@ def display_climate_summary(climate_data: Dict[str, Any]):
             plot_type = st.selectbox("Plot Type", ["Line", "Scatter", "Histogram"], key="custom_plot_type")
         try:
-            # Validate hourly_data
-            if not climate_data.get("hourly_data"):
-                st.warning("No hourly data available for custom plot visualization.")
-                logger.warning("hourly_data is empty or missing")
-                raise ValueError("hourly_data is empty or missing")
-            # Create DataFrame with explicit column mapping
-            hourly_df = pd.DataFrame(climate_data["hourly_data"], columns=[
-                "month", "day", "hour", "dry_bulb", "dew_point", "relative_humidity",
-                "atmospheric_pressure", "global_horizontal_radiation", "direct_normal_radiation",
-                "diffuse_horizontal_radiation", "wind_speed", "wind_direction",
-                "sky_clearness_index", "total_sky_cover", "diffuse_fraction"
-            ])
-            # Log DataFrame structure
-            logger.debug(f"Custom Plot hourly_df columns: {list(hourly_df.columns)}")
-            logger.debug(f"Custom Plot hourly_df sample: {hourly_df.head().to_dict()}")
-            # Check for month column
-            if "month" not in hourly_df.columns:
-                logger.warning("Missing 'month' column in hourly_df; initializing with sequential values")
-                hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
-            # Convert month to integer and handle NaN
-            hourly_df["month"] = pd.to_numeric(hourly_df["month"], errors='coerce').astype('Int64')
-            if hourly_df["month"].isna().any():
-                logger.warning("NaN values in 'month' column; replacing with sequential values")
-                hourly_df["month"] = pd.Series(range(1, len(hourly_df) + 1)).mod(12).replace(0, 12).astype('Int64')
             # Map variable to data key
             variable_map = {
                 "Dry Bulb Temperature (°C)": "dry_bulb",
@@ -1516,9 +1371,18 @@ def display_climate_summary(climate_data: Dict[str, Any]):
             data_key = variable_map[variable]
             # Filter data
             filtered_df = hourly_df[
-                (hourly_df["month"] >= start_month) & (hourly_df["month"] <= end_month)
             ]
             if filtered_df.empty:

         """Initialize climate data manager."""
         pass
+    def load_epw(self, uploaded_file, location_num: str = None, rcp: str = None, year: str = None) -> Dict[str, Any]:
+        """
+        Parse an EPW file and extract climate data.
+        Args:
+            uploaded_file: The uploaded EPW file object or file content as string
+            location_num: Location number for climate projection (optional)
+            rcp: RCP scenario for climate projection (optional)
+            year: Year for climate projection (optional)
+        Returns:
+            Dict containing parsed climate data
+        """
+        try:
+            # Read the EPW file
+            if isinstance(uploaded_file, str):
+                content = uploaded_file
+                epw_filename = f"{location_num}_{rcp}_{year}.epw"
+            else:
+                content = uploaded_file.getvalue().decode('utf-8')
+                epw_filename = uploaded_file.name
+            lines = content.split('\n')
+            # Extract header information (first 8 lines)
+            header_lines = lines[:8]
+            # Parse location data from line 1
+            location_data = header_lines[0].split(',')
+            # Extract location information
+            location = {
+                "city": location_data[1].strip(),
+                "state_province": location_data[2].strip(),
+                "country": location_data[3].strip(),
+                "source": location_data[4].strip(),
+                "wmo": location_data[5].strip(),
+                "latitude": float(location_data[6]),
+                "longitude": float(location_data[7]),
+                "timezone": float(location_data[8]),
+                "elevation": float(location_data[9])
+            }
+            # Override city and state from LOCATION_MAPPING if provided
+            if location_num in LOCATION_MAPPING:
+                location["city"] = LOCATION_MAPPING[location_num]["city"]
+                location["state_province"] = LOCATION_MAPPING[location_num]["state"]
+            # Parse TYPICAL/EXTREME PERIODS
+            typical_extreme_periods = {}
+            date_pattern = r'^\d{1,2}\s*/\s*\d{1,2}$'
+            for line in lines:
+                if line.startswith("TYPICAL/EXTREME PERIODS"):
+                    parts = line.strip().split(',')
                     try:
+                        num_periods = int(parts[1])
+                    except ValueError:
+                        logger.warning("Invalid number of periods in TYPICAL/EXTREME PERIODS, skipping parsing.")
+                        break
+                    for i in range(num_periods):
+                        try:
+                            if len(parts) < 2 + i*4 + 4:
+                                logger.warning(f"Insufficient fields for period {i+1}, skipping.")
                                 continue
+                            period_name = parts[2 + i*4]
+                            period_type = parts[3 + i*4]
+                            start_date = parts[4 + i*4].strip()
+                            end_date = parts[5 + i*4].strip()
+                            if period_name in [
+                                "Summer - Week Nearest Max Temperature For Period",
+                                "Summer - Week Nearest Average Temperature For Period",
+                                "Winter - Week Nearest Min Temperature For Period",
+                                "Winter - Week Nearest Average Temperature For Period"
+                            ]:
+                                season = 'summer' if 'Summer' in period_name else 'winter'
+                                period_type = 'extreme' if 'Max' in period_name or 'Min' in period_name else 'typical'
+                                key = f"{season}_{period_type}"
+                                start_date_clean = re.sub(r'\s+', '', start_date)
+                                end_date_clean = re.sub(r'\s+', '', end_date)
+                                if not re.match(date_pattern, start_date) or not re.match(date_pattern, end_date):
+                                    logger.warning(f"Invalid date format for period {period_name}: {start_date} to {end_date}, skipping.")
+                                    continue
+                                start_month, start_day = map(int, start_date_clean.split('/'))
+                                end_month, end_day = map(int, end_date_clean.split('/'))
+                                typical_extreme_periods[key] = {
+                                    "start": {"month": start_month, "day": start_day},
+                                    "end": {"month": end_month, "day": end_day}
+                                }
+                        except (IndexError, ValueError) as e:
+                            logger.warning(f"Error parsing period {i+1}: {str(e)}, skipping.")
+                            continue
                     break
+            # Parse GROUND TEMPERATURES
+            ground_temperatures = {}
+            for line in lines:
+                if line.startswith("GROUND TEMPERATURES"):
+                    parts = line.strip().split(',')
                     try:
+                        num_depths = int(parts[1])
+                    except ValueError:
+                        logger.warning("Invalid number of depths in GROUND TEMPERATURES, skipping parsing.")
+                        break
+                    for i in range(num_depths):
+                        try:
+                            if len(parts) < 2 + i*16 + 16:
+                                logger.warning(f"Insufficient fields for ground temperature depth {i+1}, skipping.")
+                                continue
+                            depth = parts[2 + i*16]
+                            temps = [float(t) for t in parts[6 + i*16:18 + i*16] if t.strip()]
+                            if len(temps) != 12:
+                                logger.warning(f"Invalid number of temperatures for depth {depth}m, expected 12, got {len(temps)}, skipping.")
+                                continue
+                            ground_temperatures[depth] = temps
+                        except (ValueError, IndexError) as e:
+                            logger.warning(f"Error parsing ground temperatures for depth {i+1}: {str(e)}, skipping.")
                             continue
+                    break
+            # Parse data rows (starting from line 9)
+            data_lines = lines[8:]
+            # Create a DataFrame from the data rows
+            data = []
+            for line in data_lines:
+                if line.strip():  # Skip empty lines
+                    data.append(line.split(','))
+            # Define core columns (common to both 32 and 35 column formats)
+            core_columns = [
+                "year", "month", "day", "hour", "minute", "data_source", "dry_bulb_temp",
+                "dew_point_temp", "relative_humidity", "atmospheric_pressure", "extraterrestrial_radiation",
+                "extraterrestrial_radiation_normal", "horizontal_infrared_radiation", "global_horizontal_radiation",
+                "direct_normal_radiation", "diffuse_horizontal_radiation", "global_horizontal_illuminance",
+                "direct_normal_illuminance", "diffuse_horizontal_illuminance", "zenith_luminance",
+                "wind_direction", "wind_speed", "total_sky_cover", "opaque_sky_cover", "visibility",
+                "ceiling_height", "present_weather_observation", "present_weather_codes",
+                "precipitable_water", "aerosol_optical_depth", "snow_depth", "days_since_last_snowfall"
+            ]
+            # Additional columns for 35-column format
+            additional_columns = ["albedo", "liquid_precipitation_depth", "liquid_precipitation_quantity"]
+            # Determine number of columns in data
+            num_columns = len(data[0]) if data else 0
+            if num_columns not in [32, 35]:
+                raise ValueError(f"Invalid number of columns in EPW file: {num_columns}. Expected 32 or 35 columns.")
+            # Select appropriate columns based on file format
+            columns = core_columns if num_columns == 32 else core_columns + additional_columns
+            # Create DataFrame
+            df = pd.DataFrame(data, columns=columns[:num_columns])
+            # Convert numeric columns
+            numeric_columns = [
+                "dry_bulb_temp", "dew_point_temp", "relative_humidity", "atmospheric_pressure",
+                "global_horizontal_radiation", "direct_normal_radiation", "diffuse_horizontal_radiation",
+                "wind_direction", "wind_speed"
+            ]
+            for col in numeric_columns:
+                if col in df.columns:
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+            # Calculate diffuse fraction
+            df['diffuse_fraction'] = df.apply(
+                lambda row: row['diffuse_horizontal_radiation'] / row['global_horizontal_radiation'] if row['global_horizontal_radiation'] > 0 else 0.0, axis=1
+            )
+            # Calculate design conditions
+            design_conditions = self._calculate_design_conditions(df)
+            # Process hourly data
+            hourly_data = self._process_hourly_data(df)
+            # Determine climate zone based on HDD and CDD
+            climate_zone = self._determine_climate_zone(
+                design_conditions["heating_degree_days"],
+                design_conditions["cooling_degree_days"]
+            )
+            # Create climate data dictionary
+            climate_data = {
+                "id": f"{location['city']}_{location['country']}_{rcp}_{year}".replace(" ", "_") if rcp and year else f"{location['city']}_{location['country']}".replace(" ", "_"),
+                "location": location,
+                "design_conditions": design_conditions,
+                "climate_zone": climate_zone,
+                "hourly_data": hourly_data,
+                "epw_filename": epw_filename,
+                "typical_extreme_periods": typical_extreme_periods,
+                "ground_temperatures": ground_temperatures
+            }
+            logger.info(f"EPW file processed successfully: {epw_filename}")
+            return climate_data
+        except Exception as e:
+            logger.error(f"Error processing EPW file: {str(e)}")
+            raise ValueError(f"Error processing EPW file: {str(e)}")
     def _calculate_design_conditions(self, df: pd.DataFrame) -> Dict[str, Any]:
         """
         except Exception as e:
             logger.warning(f"Error calculating Sky Clearness Index: {str(e)}, returning None")
             return None
     def _process_hourly_data(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
         """
         Process hourly data from EPW DataFrame, including dew point, Sky Clearness Index, diffuse fraction, and total sky cover.
         hourly_data = []
         try:
             # Ensure numeric columns
             numeric_columns = [
                 "dry_bulb_temp", "dew_point_temp", "relative_humidity", "atmospheric_pressure",
                 if col in df.columns:
                     df[col] = pd.to_numeric(df[col], errors='coerce')
+            # Convert to integers for month, day, hour
+            df["month"] = pd.to_numeric(df["month"], errors='coerce').astype('Int64')
+            df["day"] = pd.to_numeric(df["day"], errors='coerce').astype('Int64')
+            df["hour"] = pd.to_numeric(df["hour"], errors='coerce').astype('Int64')
             # Process each row
             for _, row in df.iterrows():
                 if pd.isna(row["month"]) or pd.isna(row["day"]) or pd.isna(row["hour"]) or pd.isna(row["dry_bulb_temp"]):
                     continue  # Skip rows with missing critical data
                 # Calculate dew point temperature
             if len(hourly_data) < 8700:  # Allow for some missing data
                 logger.warning(f"Hourly data has {len(hourly_data)} records instead of 8760. Some records may be missing.")
             return hourly_data
         except Exception as e:
             logger.error(f"Error processing hourly data: {str(e)}")
             return []
     def _determine_climate_zone(self, hdd: float, cdd: float) -> str:
         """
         Determine ASHRAE climate zone based on heating and cooling degree days.
     # Overview Tab (unchanged)
     with tab1:
+        # ... (original code unchanged)
         design = climate_data["design_conditions"]
         location = climate_data["location"]
         else:
             st.warning("No hourly data available.")
+    # Sun Path Tab
     with tab2:
         st.subheader("Sun Path Diagram")
         st.markdown("Visualize the sun's path for the selected location and date.")
             end_month = st.selectbox("End Month", list(range(1, 13)), index=11, key="wind_rose_end_month")
         try:
             # Create AnalysisPeriod for filtering
             analysis_period = AnalysisPeriod(st_month=start_month, end_month=end_month)
+            # Create DataFrame and validate
+            hourly_df = pd.DataFrame(climate_data["hourly_data"])
+            logger.debug(f"hourly_data columns: {list(hourly_df.columns)}")
+            logger.debug(f"hourly_data sample: {hourly_df.head().to_dict()}")
+            if "month" not in hourly_df.columns:
+                logger.error("Missing 'month' column in hourly_data")
+                st.warning("Hourly data is missing required 'month' column. Visualizations may be limited.")
+                raise KeyError("'month' column missing in hourly_data")
             # Filter data
             filtered_df = hourly_df[
+                (hourly_df["month"].astype(int) >= start_month) & (hourly_df["month"].astype(int) <= end_month)
             ]
             if filtered_df.empty:
             show_comfort = st.checkbox("Show Comfort Polygon", value=True, key="psychro_comfort")
         try:
+            # Create DataFrame and validate
+            hourly_df = pd.DataFrame(climate_data["hourly_data"])
+            logger.debug(f"hourly_data columns: {list(hourly_df.columns)}")
+            logger.debug(f"hourly_data sample: {hourly_df.head().to_dict()}")
             if "month" not in hourly_df.columns:
+                logger.error("Missing 'month' column in hourly_data")
+                st.warning("Hourly data is missing required 'month' column. Visualizations may be limited.")
+                raise KeyError("'month' column missing in hourly_data")
             # Filter data
             filtered_df = hourly_df[
+                (hourly_df["month"].astype(int) >= start_month) & (hourly_df["month"].astype(int) <= end_month)
             ]
             if filtered_df.empty:
                 for wb_temp_target in [10, 15, 20, 25]:
                     wb_temps = np.linspace(min(dry_bulb_temps), max(dry_bulb_temps), 100)
                     wb_humid_ratios = []
+                    wb_temps_calc = ClimateDataManager._calculate_wet_bulb(
+                        np.array([t] * 100), np.array([100] * 100)
+                    )
                     for t in wb_temps:
                         es = 610.78 * 10 ** ((7.5 * t) / (237.3 + t))
                         e = es
             end_month = st.selectbox("End Month", list(range(1, 13)), index=11, key="radiation_end_month")
         try:
+            # Create DataFrame and validate
+            hourly_df = pd.DataFrame(climate_data["hourly_data"])
+            logger.debug(f"hourly_data columns: {list(hourly_df.columns)}")
+            logger.debug(f"hourly_data sample: {hourly_df.head().to_dict()}")
             if "month" not in hourly_df.columns:
+                logger.error("Missing 'month' column in hourly_data")
+                st.warning("Hourly data is missing required 'month' column. Visualizations may be limited.")
+                raise KeyError("'month' column missing in hourly_data")
             # Filter data
             filtered_df = hourly_df[
+                (hourly_df["month"].astype(int) >= start_month) & (hourly_df["month"].astype(int) <= end_month)
             ]
             if filtered_df.empty:
     # Sunlight Hours Tab (unchanged)
     with tab6:
+        # ... (original code unchanged)
         st.subheader("Sunlight Hours and Shadow Study")
         st.markdown("Visualize sunlight hours on a simple building surface.")
             plot_type = st.selectbox("Plot Type", ["Line", "Scatter", "Histogram"], key="custom_plot_type")
         try:
             # Map variable to data key
             variable_map = {
                 "Dry Bulb Temperature (°C)": "dry_bulb",
             data_key = variable_map[variable]
+            # Create DataFrame and validate
+            hourly_df = pd.DataFrame(climate_data["hourly_data"])
+            logger.debug(f"hourly_data columns: {list(hourly_df.columns)}")
+            logger.debug(f"hourly_data sample: {hourly_df.head().to_dict()}")
+            if "month" not in hourly_df.columns:
+                logger.error("Missing 'month' column in hourly_data")
+                st.warning("Hourly data is missing required 'month' column. Visualizations may be limited.")
+                raise KeyError("'month' column missing in hourly_data")
             # Filter data
             filtered_df = hourly_df[
+                (hourly_df["month"].astype(int) >= start_month) & (hourly_df["month"].astype(int) <= end_month)
             ]
             if filtered_df.empty: