Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import os | |
def get_season(month):
    """Map a calendar month number to its season label.

    Months 12, 1 and 2 are "Winter"; 3-5 are "Summer"; 6-9 are "Monsoon".
    Any other value (including values outside 1-12) falls through to
    "Post-Monsoon".
    """
    # Checked in order; the first group containing ``month`` wins.
    season_months = (
        ("Winter", (12, 1, 2)),
        ("Summer", (3, 4, 5)),
        ("Monsoon", (6, 7, 8, 9)),
    )
    for label, months in season_months:
        if month in months:
            return label
    return "Post-Monsoon"
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``df`` with date-derived feature columns.

    The input frame is not modified. Steps, in order:

    1. Drop exact duplicate rows.
    2. Drop rows whose ``AQI`` value is missing.
    3. Drop columns in which more than half of the remaining rows are missing.
    4. Fill remaining gaps in numeric columns with that column's median.
    5. Parse ``Date`` to datetimes and add ``Month``, ``Year`` and ``Season``.

    Args:
        df: Raw data; must contain ``AQI`` and ``Date`` columns.

    Returns:
        A new DataFrame. Rows whose ``Date`` is missing keep a missing
        ``Season`` instead of being assigned a season label.

    Raises:
        KeyError: If ``AQI`` or ``Date`` is absent (including when ``Date``
            was dropped in step 3 for being mostly missing).
        Any parsing error raised by ``pd.to_datetime`` propagates unchanged.
    """
    # Non-inplace calls each return a new frame, so the caller's ``df`` is
    # never mutated.
    cleaned = df.drop_duplicates().dropna(subset=["AQI"])

    # Drop columns where more than 50% of the remaining rows are missing.
    max_missing = 0.5 * len(cleaned)
    sparse_cols = cleaned.columns[cleaned.isnull().sum() > max_missing]
    cleaned = cleaned.drop(columns=sparse_cols)

    # Fill gaps in numeric columns with the column median.
    for col in cleaned.select_dtypes(include=["number"]).columns:
        cleaned[col] = cleaned[col].fillna(cleaned[col].median())

    # Date features.
    cleaned["Date"] = pd.to_datetime(cleaned["Date"])
    cleaned["Month"] = cleaned["Date"].dt.month
    cleaned["Year"] = cleaned["Date"].dt.year

    # A missing Date produces a NaN month. Skip those rows so they are not
    # mislabelled by get_season's catch-all "Post-Monsoon" branch.
    cleaned["Season"] = cleaned["Month"].map(get_season, na_action="ignore")
    return cleaned
def save_clean_data(df: pd.DataFrame, path: str):
    """Write ``df`` to ``path`` as CSV, without the index column.

    ``path`` is joined onto the directory two levels above this module's
    directory (an absolute ``path`` is used as-is, per ``os.path.join``).
    Missing parent directories are created before writing.
    """
    # Resolve the base directory relative to this file's location.
    module_dir = os.path.dirname(__file__)
    root_dir = os.path.abspath(os.path.join(module_dir, "../../"))
    destination = os.path.join(root_dir, path)

    # Make sure the target folder exists before writing.
    parent_dir = os.path.dirname(destination)
    os.makedirs(parent_dir, exist_ok=True)

    df.to_csv(destination, index=False)