"""
Model Module for Daily Household Electricity Consumption Predictor

This module handles data preprocessing, model training, evaluation, and prediction
for the electricity consumption prediction model.
"""

import os
from pathlib import Path
from typing import Tuple, Dict, Any, Optional

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler


class ElectricityConsumptionModel:
    """Linear regression model for predicting daily electricity consumption.

    The estimator is a scikit-learn ``Pipeline``: a ``ColumnTransformer``
    standardizes ``temperature``, one-hot encodes ``day_of_week`` and passes
    ``major_event`` through unchanged, then a ``LinearRegression`` is fitted
    on the transformed columns.

    Attributes:
        model: The fitted pipeline, or ``None`` until ``train`` or
            ``load_model`` has succeeded.
        preprocessor: Initialized to ``None`` and not assigned by any method
            of this class; the active preprocessor lives inside ``model``.
        feature_names: Names of the raw input columns once a model is
            available, otherwise ``None``.
        is_trained: ``True`` once ``train`` or ``load_model`` has succeeded.
    """

    REQUIRED_COLUMNS = ("temperature", "day_of_week", "major_event")
    TARGET_COLUMN = "consumption_kwh"
    VALID_DAYS = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )
    MIN_TEMPERATURE = 15
    MAX_TEMPERATURE = 35

    def __init__(self):
        """Initialize an untrained model."""
        self.model = None
        self.preprocessor = None
        self.feature_names = None
        self.is_trained = False

    @staticmethod
    def _create_day_encoder(categories) -> "OneHotEncoder":
        """Build the dense one-hot encoder for ``day_of_week``.

        Args:
            categories: Ordered category labels; the first one is dropped and
                becomes the baseline level.

        Returns:
            An unfitted ``OneHotEncoder`` producing dense output.
        """
        options = {"categories": [list(categories)], "drop": "first"}
        try:
            # scikit-learn >= 1.2 spells the dense-output switch this way.
            return OneHotEncoder(sparse_output=False, **options)
        except TypeError:
            # Older releases only know the original ``sparse`` keyword.
            return OneHotEncoder(sparse=False, **options)

    @staticmethod
    def _regression_metrics(prefix: str, y_true, y_pred) -> Dict[str, float]:
        """Compute MSE, RMSE, MAE and R^2 with keys prefixed by ``prefix``."""
        mse = float(mean_squared_error(y_true, y_pred))
        return {
            f"{prefix}_mse": mse,
            f"{prefix}_rmse": float(np.sqrt(mse)),
            f"{prefix}_mae": float(mean_absolute_error(y_true, y_pred)),
            f"{prefix}_r2": float(r2_score(y_true, y_pred)),
        }

    def _create_preprocessor(self) -> "ColumnTransformer":
        """
        Create preprocessing pipeline for the features.

        Returns:
            ColumnTransformer with preprocessing steps
        """
        # Categories are pinned to the full week so that (a) Monday is always
        # the dropped baseline and (b) a valid day that happens to be absent
        # from the training data does not make later predictions fail.
        day_encoder = self._create_day_encoder(self.VALID_DAYS)

        return ColumnTransformer(
            transformers=[
                ("num", StandardScaler(), ["temperature"]),
                ("cat", day_encoder, ["day_of_week"]),
                ("bool", "passthrough", ["major_event"]),
            ],
            remainder="drop",
        )

    def _create_pipeline(self) -> "Pipeline":
        """
        Create the complete model pipeline.

        Returns:
            Pipeline with preprocessing and model
        """
        return Pipeline(
            [
                ("preprocessor", self._create_preprocessor()),
                ("regressor", LinearRegression()),
            ]
        )

    def prepare_features(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Validate the raw features and select the model's input columns.

        Args:
            data: Input DataFrame with raw features

        Returns:
            Copy of ``data`` restricted to the required columns

        Raises:
            ValueError: If a required column is missing, a temperature lies
                outside the accepted range, a day name is not a valid weekday,
                or ``major_event`` contains a value other than 0 or 1.
        """
        missing_columns = [
            col for col in self.REQUIRED_COLUMNS if col not in data.columns
        ]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")

        in_range = data["temperature"].between(
            self.MIN_TEMPERATURE, self.MAX_TEMPERATURE
        )
        if not in_range.all():
            raise ValueError(
                f"Temperature must be between {self.MIN_TEMPERATURE} and "
                f"{self.MAX_TEMPERATURE} degrees Celsius"
            )

        valid_days = list(self.VALID_DAYS)
        if not data["day_of_week"].isin(valid_days).all():
            raise ValueError(f"Day of week must be one of: {valid_days}")

        if not data["major_event"].isin([0, 1]).all():
            raise ValueError("Major event must be 0 or 1")

        return data[list(self.REQUIRED_COLUMNS)].copy()

    def train(self, X_train: pd.DataFrame, y_train: pd.DataFrame) -> Dict[str, float]:
        """
        Train the model on the provided data.

        Args:
            X_train: Training features
            y_train: Training targets; must contain a ``consumption_kwh`` column

        Returns:
            Dictionary with training metrics (``train_mse``, ``train_rmse``,
            ``train_mae``, ``train_r2``)
        """
        X_prepared = self.prepare_features(X_train)
        target = y_train[self.TARGET_COLUMN]

        # Fit into a local first so a failed fit never replaces a working model.
        pipeline = self._create_pipeline()
        pipeline.fit(X_prepared, target)

        self.model = pipeline
        self.feature_names = X_prepared.columns.tolist()
        self.is_trained = True

        return self._regression_metrics("train", target, pipeline.predict(X_prepared))

    def evaluate(self, X_test: pd.DataFrame, y_test: pd.DataFrame) -> Dict[str, float]:
        """
        Evaluate the model on test data.

        Args:
            X_test: Test features
            y_test: Test targets; must contain a ``consumption_kwh`` column

        Returns:
            Dictionary with evaluation metrics (``test_mse``, ``test_rmse``,
            ``test_mae``, ``test_r2``)

        Raises:
            ValueError: If the model has not been trained or loaded.
        """
        if not self.is_trained:
            raise ValueError("Model must be trained before evaluation")

        X_prepared = self.prepare_features(X_test)
        y_pred = self.model.predict(X_prepared)

        return self._regression_metrics("test", y_test[self.TARGET_COLUMN], y_pred)

    def predict(self, temperature: float, day_of_week: str, major_event: int) -> float:
        """
        Make a single prediction.

        Args:
            temperature: Average daily temperature in Celsius
            day_of_week: Day of the week
            major_event: Whether there's a major event (0 or 1)

        Returns:
            Predicted electricity consumption in kWh, clamped at zero

        Raises:
            ValueError: If the model has not been trained or loaded, or if an
                input fails the checks in ``prepare_features``.
        """
        if not self.is_trained:
            raise ValueError("Model must be trained before making predictions")

        input_data = pd.DataFrame(
            {
                "temperature": [temperature],
                "day_of_week": [day_of_week],
                "major_event": [major_event],
            }
        )
        X_prepared = self.prepare_features(input_data)
        prediction = self.model.predict(X_prepared)[0]

        # Consumption cannot be negative; always hand back a built-in float
        # rather than an int 0 or a NumPy scalar.
        return float(max(0.0, prediction))

    def get_model_coefficients(self) -> Dict[str, Any]:
        """
        Get model coefficients and feature names.

        Returns:
            Dictionary with ``feature_names``, ``coefficients`` (in the same
            order as the names) and ``intercept``

        Raises:
            ValueError: If the model has not been trained or loaded.
        """
        if not self.is_trained:
            raise ValueError("Model must be trained before accessing coefficients")

        preprocessor = self.model.named_steps["preprocessor"]
        regressor = self.model.named_steps["regressor"]
        encoder = preprocessor.named_transformers_["cat"]

        # Read the day labels from the fitted encoder instead of assuming
        # which level was dropped: a model fitted with inferred (alphabetical)
        # categories drops "Friday", not "Monday".
        day_labels = [str(day) for day in encoder.categories_[0]]
        drop_idx = getattr(encoder, "drop_idx_", None)
        if drop_idx is not None and drop_idx[0] is not None:
            del day_labels[int(drop_idx[0])]

        # Column order follows the transformer order: num, cat, bool.
        feature_names = ["temperature"]
        feature_names.extend(f"day_{day.lower()}" for day in day_labels)
        feature_names.append("major_event")

        return {
            "feature_names": feature_names,
            "coefficients": regressor.coef_.tolist(),
            "intercept": float(regressor.intercept_),
        }

    def save_model(self, filepath: str) -> None:
        """
        Save the trained model to disk.

        Args:
            filepath: Path to save the model

        Raises:
            ValueError: If the model has not been trained or loaded.
        """
        if not self.is_trained:
            raise ValueError("Model must be trained before saving")

        # ``Path.parent`` is "." for a bare filename, so this also works when
        # filepath has no directory part (``os.makedirs("")`` would raise).
        Path(filepath).parent.mkdir(parents=True, exist_ok=True)

        joblib.dump(self.model, filepath)

    def load_model(self, filepath: str) -> None:
        """
        Load a trained model from disk.

        Args:
            filepath: Path to the saved model

        Raises:
            FileNotFoundError: If ``filepath`` does not exist.
            ValueError: If the loaded object is not a pipeline with
                ``preprocessor`` and ``regressor`` steps.
        """
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"Model file not found: {filepath}")

        # NOTE: joblib.load is pickle-based and can execute arbitrary code;
        # only load model files from a trusted source.
        loaded = joblib.load(filepath)

        # Validate before touching instance state so a bad file cannot leave
        # the object half-initialized.
        steps = getattr(loaded, "named_steps", None)
        if steps is None or "preprocessor" not in steps or "regressor" not in steps:
            raise ValueError(
                f"File does not contain a compatible model pipeline: {filepath}"
            )

        self.model = loaded
        self.feature_names = list(self.REQUIRED_COLUMNS)
        self.is_trained = True