| """ |
| Integration tests for the Daily Household Electricity Consumption Predictor. |
| |
| This module contains integration tests that test the complete workflow |
| from data generation through model training to prediction. |
| """ |
|
|
| import pytest |
| import pandas as pd |
| import numpy as np |
| import tempfile |
| import os |
| from src.data_generator import DataGenerator |
| from src.model import ElectricityConsumptionModel |
| from src.app import ElectricityPredictorApp |
|
|
|
|
class TestIntegration:
    """Integration tests for the complete system.

    Each test gets a fresh seeded ``DataGenerator``, an untrained
    ``ElectricityConsumptionModel`` and a fresh ``ElectricityPredictorApp``
    (see ``setup_method``), so tests do not share state.
    """

    # Name of the target column in the generated data frames.
    _TARGET = "consumption_kwh"

    # Day names accepted for the ``day_of_week`` feature in these tests.
    _DAYS = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )

    def setup_method(self):
        """Set up test environment for each test method."""
        self.generator = DataGenerator(seed=42)
        self.model = ElectricityConsumptionModel()
        self.app = ElectricityPredictorApp()

    @classmethod
    def _split_features_target(cls, frame):
        """Return ``(features, target)`` for *frame*.

        ``features`` is *frame* without the target column; ``target`` is a
        single-column data frame holding only the target column.
        """
        return frame.drop(cls._TARGET, axis=1), frame[[cls._TARGET]]

    def _fit_model(self, n_samples):
        """Train ``self.model`` on the training split of freshly generated data."""
        data = self.generator.generate_data(n_samples=n_samples)
        train_data, _, _ = self.generator.split_data(data)
        features, target = self._split_features_target(train_data)
        self.model.train(features, target)

    def test_complete_workflow(self):
        """Test the complete workflow from data generation to prediction."""
        # Generate data and check its size and schema.
        data = self.generator.generate_data(n_samples=1000, noise_level=0.1)
        assert len(data) == 1000
        expected_columns = {
            "temperature",
            "day_of_week",
            "major_event",
            "consumption_kwh",
        }
        assert expected_columns.issubset(data.columns)

        # Splitting must neither lose nor duplicate rows.
        train_data, val_data, test_data = self.generator.split_data(data)
        assert len(train_data) + len(val_data) + len(test_data) == len(data)

        # Train and check the reported training fit.
        X_train, y_train = self._split_features_target(train_data)
        train_metrics = self.model.train(X_train, y_train)

        assert self.model.is_trained
        assert "train_r2" in train_metrics
        assert train_metrics["train_r2"] > 0.3

        # Evaluate on the held-out test split.
        X_test, y_test = self._split_features_target(test_data)
        test_metrics = self.model.evaluate(X_test, y_test)

        assert "test_r2" in test_metrics
        assert test_metrics["test_r2"] > 0.3

        # Single predictions: a mild weekday without an event versus a hotter
        # Saturday with an event; the latter is expected to be higher.
        mild_weekday = self.model.predict(25.0, "Monday", 0)
        hot_weekend_event = self.model.predict(30.0, "Saturday", 1)

        assert mild_weekday > 0
        assert hot_weekend_event > 0
        assert hot_weekend_event > mild_weekday

    def test_app_integration(self):
        """Test the complete app workflow."""
        # Generate data and train through the app front end.
        data_info, training_metrics, evaluation_metrics = self.app.generate_and_train(
            n_samples=500,
            noise_level=0.1,
            train_size=0.7,
            val_size=0.15,
            test_size=0.15,
        )

        assert self.app.is_model_trained
        assert "Data Generated Successfully!" in data_info
        assert "Training Metrics:" in training_metrics
        assert "Test Set Evaluation:" in evaluation_metrics

        # The prediction report echoes the headline and the input temperature.
        prediction_result = self.app.predict_consumption(25.0, "Monday", False)
        assert "Estimated Daily Electricity Consumption:" in prediction_result
        assert "Temperature: 25.0°C" in prediction_result

        # The model summary exposes the fitted coefficients.
        model_info = self.app.get_model_info()
        assert "Model Information:" in model_info
        assert "Feature Coefficients:" in model_info

    def test_model_persistence(self):
        """Test model saving and loading."""
        self._fit_model(n_samples=500)

        # Save into a fresh temporary directory: the target path does not
        # exist beforehand, so the existence check below proves that
        # ``save_model`` really wrote the file. The directory (and the file)
        # is removed automatically when the ``with`` block exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            model_path = os.path.join(tmp_dir, "model.joblib")

            self.model.save_model(model_path)
            assert os.path.exists(model_path)

            # Load into a brand-new model instance.
            new_model = ElectricityConsumptionModel()
            new_model.load_model(model_path)

            assert new_model.is_trained

            # Original and reloaded model must agree on the same input.
            pred1 = self.model.predict(25.0, "Monday", 0)
            pred2 = new_model.predict(25.0, "Monday", 0)

            assert abs(pred1 - pred2) < 1e-10

    def test_data_persistence(self):
        """Test data saving and loading."""
        data = self.generator.generate_data(n_samples=100)

        # As in the model persistence test, the path must not pre-exist so
        # that the existence check is meaningful.
        with tempfile.TemporaryDirectory() as tmp_dir:
            data_path = os.path.join(tmp_dir, "data.csv")

            self.generator.save_data(data, data_path)
            assert os.path.exists(data_path)

            loaded_data = self.generator.load_data(data_path)

            # The round trip must reproduce the original frame.
            pd.testing.assert_frame_equal(data, loaded_data)

    def test_model_performance_consistency(self):
        """Test that model performance is consistent across runs."""
        data = self.generator.generate_data(n_samples=1000, noise_level=0.1)
        train_data, _, test_data = self.generator.split_data(data)

        X_train, y_train = self._split_features_target(train_data)
        X_test, y_test = self._split_features_target(test_data)

        # Train several independent models on identical data.
        r2_scores = []
        for _ in range(3):
            model = ElectricityConsumptionModel()
            model.train(X_train, y_train)
            metrics = model.evaluate(X_test, y_test)
            r2_scores.append(metrics["test_r2"])

        # The spread of the test R² values must stay below 0.01.
        assert max(r2_scores) - min(r2_scores) < 0.01

    def test_feature_importance_consistency(self):
        """Test that feature importance is consistent with domain knowledge."""
        self._fit_model(n_samples=1000)

        coefficients = self.model.get_model_coefficients()
        feature_names = coefficients["feature_names"]
        values = coefficients["coefficients"]

        temp_coef = values[feature_names.index("temperature")]
        event_coef = values[feature_names.index("major_event")]

        # Both temperature and major events are expected to push the
        # predicted consumption up, i.e. have positive coefficients.
        assert temp_coef > 0
        assert event_coef > 0

    def test_prediction_bounds(self):
        """Test that predictions are within reasonable bounds."""
        self._fit_model(n_samples=1000)

        # Sweep a grid of temperatures, every weekday and both event flags.
        predictions = []
        for temp in [15, 20, 25, 30, 35]:
            for day in self._DAYS:
                for event in [0, 1]:
                    predictions.append(self.model.predict(temp, day, event))

        # Consumption must be strictly positive ...
        assert all(p > 0 for p in predictions)

        # ... and stay inside the 5-50 range this test treats as reasonable.
        assert all(5 <= p <= 50 for p in predictions)

    def test_data_quality_checks(self):
        """Test that generated data meets quality requirements."""
        data = self.generator.generate_data(n_samples=1000)

        # No missing values anywhere.
        assert not data.isnull().any().any()

        # Column dtypes. The day column is checked with ``is_string_dtype``
        # rather than ``dtype == "object"`` so the test also passes when
        # pandas stores text in a dedicated string dtype.
        assert data["temperature"].dtype in [np.float64, np.float32]
        assert pd.api.types.is_string_dtype(data["day_of_week"])
        assert data["major_event"].dtype in [np.int64, np.int32]
        assert data["consumption_kwh"].dtype in [np.float64, np.float32]

        # Value ranges.
        assert data["temperature"].min() >= 15
        assert data["temperature"].max() <= 35
        assert all(data["major_event"].isin([0, 1]))
        assert all(data["consumption_kwh"] > 0)

        # Only known weekday names may appear.
        assert set(data["day_of_week"].unique()).issubset(self._DAYS)

        # Temperature and consumption must be positively correlated.
        temp_consumption_corr = data["temperature"].corr(data["consumption_kwh"])
        assert temp_consumption_corr > 0

    def test_error_handling(self):
        """Test error handling in the complete workflow."""
        # ``setup_method`` provides an untrained model, and an untrained
        # model's ``predict`` is itself expected to raise ``ValueError``
        # (checked at the end of this test). Train first so that the
        # invalid-input checks below can only pass through input validation.
        self._fit_model(n_samples=500)

        # Temperature below and above the accepted range.
        with pytest.raises(ValueError):
            self.model.predict(10.0, "Monday", 0)

        with pytest.raises(ValueError):
            self.model.predict(40.0, "Monday", 0)

        # Unknown day name.
        with pytest.raises(ValueError):
            self.model.predict(25.0, "InvalidDay", 0)

        # Event flag outside {0, 1}.
        with pytest.raises(ValueError):
            self.model.predict(25.0, "Monday", 2)

        # Predicting with a model that was never trained.
        untrained_model = ElectricityConsumptionModel()
        with pytest.raises(ValueError):
            untrained_model.predict(25.0, "Monday", 0)

    def test_app_state_management(self):
        """Test that app state is properly managed."""
        # A fresh app has no trained model.
        assert not self.app.is_model_trained

        self.app.generate_and_train(500, 0.1, 0.7, 0.15, 0.15)
        assert self.app.is_model_trained

        # After training, the app keeps all three non-empty data splits.
        for split_name in ("train_data", "val_data", "test_data"):
            assert hasattr(self.app, split_name)
            assert len(getattr(self.app, split_name)) > 0
|