Spaces:

surahj
/

electricity-consumption-predictor

Sleeping

electricity-consumption-predictor / tests /test_integration.py

surahj

Initial commit

64f3974 9 months ago

11.1 kB

	"""
	Integration tests for the Daily Household Electricity Consumption Predictor.

	This module contains integration tests that test the complete workflow
	from data generation through model training to prediction.
	"""

	import pytest
	import pandas as pd
	import numpy as np
	import tempfile
	import os
	from src.data_generator import DataGenerator
	from src.model import ElectricityConsumptionModel
	from src.app import ElectricityPredictorApp


	class TestIntegration:
	    """Integration tests for the complete system.

	    Every test starts from the objects built in ``setup_method``: a
	    ``DataGenerator`` seeded with 42, a new ``ElectricityConsumptionModel``
	    and a new ``ElectricityPredictorApp``.
	    """

	    # Name of the target column in the generated data frames.
	    TARGET_COLUMN = "consumption_kwh"

	    # Day names used both for predictions and for validating generated data.
	    DAYS_OF_WEEK = (
	        "Monday",
	        "Tuesday",
	        "Wednesday",
	        "Thursday",
	        "Friday",
	        "Saturday",
	        "Sunday",
	    )

	    def setup_method(self):
	        """Set up test environment for each test method."""
	        self.generator = DataGenerator(seed=42)
	        self.model = ElectricityConsumptionModel()
	        self.app = ElectricityPredictorApp()

	    @classmethod
	    def _features_and_target(cls, frame):
	        """Split *frame* into (features, target).

	        Features are all columns except ``TARGET_COLUMN``; the target is
	        returned as a one-column DataFrame.
	        """
	        features = frame.drop(cls.TARGET_COLUMN, axis=1)
	        target = frame[[cls.TARGET_COLUMN]]
	        return features, target

	    def _train_model(self, n_samples, **generate_kwargs):
	        """Generate data, split it, and train ``self.model`` on the train split.

	        Args:
	            n_samples: Number of rows to request from the generator.
	            **generate_kwargs: Extra keyword arguments forwarded to
	                ``generate_data`` (for example ``noise_level``).

	        Returns:
	            The ``(train_data, val_data, test_data)`` tuple from ``split_data``.
	        """
	        data = self.generator.generate_data(n_samples=n_samples, **generate_kwargs)
	        train_data, val_data, test_data = self.generator.split_data(data)
	        X_train, y_train = self._features_and_target(train_data)
	        self.model.train(X_train, y_train)
	        return train_data, val_data, test_data

	    def test_complete_workflow(self):
	        """Test the complete workflow from data generation to prediction."""
	        # Step 1: Generate data
	        data = self.generator.generate_data(n_samples=1000, noise_level=0.1)
	        assert len(data) == 1000
	        expected_columns = [
	            "temperature",
	            "day_of_week",
	            "major_event",
	            self.TARGET_COLUMN,
	        ]
	        missing = [col for col in expected_columns if col not in data.columns]
	        assert not missing, f"missing columns: {missing}"

	        # Step 2: Split data
	        train_data, val_data, test_data = self.generator.split_data(data)
	        assert len(train_data) + len(val_data) + len(test_data) == len(data)

	        # Step 3: Train model
	        X_train, y_train = self._features_and_target(train_data)
	        train_metrics = self.model.train(X_train, y_train)

	        assert self.model.is_trained
	        assert "train_r2" in train_metrics
	        assert train_metrics["train_r2"] > 0.3  # Reasonable performance

	        # Step 4: Evaluate model
	        X_test, y_test = self._features_and_target(test_data)
	        test_metrics = self.model.evaluate(X_test, y_test)

	        assert "test_r2" in test_metrics
	        assert test_metrics["test_r2"] > 0.3  # Reasonable performance

	        # Step 5: Make predictions
	        prediction1 = self.model.predict(25.0, "Monday", 0)
	        prediction2 = self.model.predict(30.0, "Saturday", 1)

	        assert prediction1 > 0
	        assert prediction2 > 0
	        # Higher temp + weekend + event should increase consumption
	        assert prediction2 > prediction1

	    def test_app_integration(self):
	        """Test the complete app workflow."""
	        # Test data generation and training through the app
	        data_info, training_metrics, evaluation_metrics = self.app.generate_and_train(
	            n_samples=500,
	            noise_level=0.1,
	            train_size=0.7,
	            val_size=0.15,
	            test_size=0.15,
	        )

	        assert self.app.is_model_trained
	        assert "Data Generated Successfully!" in data_info
	        assert "Training Metrics:" in training_metrics
	        assert "Test Set Evaluation:" in evaluation_metrics

	        # Test prediction through the app
	        prediction_result = self.app.predict_consumption(25.0, "Monday", False)
	        assert "Estimated Daily Electricity Consumption:" in prediction_result
	        assert "Temperature: 25.0°C" in prediction_result

	        # Test model info through the app
	        model_info = self.app.get_model_info()
	        assert "Model Information:" in model_info
	        assert "Feature Coefficients:" in model_info

	    def test_model_persistence(self):
	        """Test model saving and loading."""
	        # Generate data and train model
	        self._train_model(n_samples=500)

	        # Save into a fresh directory: the target path does not exist
	        # beforehand, so the existence check below really verifies that
	        # save_model wrote the file. The directory is removed on exit.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            model_path = os.path.join(tmp_dir, "model.joblib")

	            self.model.save_model(model_path)
	            assert os.path.exists(model_path)

	            # Load model in new instance
	            new_model = ElectricityConsumptionModel()
	            new_model.load_model(model_path)

	            assert new_model.is_trained

	            # Test predictions are identical
	            pred1 = self.model.predict(25.0, "Monday", 0)
	            pred2 = new_model.predict(25.0, "Monday", 0)

	            assert abs(pred1 - pred2) < 1e-10

	    def test_data_persistence(self):
	        """Test data saving and loading."""
	        # Generate data
	        data = self.generator.generate_data(n_samples=100)

	        # Save into a fresh directory so that the existence check is not
	        # satisfied by a pre-created temporary file.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            data_path = os.path.join(tmp_dir, "data.csv")

	            self.generator.save_data(data, data_path)
	            assert os.path.exists(data_path)

	            # Load data
	            loaded_data = self.generator.load_data(data_path)

	            # Check data is identical
	            pd.testing.assert_frame_equal(data, loaded_data)

	    def test_model_performance_consistency(self):
	        """Test that model performance is consistent across runs."""
	        # Generate data
	        data = self.generator.generate_data(n_samples=1000, noise_level=0.1)
	        train_data, _, test_data = self.generator.split_data(data)

	        X_train, y_train = self._features_and_target(train_data)
	        X_test, y_test = self._features_and_target(test_data)

	        # Train a new model several times on the same data
	        r2_scores = []
	        for _ in range(3):
	            model = ElectricityConsumptionModel()
	            model.train(X_train, y_train)
	            metrics = model.evaluate(X_test, y_test)
	            r2_scores.append(metrics["test_r2"])

	        # R² scores should be very similar (within 0.01)
	        assert max(r2_scores) - min(r2_scores) < 0.01

	    def test_feature_importance_consistency(self):
	        """Test that feature importance is consistent with domain knowledge."""
	        # Generate data and train model
	        self._train_model(n_samples=1000)

	        # Get coefficients
	        coefficients = self.model.get_model_coefficients()
	        feature_names = coefficients["feature_names"]
	        values = coefficients["coefficients"]

	        temp_coef = values[feature_names.index("temperature")]
	        event_coef = values[feature_names.index("major_event")]

	        # Temperature should have positive effect (higher temp = higher consumption)
	        assert temp_coef > 0

	        # Major event should have positive effect (events increase consumption)
	        assert event_coef > 0

	    def test_prediction_bounds(self):
	        """Test that predictions are within reasonable bounds."""
	        # Generate data and train model
	        self._train_model(n_samples=1000)

	        # Predict for every combination of temperature, day and event flag
	        predictions = [
	            self.model.predict(temp, day, event)
	            for temp in [15, 20, 25, 30, 35]
	            for day in self.DAYS_OF_WEEK
	            for event in [0, 1]
	        ]

	        # All predictions should be positive
	        assert all(p > 0 for p in predictions)

	        # Predictions should be within reasonable range (5-50 kWh)
	        assert all(5 <= p <= 50 for p in predictions)

	    def test_data_quality_checks(self):
	        """Test that generated data meets quality requirements."""
	        # Generate data
	        data = self.generator.generate_data(n_samples=1000)

	        # Check for missing values
	        assert not data.isnull().any().any()

	        # Check data types
	        assert data["temperature"].dtype in [np.float64, np.float32]
	        # Accept both the classic object dtype and a dedicated string dtype,
	        # which newer pandas versions may infer for text columns.
	        day_column = data["day_of_week"]
	        assert pd.api.types.is_object_dtype(day_column) or pd.api.types.is_string_dtype(
	            day_column
	        )
	        assert data["major_event"].dtype in [np.int64, np.int32]
	        assert data[self.TARGET_COLUMN].dtype in [np.float64, np.float32]

	        # Check value ranges
	        assert data["temperature"].min() >= 15
	        assert data["temperature"].max() <= 35
	        assert data["major_event"].isin([0, 1]).all()
	        assert (data[self.TARGET_COLUMN] > 0).all()

	        # Check day of week values
	        unexpected_days = set(day_column.unique()) - set(self.DAYS_OF_WEEK)
	        assert not unexpected_days, f"unexpected day names: {unexpected_days}"

	        # Check correlations make sense
	        temp_consumption_corr = data["temperature"].corr(data[self.TARGET_COLUMN])
	        assert temp_consumption_corr > 0  # Positive correlation

	    def test_error_handling(self):
	        """Test error handling in the complete workflow."""
	        # Train first. An untrained model raises ValueError on predict (see
	        # the last check in this test), so running the invalid-input checks
	        # on an untrained model could pass without input validation ever
	        # being exercised.
	        self._train_model(n_samples=500)

	        invalid_inputs = [
	            (10.0, "Monday", 0),  # Temperature too low
	            (40.0, "Monday", 0),  # Temperature too high
	            (25.0, "InvalidDay", 0),  # Invalid day
	            (25.0, "Monday", 2),  # Invalid major event value
	        ]
	        for temperature, day, event in invalid_inputs:
	            with pytest.raises(ValueError):
	                self.model.predict(temperature, day, event)

	        # Test prediction without training
	        untrained_model = ElectricityConsumptionModel()
	        with pytest.raises(ValueError):
	            untrained_model.predict(25.0, "Monday", 0)

	    def test_app_state_management(self):
	        """Test that app state is properly managed."""
	        # Initially not trained
	        assert not self.app.is_model_trained

	        # After training
	        self.app.generate_and_train(500, 0.1, 0.7, 0.15, 0.15)
	        assert self.app.is_model_trained

	        # Check that each data split is stored and non-empty
	        for split_name in ("train_data", "val_data", "test_data"):
	            assert hasattr(self.app, split_name)
	            assert len(getattr(self.app, split_name)) > 0