|
|
import warnings
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Silence UserWarning and FutureWarning globally. These filters are installed
# before the third-party imports below, so they are already active while
# those libraries are being imported.
warnings.simplefilter("ignore", category=UserWarning)
warnings.simplefilter("ignore", category=FutureWarning)
|
|
|
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
import sys
|
|
|
|
|
|
|
|
|
# Compatibility shim: NumPy 2.0 removed the ``np.Inf`` alias. Re-create it
# only when it is missing so code that still refers to ``np.Inf`` keeps
# working (presumably the libraries imported below -- confirm which one).
if getattr(np, "Inf", None) is None:
    setattr(np, "Inf", np.inf)
|
|
|
|
|
|
|
|
|
import joblib
|
|
|
from deepchecks.tabular import Dataset
|
|
|
from deepchecks.tabular.suites import full_suite
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
|
|
|
|
|
|
# CSV file loaded by test_data_drift_and_integrity(). The path is relative,
# so it is resolved against the current working directory at run time.
DATA_PATH = "data/sample_wildfire.csv"
|
|
|
|
|
|
def test_data_drift_and_integrity():
    """Run the DeepChecks full suite on a train/test split of the sample data.

    Reads the CSV at ``DATA_PATH``, splits it 80/20 with a fixed random seed,
    wraps both parts in DeepChecks ``Dataset`` objects (seven feature columns
    plus the ``bi`` label, no categorical features) and runs ``full_suite()``
    on them. A pass/fail summary is printed to stdout.

    This function only reports. A missing data file or failing checks are
    printed, not raised, and the return value is always ``None``.
    """
    print("\n🧪 Starting DeepChecks Suite (Drift & Integrity)...")

    try:
        df = pd.read_csv(DATA_PATH)
    except FileNotFoundError:
        # Deliberate best-effort: report the missing file and bail out
        # instead of raising.
        print(f"❌ Error: Data file not found at {DATA_PATH}")
        return

    # Fixed seed keeps the split (and therefore the check results) repeatable.
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    features = ['tmmn', 'tmmx', 'rmin', 'rmax', 'vs', 'pr', 'erc']
    label = 'bi'
    columns = features + [label]

    # cat_features=[] declares every feature numeric so DeepChecks does not
    # try to infer categorical columns on its own.
    ds_train = Dataset(train_df[columns], label=label, cat_features=[])
    ds_test = Dataset(test_df[columns], label=label, cat_features=[])

    print("⏳ Running checks... (This handles drift, integrity, and performance)")

    suite = full_suite()
    result = suite.run(train_dataset=ds_train, test_dataset=ds_test)

    print("\n" + "="*50)
    print("📊 DEEPCHECKS RESULT SUMMARY")
    print("="*50)

    # SuiteResult.passed is a method, not an attribute. Testing the bound
    # method itself is always truthy, which would report success
    # unconditionally, so it has to be called.
    if result.passed():
        print("\n✅ RESULT: All System Checks PASSED.")
    else:
        print("\n⚠️ RESULT: Drift or Integrity Issues Detected.")
        print(" (This is expected in real-world scenarios due to Seasonality)")

        not_passed = result.get_not_passed_checks()
        if not_passed:
            print("\n Failed Checks:")
            for check in not_passed:
                # The check's name() is a classmethod; call it to get the
                # readable name rather than a bound-method repr.
                print(f" - {check.check.name()}")

    print("\n" + "="*50 + "\n")
|
|
|
|
|
|
# Script entry point: run the suite once when this file is executed directly
# rather than imported.
if __name__ == "__main__":
    test_data_drift_and_integrity()