Spaces:

HashirAwaiz
/

Wildfire-Intelligence-MLOps

Sleeping

App Files Files Community

Wildfire-Intelligence-MLOps / tests /test_model.py

HashirAwaiz

Upload test_model.py

ffb89c2 verified about 2 months ago

raw

history blame contribute delete

2.98 kB

	import warnings
	import os

	# ==========================================
	# 🧹 CLEANUP SQUAD
	# 1. Suppress the pkg_resources Deprecation Warning
	# 2. Monkey Patch NumPy 2.0 for DeepChecks
	# ==========================================
	# The filters are installed before the third-party imports further down so
	# that warnings emitted while those modules are imported are silenced too.
	# NOTE: no module or message filter is given, so every UserWarning and
	# FutureWarning raised in this process is ignored, not only the
	# pkg_resources one.
	warnings.filterwarnings("ignore", category=UserWarning) # Mutes the pkg_resources warning
	warnings.filterwarnings("ignore", category=FutureWarning)

	import numpy as np
	import pandas as pd
	import sys

	# Monkey patch for the DeepChecks crash: NumPy 2.0 removed the ``np.Inf``
	# alias, so it is restored here when missing. This must run before
	# ``deepchecks`` is imported below; on older NumPy the guard is a no-op.
	if not hasattr(np, "Inf"):
	    np.Inf = np.inf

	# Now safe to import heavy libraries
	import joblib
	from deepchecks.tabular import Dataset
	from deepchecks.tabular.suites import full_suite
	from sklearn.model_selection import train_test_split

	# Path to the processed sample data, relative to the current working directory.
	# Forward slashes are used, so the same literal works on Windows and POSIX.
	DATA_PATH = "data/sample_wildfire.csv"

	def test_data_drift_and_integrity():
	    """Run the DeepChecks ``full_suite`` on a train/test split and print a summary.

	    The CSV at ``DATA_PATH`` is split 80/20 with a fixed seed into a reference
	    (train) and a current (test) frame. Both are wrapped as DeepChecks
	    ``Dataset`` objects with ``bi`` as the label and no categorical features
	    declared, then passed to ``full_suite().run(...)``.

	    The function is report-only: checks that did not pass are listed on stdout
	    but nothing is raised, so a CI run is not broken by drift findings. If the
	    data file is missing, an error line is printed and the function returns
	    early without running any check.

	    Returns:
	        None
	    """
	    print("\n🧪 Starting DeepChecks Suite (Drift & Integrity)...")

	    # Load data. A missing file is reported rather than raised (best-effort run).
	    try:
	        df = pd.read_csv(DATA_PATH)
	    except FileNotFoundError:
	        print(f"❌ Error: Data file not found at {DATA_PATH}")
	        return

	    # 1. Prepare data
	    # Split into Reference (Train) and Current (Test) to simulate time passing.
	    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

	    # Feature columns and target column expected in the CSV.
	    features = ['tmmn', 'tmmx', 'rmin', 'rmax', 'vs', 'pr', 'erc']
	    label = 'bi'
	    columns = features + [label]

	    # Wrap both frames as DeepChecks datasets restricted to the columns above.
	    ds_train = Dataset(train_df[columns], label=label, cat_features=[])
	    ds_test = Dataset(test_df[columns], label=label, cat_features=[])

	    # 2. Run the full suite
	    print("⏳ Running checks... (This handles drift, integrity, and performance)")
	    suite = full_suite()
	    result = suite.run(train_dataset=ds_train, test_dataset=ds_test)

	    # 3. Terminal report
	    print("\n" + "="*50)
	    print("📊 DEEPCHECKS RESULT SUMMARY")
	    print("="*50)

	    # ``SuiteResult.passed`` is a method: it has to be called. The bare
	    # attribute is a bound method and therefore always truthy, which would
	    # report success no matter what the suite found.
	    if result.passed():
	        print("\n✅ RESULT: All System Checks PASSED.")
	    else:
	        # Failures are listed, but the script still counts as 'Success' for
	        # the CI/CD flow.
	        print("\n⚠️ RESULT: Drift or Integrity Issues Detected.")
	        print(" (This is expected in real-world scenarios due to Seasonality)")

	        # Name each check that did not pass.
	        not_passed = result.get_not_passed_checks()
	        if not_passed:
	            print("\n Failed Checks:")
	            for check_result in not_passed:
	                # ``name`` is a classmethod on the check; call it to get the
	                # readable name instead of printing the method object.
	                print(f" - {check_result.check.name()}")

	    print("\n" + "="*50 + "\n")

	if __name__ == "__main__":
	    # Allow running the suite directly as a script, outside a test runner.
	    test_data_drift_and_integrity()