smartclass-ops / scripts /postdeploy_smoke_check.py
balaji958685's picture
Add post-deploy smoke check script
d583ddf verified
#!/usr/bin/env python3
"""
SmartClass Post-Deployment Smoke Check
Validates that a deployment is working correctly by testing
all service endpoints and critical functionality.
Usage:
python scripts/postdeploy_smoke_check.py
python scripts/postdeploy_smoke_check.py --api-url http://api.smartclass.internal:8000
python scripts/postdeploy_smoke_check.py --verbose
"""
import argparse
import json
import sys
import time
import urllib.request
import urllib.error
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class CheckResult:
    """Outcome of one smoke check.

    Instances are created by the individual ``check_*`` functions and
    collected into a report by the test runner.
    """

    # Label shown in the per-check report line.
    name: str
    # True when the check succeeded, False otherwise.
    passed: bool
    # Short human-readable explanation of the outcome.
    message: str
    # Time spent on the check in milliseconds; stays 0.0 when not measured.
    duration_ms: float = 0.0
    # Optional extra information, printed only in verbose mode.
    details: Optional[str] = None
@dataclass
class SmokeTestReport:
    """Aggregated outcome of a smoke-test run.

    ``results`` holds one entry per executed check (each entry exposes a
    ``passed`` attribute); ``start_time`` and ``end_time`` are timestamps in
    seconds set by the runner around the whole run.
    """

    results: list = field(default_factory=list)
    start_time: float = 0.0
    end_time: float = 0.0

    @property
    def total(self) -> int:
        """Number of checks recorded."""
        return len(self.results)

    @property
    def passed(self) -> int:
        """Number of recorded checks that passed."""
        return len([entry for entry in self.results if entry.passed])

    @property
    def failed(self) -> int:
        """Number of recorded checks that did not pass."""
        return len([entry for entry in self.results if not entry.passed])

    @property
    def duration_s(self) -> float:
        """Elapsed time of the run in seconds (end minus start)."""
        elapsed = self.end_time - self.start_time
        return elapsed
def http_check(url: str, timeout: int = 10) -> tuple:
    """Issue a GET request and report what happened without raising.

    Args:
        url: Absolute URL to fetch.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        A ``(status_code, body, duration_ms)`` tuple.

        * On a successful response: the response status, the decoded body and
          the elapsed time.
        * On an HTTP error response (``urllib.error.HTTPError``): the error
          code and the error response body, falling back to the exception
          text when the body is empty or unreadable.
        * On any other failure (bad URL, connection refused, timeout, ...):
          status ``0`` and the exception text.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the system clock is adjusted mid-request.
    started = time.perf_counter()

    def elapsed_ms() -> float:
        return (time.perf_counter() - started) * 1000

    try:
        req = urllib.request.Request(url)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            # errors="replace": a body that is not valid UTF-8 must not turn a
            # successful response into a status-0 "connection failure".
            body = resp.read().decode("utf-8", errors="replace")
            return resp.status, body, elapsed_ms()
    except urllib.error.HTTPError as err:
        # HTTPError doubles as the response object: read its body so callers
        # can show the server's own error payload, and close it afterwards.
        try:
            with err:
                payload = err.read().decode("utf-8", errors="replace")
        except Exception:
            # Best effort only: an unreadable error body falls back to str(err).
            payload = ""
        return err.code, payload or str(err), elapsed_ms()
    except Exception as err:
        # Deliberate catch-all: callers treat status 0 as "could not connect".
        return 0, str(err), elapsed_ms()
def check_api_health(api_url: str) -> CheckResult:
    """Check the API ``/health`` endpoint.

    Passes when the endpoint answers 200 and the body mentions "healthy"
    (case-insensitive) somewhere other than inside the word "unhealthy".

    Args:
        api_url: Base URL of the API server, without a trailing slash.

    Returns:
        A ``CheckResult`` named "API Health"; on failure the message carries
        the status code and the first 100 characters of the body.
    """
    status, body, duration = http_check(f"{api_url}/health")
    # "unhealthy" contains "healthy"; strip it first so a 200 response that
    # reports an unhealthy state is not counted as a pass.
    normalized = body.lower().replace("unhealthy", "")
    if status == 200 and "healthy" in normalized:
        return CheckResult("API Health", True, "API is healthy", duration)
    return CheckResult("API Health", False, f"Status {status}: {body[:100]}", duration)
def check_api_docs(api_url: str) -> CheckResult:
    """Verify that the API's ``/docs`` page answers with HTTP 200.

    Args:
        api_url: Base URL of the API server, without a trailing slash.

    Returns:
        A ``CheckResult`` named "API Docs"; the response body is not inspected.
    """
    status, _body, elapsed_ms = http_check(f"{api_url}/docs")
    reachable = status == 200
    outcome = "Swagger UI accessible" if reachable else f"Status {status}"
    return CheckResult("API Docs", reachable, outcome, elapsed_ms)
def check_redis_via_api(api_url: str) -> CheckResult:
    """Check Redis connectivity as reported by the API ``/health`` endpoint.

    Outcomes:
        * ``/health`` is not 200: fail.
        * The body is not JSON, is not a JSON object, or carries no usable
          Redis status: pass with "Redis assumed OK" (best effort, as before).
        * Redis status is reported and positive: pass.
        * Redis status is reported and anything else: fail, quoting the
          reported status.

    The status is read from ``data["redis"]["status"]``; a bare string under
    ``data["redis"]`` is accepted as the status as well.

    Args:
        api_url: Base URL of the API server, without a trailing slash.

    Returns:
        A ``CheckResult`` named "Redis (via API)".
    """
    label = "Redis (via API)"
    # NOTE(review): only "connected" is known from the original code; the other
    # positive words are assumed synonyms - confirm against the API's schema.
    positive_statuses = ("connected", "ok", "up", "healthy")

    status, body, duration = http_check(f"{api_url}/health")
    if status != 200:
        return CheckResult(label, False, f"API not healthy: {status}", duration)

    try:
        data = json.loads(body)
    except json.JSONDecodeError:
        data = None

    # Tolerate any JSON shape instead of raising AttributeError on non-dicts.
    redis_info = data.get("redis") if isinstance(data, dict) else None
    redis_status = redis_info.get("status") if isinstance(redis_info, dict) else redis_info

    if not isinstance(redis_status, str):
        # No usable Redis information in the payload: keep the lenient default.
        return CheckResult(label, True, "API healthy (Redis assumed OK)", duration)

    if redis_status.strip().lower() in positive_statuses:
        return CheckResult(label, True, "Redis connected", duration)

    # The API explicitly reports a non-positive Redis state; passing here would
    # make this check unable to ever detect a Redis outage.
    return CheckResult(label, False, f"Redis status: {redis_status[:100]}", duration)
def check_database_via_api(api_url: str) -> CheckResult:
    """Report database reachability based on the API ``/health`` status code.

    Only the HTTP status is examined: a 200 from ``/health`` is taken to mean
    the database is accessible; the response body is not inspected.

    Args:
        api_url: Base URL of the API server, without a trailing slash.

    Returns:
        A ``CheckResult`` named "Database (via API)".
    """
    status, _body, elapsed_ms = http_check(f"{api_url}/health")
    healthy = status == 200
    outcome = "Database accessible" if healthy else f"API returned {status}"
    return CheckResult("Database (via API)", healthy, outcome, elapsed_ms)
def check_edge_metrics(edge_url: str) -> CheckResult:
    """Verify that the edge node serves SmartClass metrics at ``/metrics``.

    Passes only when the endpoint answers 200 and the body contains the
    ``smartclass_`` marker. A status of 0 (request never completed) is
    reported as a connection failure with the first 100 characters of the
    error text.

    Args:
        edge_url: Base URL of the edge metrics server, without a trailing slash.

    Returns:
        A ``CheckResult`` named "Edge Metrics".
    """
    label = "Edge Metrics"
    status, body, elapsed_ms = http_check(f"{edge_url}/metrics")

    # Guard clauses for the failure cases first; status 0 can never be 200,
    # so checking it up front does not change which branch is taken.
    if status == 0:
        return CheckResult(label, False, f"Connection failed: {body[:100]}", elapsed_ms)
    if status != 200 or "smartclass_" not in body:
        return CheckResult(label, False, f"Status {status}, missing smartclass metrics", elapsed_ms)
    return CheckResult(label, True, "Metrics endpoint responsive", elapsed_ms)
def check_prometheus(prom_url: str) -> CheckResult:
    """Check that Prometheus is healthy and report its active target count.

    ``/-/healthy`` decides pass or fail. When it answers 200, the targets API
    (``/api/v1/targets``) is queried as a best-effort extra: if it returns a
    parsable payload the number of active targets is added to the message,
    otherwise the check still passes with a plain "Healthy". A count of zero
    does not fail the check.

    Args:
        prom_url: Base URL of the Prometheus server, without a trailing slash.

    Returns:
        A ``CheckResult`` named "Prometheus". ``duration_ms`` covers the
        health request only, not the targets request.
    """
    status, _body, duration = http_check(f"{prom_url}/-/healthy")
    if status != 200:
        return CheckResult("Prometheus", False, f"Status {status}", duration)

    t_status, t_body, _ = http_check(f"{prom_url}/api/v1/targets")
    if t_status == 200:
        try:
            data = json.loads(t_body)
            active = len(data.get("data", {}).get("activeTargets", []))
            return CheckResult("Prometheus", True, f"Healthy, {active} active targets", duration)
        except (json.JSONDecodeError, AttributeError, TypeError):
            # Invalid JSON, or valid JSON of an unexpected shape (null, a list,
            # "data": null, ...). The target count is optional, so a healthy
            # Prometheus must not be reported as failed because of it.
            pass
    return CheckResult("Prometheus", True, "Healthy", duration)
def check_frontend(frontend_url: str) -> CheckResult:
    """Check that the frontend URL serves an HTML document.

    Passes when the response is 200 and the body contains a doctype
    declaration or an ``<html`` tag, matched case-insensitively so that
    ``<!doctype html>`` and ``<HTML>`` are recognised too.

    Args:
        frontend_url: URL of the frontend entry page.

    Returns:
        A ``CheckResult`` named "Frontend". Failure messages distinguish a
        non-HTML 200 response, a connection failure (status 0) and any other
        HTTP status.
    """
    status, body, duration = http_check(frontend_url)
    if status == 200:
        markup = body.lower()
        if "<!doctype" in markup or "<html" in markup:
            return CheckResult("Frontend", True, "Serving HTML content", duration)
        # A bare "Status 200" would read like a success in the report.
        return CheckResult("Frontend", False, "Status 200 but response is not HTML", duration)
    if status == 0:
        # Status 0 means the request never completed; body holds the error text.
        return CheckResult("Frontend", False, f"Connection failed: {body[:100]}", duration)
    return CheckResult("Frontend", False, f"Status {status}", duration)
def run_smoke_tests(
    api_url: str = "http://localhost:8000",
    edge_url: str = "http://localhost:9100",
    prom_url: str = "http://localhost:9090",
    frontend_url: str = "http://localhost:5173",
    verbose: bool = False,
) -> SmokeTestReport:
    """Run every smoke check in order, print one line per check, return the report.

    Args:
        api_url: Base URL of the API server.
        edge_url: Base URL of the edge metrics server.
        prom_url: Base URL of the Prometheus server.
        frontend_url: URL of the frontend entry page.
        verbose: When true, also print a result's ``details`` if it has any.

    Returns:
        A ``SmokeTestReport`` holding one ``CheckResult`` per check, with
        ``start_time`` and ``end_time`` set around the whole run.
    """
    report = SmokeTestReport()
    report.start_time = time.time()

    # (fallback label, zero-argument callable). The label is only used to name
    # the result when the check itself raises.
    checks = [
        ("API", lambda: check_api_health(api_url)),
        ("API Docs", lambda: check_api_docs(api_url)),
        ("Redis", lambda: check_redis_via_api(api_url)),
        ("Database", lambda: check_database_via_api(api_url)),
        ("Edge", lambda: check_edge_metrics(edge_url)),
        ("Prometheus", lambda: check_prometheus(prom_url)),
        ("Frontend", lambda: check_frontend(frontend_url)),
    ]

    for name, check_fn in checks:
        try:
            result = check_fn()
        except Exception as e:
            # One crashing check must not abort the run: record it as a failure
            # and keep the exception type for --verbose output.
            result = CheckResult(name, False, f"Exception: {e}", details=type(e).__name__)
        report.results.append(result)

        # Real emoji; the previous literal for the pass icon was mojibake.
        icon = "✅" if result.passed else "❌"
        print(f" {icon} {result.name:<20} {result.message} ({result.duration_ms:.0f}ms)")
        if verbose and result.details:
            print(f" Details: {result.details}")

    report.end_time = time.time()
    return report
def main():
    """Command-line entry point: parse options, run the checks, print a summary.

    Exits with status 0 when every check passed and 1 otherwise.

    With ``--json`` the machine-readable result is the only thing written to
    stdout; the banner, per-check lines and summary box go to stderr so that
    stdout can be piped straight into a JSON parser. Without ``--json``
    everything is written to stdout.
    """
    # Local import: only this function needs it, for the --json stream routing.
    import contextlib

    parser = argparse.ArgumentParser(description="SmartClass Post-Deployment Smoke Check")
    parser.add_argument("--api-url", default="http://localhost:8000", help="API server URL")
    parser.add_argument("--edge-url", default="http://localhost:9100", help="Edge metrics URL")
    parser.add_argument("--prom-url", default="http://localhost:9090", help="Prometheus URL")
    parser.add_argument("--frontend-url", default="http://localhost:5173", help="Frontend URL")
    parser.add_argument("--verbose", action="store_true", help="Show detailed output")
    parser.add_argument("--json", action="store_true", help="Output results as JSON")
    args = parser.parse_args()

    # Box geometry: every row is padded to the same inner width so the right
    # border lines up regardless of the numbers in the summary. Emoji may
    # render two columns wide in some terminals, shifting that row slightly.
    inner_width = 55
    rule = "═" * inner_width

    def boxed(text: str) -> str:
        return "║" + text.ljust(inner_width) + "║"

    human_stream = sys.stderr if args.json else sys.stdout
    with contextlib.redirect_stdout(human_stream):
        print("")
        print("╔" + rule + "╗")
        print(boxed("SmartClass Post-Deployment Smoke Check".center(inner_width)))
        print("╠" + rule + "╣")
        print("")

        report = run_smoke_tests(
            api_url=args.api_url,
            edge_url=args.edge_url,
            prom_url=args.prom_url,
            frontend_url=args.frontend_url,
            verbose=args.verbose,
        )

        print("")
        print("╠" + rule + "╣")
        if report.failed == 0:
            summary = f" ✅ All {report.total} checks passed ({report.duration_s:.1f}s)"
        else:
            summary = f" ❌ {report.failed}/{report.total} checks failed ({report.duration_s:.1f}s)"
        print(boxed(summary))
        print("╚" + rule + "╝")
        print("")

    if args.json:
        output = {
            "passed": report.passed,
            "failed": report.failed,
            "total": report.total,
            "duration_s": report.duration_s,
            "results": [
                {
                    "name": r.name,
                    "passed": r.passed,
                    "message": r.message,
                    "duration_ms": r.duration_ms,
                    "details": r.details,
                }
                for r in report.results
            ],
        }
        print(json.dumps(output, indent=2))

    sys.exit(0 if report.failed == 0 else 1)


if __name__ == "__main__":
    main()