#!/usr/bin/env python3
"""
Autonomous Self-Healing Service — Background daemon

Runs healer.py as a persistent service with logging and alerting.

Usage:
    python healer_service.py start    # Start daemon
    python healer_service.py stop     # Stop daemon
    python healer_service.py restart  # Stop, then start again
    python healer_service.py status   # Check status
    python healer_service.py logs     # View logs
"""

import os
import sys
import time
import json
import signal
import subprocess
import argparse
from collections import deque
from datetime import datetime, timezone
from typing import Optional

# NOTE(review): these live in world-writable /tmp, so their content must be
# treated as untrusted (see _parse_pid). Consider a per-user runtime directory.
PID_FILE = "/tmp/healer_daemon.pid"
LOG_FILE = "/tmp/healer_daemon.log"
REPORT_FILE = "/tmp/healer_last_report.json"

# How long stop_daemon() waits for the daemon to exit after SIGTERM.
STOP_TIMEOUT_SECONDS = 10.0


def _read_pid_file() -> Optional[str]:
    """Return the stripped content of PID_FILE, or None if it does not exist."""
    try:
        with open(PID_FILE, encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return None


def _parse_pid(raw: str) -> Optional[int]:
    """Return *raw* as a positive PID, or None if it is not one.

    Zero and negative values are rejected on purpose: os.kill() interprets
    them as "my process group" / "every process", which must never be
    signalled because of a corrupt or tampered PID file.
    """
    try:
        pid = int(raw)
    except ValueError:
        return None
    return pid if pid > 0 else None


def _pid_alive(pid: int) -> bool:
    """Return True if a process with *pid* currently exists."""
    try:
        os.kill(pid, 0)  # signal 0: existence/permission check only
    except (ProcessLookupError, OverflowError):
        return False
    except PermissionError:
        # The process exists but belongs to another user.
        return True
    return True


def _remove_pid_file() -> None:
    """Delete PID_FILE, ignoring the case where it is already gone."""
    try:
        os.remove(PID_FILE)
    except FileNotFoundError:
        pass


def _wait_for_exit(pid: int, timeout: float) -> bool:
    """Poll until *pid* is gone; return False if it outlives *timeout* seconds."""
    deadline = time.monotonic() + timeout
    while _pid_alive(pid):
        if time.monotonic() >= deadline:
            return False
        time.sleep(0.1)
    return True


def _write_report(report) -> None:
    """Write *report* to REPORT_FILE atomically.

    The JSON is written to a sibling temp file and renamed into place so a
    concurrent `status` call never reads a half-written report.
    """
    tmp_path = REPORT_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)
    os.replace(tmp_path, REPORT_FILE)


def _run_daemon() -> None:
    """Body of the forked child: detach, redirect output, run healer cycles.

    Never returns normally; the process ends via SIGTERM/SIGINT (handled
    below) or an uncaught error outside the per-cycle try block.
    """
    os.setsid()

    # One line-buffered handle shared by stdout and stderr so log lines show
    # up immediately in `logs` and the two streams do not interleave
    # mid-line. It intentionally stays open for the life of the process.
    log = open(LOG_FILE, "a", buffering=1, encoding="utf-8", errors="replace")
    sys.stdout = log
    sys.stderr = log

    print(f"\n{'='*60}")
    print(f"[Service] Daemon started at {datetime.now(timezone.utc).isoformat()}")
    print(f"{'='*60}\n")

    # Import and run healer (imported here so only the daemon process needs it)
    from healer import SpaceHealer, HEALER_CONFIG

    healer = SpaceHealer()
    spaces = healer.discover_spaces()

    print(f"[Service] Monitoring {len(spaces)} spaces")
    print(f"[Service] Poll interval: {HEALER_CONFIG['poll_interval_seconds']}s")

    def handle_signal(signum, frame):
        print(f"[Service] Received signal {signum}, shutting down...")
        sys.exit(0)

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    while True:
        try:
            healer.run_cycle(spaces)
            healer.print_report()

            # Save report for external access
            _write_report(healer.generate_report())
        except Exception as e:
            # Top-level boundary: one failed cycle must not kill the daemon.
            print(f"[Service] ❌ Cycle error: {e}")

        time.sleep(HEALER_CONFIG["poll_interval_seconds"])


def start_daemon():
    """Start the healer as a background process.

    Does nothing if the PID file points at a live process. A PID file that
    is empty, malformed, or refers to a dead process is treated as stale and
    removed. The parent records the child's PID and returns; the child runs
    the healer loop and does not return.
    """
    raw_pid = _read_pid_file()
    if raw_pid is not None:
        old_pid = _parse_pid(raw_pid)
        if old_pid is not None and _pid_alive(old_pid):
            print(f"[Service] Healer already running (PID {old_pid})")
            return
        _remove_pid_file()

    # Flush before forking so buffered parent output is not emitted twice.
    sys.stdout.flush()
    sys.stderr.flush()

    # Fork to background
    pid = os.fork()
    if pid > 0:
        print(f"[Service] Healer daemon started (PID {pid})")
        with open(PID_FILE, 'w', encoding="utf-8") as f:
            f.write(str(pid))
        return

    # Child process
    _run_daemon()


def stop_daemon():
    """Stop the healer daemon.

    Sends SIGTERM to the recorded PID and waits up to STOP_TIMEOUT_SECONDS
    for it to exit. The PID file is removed once the process is gone (or was
    never there); it is kept if the process is still running after the
    timeout so `status` keeps reporting the truth.
    """
    raw_pid = _read_pid_file()
    if raw_pid is None:
        print("[Service] Healer not running")
        return

    pid = _parse_pid(raw_pid)
    if pid is None or not _pid_alive(pid):
        _remove_pid_file()
        print("[Service] Healer was not running (stale PID file)")
        return

    # NOTE(review): a reused PID cannot be told apart from the daemon here;
    # the PID file is the only identity this script records.
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        # Exited between the liveness check and the signal.
        _remove_pid_file()
        print("[Service] Healer was not running (stale PID file)")
        return
    except OSError as e:
        print(f"[Service] Error stopping: {e}")
        return

    if _wait_for_exit(pid, STOP_TIMEOUT_SECONDS):
        _remove_pid_file()
        print(f"[Service] Healer stopped (PID {pid})")
    else:
        print(f"[Service] Error stopping: PID {pid} still running after SIGTERM")


def check_status():
    """Check if daemon is running.

    Prints RUNNING/STOPPED and, when running, a summary of the last saved
    report. A stale PID file is removed as a side effect.
    """
    raw_pid = _read_pid_file()
    if raw_pid is None:
        print("[Service] Healer: STOPPED")
        return

    pid = _parse_pid(raw_pid)
    if pid is None or not _pid_alive(pid):
        print(f"[Service] Healer: STOPPED (stale PID {raw_pid})")
        _remove_pid_file()
        return

    print(f"[Service] Healer: RUNNING (PID {pid})")

    try:
        with open(REPORT_FILE, encoding="utf-8") as f:
            report = json.load(f)
    except FileNotFoundError:
        return  # no cycle has completed yet
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError; do not crash `status` over it.
        print(f" Last report unreadable: {e}")
        return

    if not isinstance(report, dict):
        print(" Last report unreadable: unexpected format")
        return

    print(f" Last check: {report.get('generated_at', 'unknown')}")
    print(f" Spaces monitored: {len(report.get('spaces', []))}")
    print(f" Actions today: {report.get('actions_today', 0)}")
    print(f" Est. daily cost: ${report.get('total_estimated_daily_cost', 0):.2f}")


def view_logs(lines: int = 50):
    """View recent daemon logs.

    Args:
        lines: Number of trailing log lines to print. Zero or a negative
            value prints nothing.
    """
    try:
        with open(LOG_FILE, encoding="utf-8", errors="replace") as f:
            # A bounded deque keeps only the tail instead of the whole file.
            tail = deque(f, maxlen=max(lines, 0))
    except FileNotFoundError:
        print("[Service] No logs found")
        return

    for line in tail:
        print(line.rstrip())


def main():
    """Parse the command line and dispatch to the requested service action."""
    parser = argparse.ArgumentParser(description="Healer Service Manager")
    parser.add_argument("command", choices=["start", "stop", "status", "logs", "restart"])
    parser.add_argument("--lines", type=int, default=50, help="Lines of log to show")
    args = parser.parse_args()

    if args.command == "start":
        start_daemon()
    elif args.command == "stop":
        stop_daemon()
    elif args.command == "restart":
        # stop_daemon() waits for the old process to exit, so no fixed sleep
        # is needed; if it is still alive, start_daemon() will refuse to
        # start a second instance.
        stop_daemon()
        start_daemon()
    elif args.command == "status":
        check_status()
    elif args.command == "logs":
        view_logs(args.lines)


if __name__ == "__main__":
    main()