| |
"""
Autonomous Self-Healing Service — Background daemon
Runs healer.py as a persistent service with logging and alerting.

Usage:
    python healer_service.py start              # Start daemon
    python healer_service.py stop               # Stop daemon
    python healer_service.py restart            # Stop, then start the daemon again
    python healer_service.py status             # Check status
    python healer_service.py logs [--lines N]   # View the last N log lines (default 50)
"""
|
|
| import os |
| import sys |
| import time |
| import json |
| import signal |
| import subprocess |
| import argparse |
| from datetime import datetime |
|
|
# Holds the PID of the forked daemon process; written by start_daemon().
PID_FILE = "/tmp/healer_daemon.pid"
# The daemon's stdout and stderr are appended to this file.
LOG_FILE = "/tmp/healer_daemon.log"
# JSON dump of the healer's report, rewritten after each successful cycle.
REPORT_FILE = "/tmp/healer_last_report.json"
|
|
|
|
def _live_pid_from_file(path):
    """Return the PID stored in *path* if that process exists, else ``None``.

    A missing file, non-numeric content, or a non-positive value all count
    as "no live daemon".
    """
    try:
        with open(path) as f:
            pid = int(f.read().strip())
    except (FileNotFoundError, ValueError):
        return None
    if pid <= 0:
        # kill(2) treats 0 and negative values as process-group selectors,
        # so they can never identify a single daemon process.
        return None
    try:
        os.kill(pid, 0)  # signal 0 only probes for existence; nothing is sent
    except (ProcessLookupError, OverflowError):
        return None
    except PermissionError:
        pass  # the process exists but belongs to another user
    return pid


def start_daemon():
    """Start the healer as a background process.

    If ``PID_FILE`` names a live process, report it and return. Otherwise any
    stale or corrupt PID file is discarded and the process forks:

    * the parent prints the child's PID, records it in ``PID_FILE`` and
      returns;
    * the child detaches into its own session, sends its output to
      ``LOG_FILE`` and runs healer cycles in an endless loop, saving each
      report to ``REPORT_FILE``. SIGTERM and SIGINT make the child exit
      with status 0.
    """
    from datetime import timezone  # local import: only the banner needs it

    running_pid = _live_pid_from_file(PID_FILE)
    if running_pid is not None:
        print(f"[Service] Healer already running (PID {running_pid})")
        return

    # Whatever PID file is left at this point is stale or unreadable.
    try:
        os.remove(PID_FILE)
    except FileNotFoundError:
        pass

    # Flush pending output first: anything still buffered is copied into the
    # child by fork() and could be written a second time when the child exits.
    sys.stdout.flush()
    sys.stderr.flush()

    pid = os.fork()
    if pid > 0:
        # Parent: remember the child's PID so stop/status can find it.
        print(f"[Service] Healer daemon started (PID {pid})")
        with open(PID_FILE, 'w') as f:
            f.write(str(pid))
        return

    # Child: become a session leader, detached from the launching terminal.
    os.setsid()

    # One line-buffered stream for both stdout and stderr, so the `logs`
    # command sees messages immediately and in order. UTF-8 is forced because
    # the daemon prints non-ASCII characters regardless of the locale.
    log_stream = open(LOG_FILE, 'a', buffering=1, encoding='utf-8')
    sys.stdout = log_stream
    sys.stderr = log_stream

    print(f"\n{'='*60}")
    print(f"[Service] Daemon started at {datetime.now(timezone.utc).isoformat()}")
    print(f"{'='*60}\n")

    # Imported only in the child, after output has been redirected.
    from healer import SpaceHealer, HEALER_CONFIG

    healer = SpaceHealer()
    spaces = healer.discover_spaces()

    print(f"[Service] Monitoring {len(spaces)} spaces")
    print(f"[Service] Poll interval: {HEALER_CONFIG['poll_interval_seconds']}s")

    def handle_signal(signum, frame):
        print(f"[Service] Received signal {signum}, shutting down...")
        # SystemExit is not an Exception subclass, so the loop's
        # `except Exception` below does not swallow it.
        sys.exit(0)

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    while True:
        try:
            healer.run_cycle(spaces)
            healer.print_report()

            report = healer.generate_report()
            # Write to a temporary file and rename it into place so that a
            # concurrent `status` call never reads a half-written report.
            tmp_path = REPORT_FILE + ".tmp"
            with open(tmp_path, 'w') as f:
                json.dump(report, f, indent=2)
            os.replace(tmp_path, REPORT_FILE)

        except Exception as e:
            # Best effort: a failed cycle is logged and the loop keeps going.
            print(f"[Service] ❌ Cycle error: {e}")

        time.sleep(HEALER_CONFIG["poll_interval_seconds"])
|
|
|
|
def stop_daemon():
    """Stop the healer daemon.

    Reads the PID from ``PID_FILE``, sends that process SIGTERM and removes
    the PID file. A PID file that is corrupt, non-positive, or that names a
    process which no longer exists is treated as stale and removed. Any other
    failure to signal the process is reported and the PID file is left alone.
    """
    try:
        with open(PID_FILE) as f:
            pid_text = f.read().strip()
    except FileNotFoundError:
        print("[Service] Healer not running")
        return

    try:
        pid = int(pid_text)
    except ValueError:
        pid = 0  # unparseable content: handled as stale below

    if pid <= 0:
        # Never hand 0 or a negative number to os.kill(): kill(2) would
        # signal a whole process group (or every process we may signal)
        # instead of one daemon.
        os.remove(PID_FILE)
        print("[Service] Healer was not running (stale PID file)")
        return

    try:
        os.kill(pid, signal.SIGTERM)
        os.remove(PID_FILE)
        print(f"[Service] Healer stopped (PID {pid})")
    except ProcessLookupError:
        os.remove(PID_FILE)
        print("[Service] Healer was not running (stale PID file)")
    except (OSError, OverflowError) as e:
        # e.g. PermissionError when the process belongs to another user, or
        # a number too large to be a PID.
        print(f"[Service] Error stopping: {e}")
|
|
|
|
def check_status():
    """Check if daemon is running.

    Prints STOPPED when there is no PID file. When the PID file is corrupt or
    names a process that no longer exists, prints STOPPED and removes the
    stale file. Otherwise prints RUNNING followed by a summary of the last
    saved report, if one exists and can be read.
    """
    try:
        with open(PID_FILE) as f:
            pid_text = f.read().strip()
    except FileNotFoundError:
        print("[Service] Healer: STOPPED")
        return

    try:
        pid = int(pid_text)
        if pid <= 0:
            # 0 / negative values select process groups, not a single process.
            raise ValueError(pid_text)
        os.kill(pid, 0)  # signal 0 only probes for existence; nothing is sent
        running = True
    except PermissionError:
        running = True  # the process exists but belongs to another user
    except (ValueError, ProcessLookupError, OverflowError):
        running = False

    if not running:
        print(f"[Service] Healer: STOPPED (stale PID {pid_text})")
        os.remove(PID_FILE)
        return

    print(f"[Service] Healer: RUNNING (PID {pid_text})")

    try:
        with open(REPORT_FILE) as f:
            report = json.load(f)
    except FileNotFoundError:
        return  # no cycle has produced a report yet
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError: the file is corrupt or was
        # caught in the middle of being written.
        print(f"  Last report unreadable: {e}")
        return
    if not isinstance(report, dict):
        print("  Last report unreadable: unexpected format")
        return

    # `or` fallbacks keep the summary printable when a field is null.
    spaces = report.get('spaces') or []
    daily_cost = report.get('total_estimated_daily_cost') or 0
    print(f"  Last check: {report.get('generated_at', 'unknown')}")
    print(f"  Spaces monitored: {len(spaces)}")
    print(f"  Actions today: {report.get('actions_today', 0)}")
    print(f"  Est. daily cost: ${daily_cost:.2f}")
|
|
|
|
def view_logs(lines: int = 50):
    """View recent daemon logs.

    Prints the last *lines* lines of ``LOG_FILE`` with trailing whitespace
    stripped. A value of zero or less prints nothing. If the log file does
    not exist, a notice is printed instead.
    """
    from collections import deque  # local import: only this command needs it

    try:
        # errors="replace" keeps the viewer usable even if the log contains
        # bytes that are not valid UTF-8.
        with open(LOG_FILE, encoding="utf-8", errors="replace") as f:
            # A bounded deque keeps only the tail while streaming the file,
            # so a large log is never held in memory. Clamping to 0 avoids
            # the slice pitfall where `[-0:]` selects the whole list.
            tail = deque(f, maxlen=max(lines, 0))
    except FileNotFoundError:
        print("[Service] No logs found")
        return

    for line in tail:
        print(line.rstrip())
|
|
|
|
def main():
    """Parse the command line and run the requested service command.

    Accepts one positional command (start, stop, status, logs or restart)
    and an optional ``--lines`` count that only the ``logs`` command uses.
    """
    cli = argparse.ArgumentParser(description="Healer Service Manager")
    cli.add_argument("command", choices=["start", "stop", "status", "logs", "restart"])
    cli.add_argument("--lines", type=int, default=50, help="Lines of log to show")
    opts = cli.parse_args()

    def restart():
        # Stop, pause for a second, then start again.
        stop_daemon()
        time.sleep(1)
        start_daemon()

    # argparse has already restricted `command` to these keys.
    dispatch = {
        "start": start_daemon,
        "stop": stop_daemon,
        "restart": restart,
        "status": check_status,
        "logs": lambda: view_logs(opts.lines),
    }
    dispatch[opts.command]()
|
|
|
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|