#!/usr/bin/env python3
"""
Autonomous Self-Healing Service — Background daemon
Runs healer.py as a persistent service with logging and alerting.
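Usage:
    python healer_service.py start              # Start daemon
    python healer_service.py stop               # Stop daemon
    python healer_service.py restart            # Stop the daemon, then start it again
    python healer_service.py status             # Check status
    python healer_service.py logs [--lines N]   # View the last N log lines (default 50)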
"""
import os
import sys
import time
import json
import signal
import subprocess
import argparse
from datetime import datetime
# Holds the PID of the forked daemon: written by start_daemon(), removed by
# stop_daemon() and by the stale-file handling in start_daemon()/check_status().
PID_FILE = "/tmp/healer_daemon.pid"
# File the daemon's stdout and stderr are redirected to (opened in append mode).
LOG_FILE = "/tmp/healer_daemon.log"
# JSON dump of healer.generate_report(), rewritten after every successful
# cycle and read back by check_status().
REPORT_FILE = "/tmp/healer_last_report.json"
def start_daemon():
    """Start the healer as a detached background process.

    If ``PID_FILE`` names a live process, report it and return without
    starting anything.  A stale, empty or unparsable PID file is removed
    first.  The process then forks:

    * the parent prints the child's PID, records it in ``PID_FILE`` and
      returns;
    * the child becomes a session leader, redirects ``sys.stdout`` and
      ``sys.stderr`` to ``LOG_FILE`` and runs healer cycles forever, sleeping
      ``HEALER_CONFIG["poll_interval_seconds"]`` between them.

    The child never returns from this function; it leaves via ``sys.exit(0)``
    when it receives SIGTERM or SIGINT.
    """
    if os.path.exists(PID_FILE):
        with open(PID_FILE) as f:
            old_pid = f.read().strip()

        # Only a positive integer can name a single process.  Empty or garbage
        # content must not be mistaken for a running daemon.
        try:
            pid_num = int(old_pid)
        except ValueError:
            pid_num = 0

        alive = False
        if pid_num > 0:
            try:
                # Signal 0 performs the existence/permission check only.
                os.kill(pid_num, 0)
            except (ProcessLookupError, OverflowError):
                alive = False
            except PermissionError:
                # The process exists but belongs to another user.
                alive = True
            else:
                alive = True

        if alive:
            print(f"[Service] Healer already running (PID {old_pid})")
            return
        os.remove(PID_FILE)

    # Flush before forking so text still sitting in the parent's buffers is
    # not duplicated into (and later re-emitted by) the child.
    sys.stdout.flush()
    sys.stderr.flush()

    # Fork to background
    pid = os.fork()
    if pid > 0:
        print(f"[Service] Healer daemon started (PID {pid})")
        with open(PID_FILE, 'w') as f:
            f.write(str(pid))
        return

    # Child process: detach from the controlling terminal.
    os.setsid()

    # One line-buffered handle shared by stdout and stderr, so every message
    # reaches the log immediately and in order.  UTF-8 is explicit because
    # the messages below contain non-ASCII characters.
    log = open(LOG_FILE, 'a', buffering=1, encoding='utf-8',
               errors='backslashreplace')
    sys.stdout = log
    sys.stderr = log

    print(f"\n{'='*60}")
    print(f"[Service] Daemon started at {datetime.utcnow().isoformat()}")
    print(f"{'='*60}\n")

    # Import and run healer (imported only in the child, after redirection,
    # so anything the import prints ends up in the log).
    from healer import SpaceHealer, HEALER_CONFIG

    healer = SpaceHealer()
    spaces = healer.discover_spaces()
    print(f"[Service] Monitoring {len(spaces)} spaces")
    print(f"[Service] Poll interval: {HEALER_CONFIG['poll_interval_seconds']}s")

    def handle_signal(signum, frame):
        print(f"[Service] Received signal {signum}, shutting down...")
        # SystemExit is not an Exception subclass, so the cycle's
        # ``except Exception`` below does not swallow it.
        sys.exit(0)

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    while True:
        try:
            healer.run_cycle(spaces)
            healer.print_report()

            # Save report for external access.  Write to a sibling temp file
            # and rename it into place so a concurrent reader never sees a
            # truncated JSON document.
            report = healer.generate_report()
            tmp_path = REPORT_FILE + ".tmp"
            with open(tmp_path, 'w') as f:
                json.dump(report, f, indent=2)
            os.replace(tmp_path, REPORT_FILE)
        except Exception as e:
            # Best effort: one failed cycle must not take the daemon down.
            print(f"[Service] ❌ Cycle error: {e}")
        time.sleep(HEALER_CONFIG["poll_interval_seconds"])
def stop_daemon():
    """Stop the healer daemon named in ``PID_FILE``.

    Sends SIGTERM to the recorded PID and removes the PID file.  A PID file
    that is empty, non-numeric, non-positive, or names a process that no
    longer exists is treated as stale and removed.  Any other failure (for
    example, lacking permission to signal the process) is reported and the
    PID file is left in place.
    """
    if not os.path.exists(PID_FILE):
        print("[Service] Healer not running")
        return

    with open(PID_FILE) as f:
        pid = f.read().strip()

    try:
        pid_num = int(pid)
    except ValueError:
        pid_num = 0

    if pid_num <= 0:
        # Never hand 0 or a negative value to os.kill(): those address the
        # caller's process group / every signalable process, not one daemon.
        os.remove(PID_FILE)
        print("[Service] Healer was not running (stale PID file)")
        return

    try:
        os.kill(pid_num, signal.SIGTERM)
        os.remove(PID_FILE)
        print(f"[Service] Healer stopped (PID {pid})")
    except (ProcessLookupError, OverflowError):
        # No such process (or a value too large to be a PID): stale file.
        os.remove(PID_FILE)
        print("[Service] Healer was not running (stale PID file)")
    except Exception as e:
        print(f"[Service] Error stopping: {e}")
def check_status():
    """Print whether the daemon is running, plus a summary of its last report.

    The daemon counts as running when ``PID_FILE`` holds a positive integer
    PID that names an existing process.  Otherwise the PID file is reported
    as stale and removed.  When running and ``REPORT_FILE`` can be read and
    parsed, selected fields of the report are printed as well.
    """
    if not os.path.exists(PID_FILE):
        print("[Service] Healer: STOPPED")
        return

    with open(PID_FILE) as f:
        pid = f.read().strip()

    # Empty or garbage content must not be reported as a running daemon.
    try:
        pid_num = int(pid)
    except ValueError:
        pid_num = 0

    alive = False
    if pid_num > 0:
        try:
            # Signal 0 performs the existence/permission check only.
            os.kill(pid_num, 0)
        except (ProcessLookupError, OverflowError):
            alive = False
        except PermissionError:
            # The process exists but belongs to another user.
            alive = True
        else:
            alive = True

    if not alive:
        print(f"[Service] Healer: STOPPED (stale PID {pid})")
        os.remove(PID_FILE)
        return

    print(f"[Service] Healer: RUNNING (PID {pid})")
    if not os.path.exists(REPORT_FILE):
        return

    try:
        with open(REPORT_FILE) as f:
            report = json.load(f)
    except (OSError, ValueError):
        # The report may be mid-rewrite or may have vanished; the status line
        # above is still valid, so just skip the details.
        return
    if not isinstance(report, dict):
        return

    print(f" Last check: {report.get('generated_at', 'unknown')}")
    print(f" Spaces monitored: {len(report.get('spaces', []))}")
    print(f" Actions today: {report.get('actions_today', 0)}")
    print(f" Est. daily cost: ${report.get('total_estimated_daily_cost', 0):.2f}")
def view_logs(lines: int = 50, log_file: "str | None" = None):
    """Print the last ``lines`` lines of the daemon log.

    Args:
        lines: Maximum number of trailing lines to print.  Zero or a negative
            value prints nothing.
        log_file: Path of the log to read; defaults to ``LOG_FILE``.

    Prints ``[Service] No logs found`` when the log file does not exist.
    Trailing whitespace is stripped from every printed line.
    """
    # Local import keeps this function self-contained.
    from collections import deque

    path = LOG_FILE if log_file is None else log_file
    try:
        # Bytes that are not valid UTF-8 are replaced rather than aborting
        # the whole listing.
        handle = open(path, encoding="utf-8", errors="replace")
    except FileNotFoundError:
        print("[Service] No logs found")
        return

    with handle:
        # A bounded deque keeps only the tail while streaming the file, so
        # a large log is never held in memory.  maxlen=0 keeps nothing,
        # unlike ``all_lines[-0:]``, which is the whole list.
        tail = deque(handle, maxlen=max(lines, 0))

    for line in tail:
        print(line.rstrip())
def main():
    """Parse the command line and run the requested service action.

    Accepts one positional command (start, stop, status, logs, restart) and
    an optional ``--lines`` count that is passed to the ``logs`` action.
    """
    cli = argparse.ArgumentParser(description="Healer Service Manager")
    cli.add_argument("command", choices=["start", "stop", "status", "logs", "restart"])
    cli.add_argument("--lines", type=int, default=50, help="Lines of log to show")
    opts = cli.parse_args()

    def restart():
        # Stop, pause for a second, then start again.
        stop_daemon()
        time.sleep(1)
        start_daemon()

    # argparse's ``choices`` guarantees the command is one of these keys.
    actions = {
        "start": start_daemon,
        "stop": stop_daemon,
        "restart": restart,
        "status": check_status,
        "logs": lambda: view_logs(opts.lines),
    }
    actions[opts.command]()
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|