conversation-memory / healer_service.py
ScottzillaSystems's picture
Upload healer_service.py with huggingface_hub
07846a9 verified
#!/usr/bin/env python3
"""
Autonomous Self-Healing Service — Background daemon
Runs healer.py as a persistent service with logging and alerting.
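Usage:
    python healer_service.py start              # Start daemon
    python healer_service.py stop               # Stop daemon
    python healer_service.py restart            # Stop, then start the daemon again
    python healer_service.py status             # Check status
    python healer_service.py logs [--lines N]   # View the last N log lines (default 50)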
"""
import os
import sys
import time
import json
import signal
import subprocess
import argparse
from datetime import datetime
# Runtime file locations. Each one defaults to a fixed path under /tmp and can
# be overridden with the matching environment variable (an empty value falls
# back to the default). Every invocation (start/stop/status/logs) must see the
# same values, because the commands find the daemon through these files.
PID_FILE = os.environ.get("HEALER_PID_FILE") or "/tmp/healer_daemon.pid"  # PID of the forked daemon
LOG_FILE = os.environ.get("HEALER_LOG_FILE") or "/tmp/healer_daemon.log"  # daemon stdout/stderr
REPORT_FILE = os.environ.get("HEALER_REPORT_FILE") or "/tmp/healer_last_report.json"  # latest cycle report
def start_daemon():
    """Start the healer as a detached background process.

    If PID_FILE names a live process, report it and return without starting
    a second daemon; an empty, corrupt or stale PID file is removed first.
    Otherwise the process forks:

    * the parent records the child's PID in PID_FILE and returns;
    * the child moves into its own session, sends its output to LOG_FILE and
      runs healer cycles forever, saving each cycle's report to REPORT_FILE.

    The child leaves this function only by exiting (SIGTERM and SIGINT trigger
    ``sys.exit(0)``) or through an uncaught error raised before the loop.
    """
    import traceback  # local imports: only the daemon child needs these
    from datetime import timezone

    try:
        with open(PID_FILE) as f:
            old_pid = f.read().strip()
    except FileNotFoundError:
        old_pid = None

    if old_pid is not None:
        try:
            old_pid_num = int(old_pid)
        except ValueError:
            old_pid_num = 0  # empty or corrupt PID file: treat as stale
        running = False
        # Probe positive PIDs only: 0 and negative values address process
        # groups in kill(2), not a single process.
        if old_pid_num > 0:
            try:
                os.kill(old_pid_num, 0)  # signal 0: existence check, nothing is sent
                running = True
            except ProcessLookupError:
                running = False
            except PermissionError:
                running = True  # the process exists but belongs to another user
        if running:
            print(f"[Service] Healer already running (PID {old_pid})")
            return
        os.remove(PID_FILE)

    # Flush before forking so text still sitting in the parent's buffers is
    # not written a second time by the child.
    sys.stdout.flush()
    sys.stderr.flush()

    # Fork to background
    pid = os.fork()
    if pid > 0:
        with open(PID_FILE, 'w') as f:
            f.write(str(pid))
        print(f"[Service] Healer daemon started (PID {pid})")
        return

    # Child process: leave the launching terminal's session.
    os.setsid()

    # One line-buffered handle shared by stdout and stderr, so log entries
    # reach the file promptly and in the order they were produced.
    log = open(LOG_FILE, 'a', buffering=1, encoding='utf-8')
    sys.stdout = log
    sys.stderr = log

    # Re-point the OS-level descriptors as well, so nothing in this process
    # (or in programs it launches) keeps reading from or writing to the
    # terminal that started the daemon.
    null_fd = os.open(os.devnull, os.O_RDONLY)
    os.dup2(null_fd, 0)
    os.dup2(log.fileno(), 1)
    os.dup2(log.fileno(), 2)
    if null_fd > 2:
        os.close(null_fd)

    # Naive UTC timestamp: same text as the deprecated datetime.utcnow().
    started_at = datetime.now(timezone.utc).replace(tzinfo=None)
    print(f"\n{'='*60}")
    print(f"[Service] Daemon started at {started_at.isoformat()}")
    print(f"{'='*60}\n")

    # Import and run healer
    from healer import SpaceHealer, HEALER_CONFIG
    healer = SpaceHealer()
    spaces = healer.discover_spaces()
    print(f"[Service] Monitoring {len(spaces)} spaces")
    print(f"[Service] Poll interval: {HEALER_CONFIG['poll_interval_seconds']}s")

    def handle_signal(signum, frame):
        print(f"[Service] Received signal {signum}, shutting down...")
        sys.exit(0)

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    while True:
        try:
            healer.run_cycle(spaces)
            healer.print_report()
            # Save report for external access. Write to a temporary file and
            # rename it into place so a reader never sees a half-written
            # report, and a failed dump leaves the previous report intact.
            report = healer.generate_report()
            tmp_path = REPORT_FILE + ".tmp"
            with open(tmp_path, 'w') as f:
                json.dump(report, f, indent=2)
            os.replace(tmp_path, REPORT_FILE)
        except Exception as e:
            # Keep the daemon alive across a failed cycle, but log the full
            # traceback so the failure can be diagnosed from the log.
            print(f"[Service] ❌ Cycle error: {e}")
            traceback.print_exc()
        time.sleep(HEALER_CONFIG["poll_interval_seconds"])
def stop_daemon():
    """Stop the healer daemon by sending SIGTERM to the PID stored in PID_FILE.

    Prints the outcome instead of raising:

    * no PID file: the daemon is reported as not running;
    * empty, corrupt or non-positive PID, or no process with that PID: the
      PID file is treated as stale and removed;
    * any other failure (for example missing permission to signal the
      process): the error is printed and the PID file is left in place.

    The function returns right after the signal is sent; it does not wait
    for the process to exit.
    """
    try:
        with open(PID_FILE) as f:
            pid_text = f.read().strip()
    except FileNotFoundError:
        print("[Service] Healer not running")
        return

    try:
        pid = int(pid_text)
    except ValueError:
        pid = 0  # empty or corrupt PID file

    # Never hand 0 or a negative value to os.kill: kill(2) would signal a
    # whole process group (or every process we may signal) instead.
    if pid <= 0:
        os.remove(PID_FILE)
        print("[Service] Healer was not running (stale PID file)")
        return

    try:
        os.kill(pid, signal.SIGTERM)
        os.remove(PID_FILE)
        print(f"[Service] Healer stopped (PID {pid})")
    except ProcessLookupError:
        os.remove(PID_FILE)
        print("[Service] Healer was not running (stale PID file)")
    except Exception as e:
        print(f"[Service] Error stopping: {e}")
def check_status():
    """Print whether the daemon is running, plus a summary of its last report.

    The PID in PID_FILE is probed with signal 0. When no such process exists
    (or the PID file is empty or corrupt), the daemon is reported as stopped
    and the stale PID file is removed. When it is running and REPORT_FILE
    exists, selected fields of the report are printed; an unreadable or
    malformed report is mentioned instead of aborting the command.
    """
    try:
        with open(PID_FILE) as f:
            pid = f.read().strip()
    except FileNotFoundError:
        print("[Service] Healer: STOPPED")
        return

    try:
        pid_num = int(pid)
    except ValueError:
        pid_num = 0  # empty or corrupt PID file: treat as stale

    alive = False
    # Probe positive PIDs only: 0 and negative values address process groups.
    if pid_num > 0:
        try:
            os.kill(pid_num, 0)  # signal 0: existence check, nothing is sent
            alive = True
        except ProcessLookupError:
            alive = False
        except PermissionError:
            alive = True  # the process exists but belongs to another user

    if not alive:
        print(f"[Service] Healer: STOPPED (stale PID {pid})")
        os.remove(PID_FILE)
        return

    print(f"[Service] Healer: RUNNING (PID {pid})")
    try:
        with open(REPORT_FILE) as f:
            report = json.load(f)
    except FileNotFoundError:
        return  # no cycle has saved a report yet
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError, e.g. a report that is
        # being rewritten while we read it.
        print(f" Last report unreadable: {e}")
        return
    if not isinstance(report, dict):
        print(" Last report unreadable: unexpected format")
        return

    print(f" Last check: {report.get('generated_at', 'unknown')}")
    print(f" Spaces monitored: {len(report.get('spaces', []))}")
    print(f" Actions today: {report.get('actions_today', 0)}")
    print(f" Est. daily cost: ${report.get('total_estimated_daily_cost', 0):.2f}")
def view_logs(lines: int = 50, log_file=None):
    """Print the last *lines* lines of the daemon log.

    Args:
        lines: Number of trailing lines to print. Zero or a negative value
            prints nothing.
        log_file: Path of the log to read; defaults to LOG_FILE.

    Prints "[Service] No logs found" when the file does not exist.
    """
    from collections import deque  # local import: only this command needs it

    path = LOG_FILE if log_file is None else log_file
    if not os.path.exists(path):
        print("[Service] No logs found")
        return

    # A bounded deque keeps only the tail while the file is streamed, so a
    # large log is never held in memory as a whole. Undecodable bytes are
    # replaced rather than aborting the read.
    with open(path, encoding="utf-8", errors="replace") as f:
        tail = deque(f, maxlen=max(lines, 0))
    for line in tail:
        print(line.rstrip())
def main():
    """Parse the command line and run the requested service command.

    Accepted commands are start, stop, status, logs and restart; ``--lines``
    sets how many log lines the ``logs`` command shows (default 50).
    """
    cli = argparse.ArgumentParser(description="Healer Service Manager")
    cli.add_argument("command", choices=["start", "stop", "status", "logs", "restart"])
    cli.add_argument("--lines", type=int, default=50, help="Lines of log to show")
    opts = cli.parse_args()

    def restart():
        # Stop, pause for a second, then start again.
        stop_daemon()
        time.sleep(1)
        start_daemon()

    # One handler per accepted command; argparse has already rejected
    # anything outside this set.
    handlers = {
        "start": start_daemon,
        "stop": stop_daemon,
        "restart": restart,
        "status": check_status,
        "logs": lambda: view_logs(opts.lines),
    }
    handlers[opts.command]()


if __name__ == "__main__":
    main()