|
|
| import os
|
| import time
|
| import signal
|
| import logging
|
| import threading
|
|
|
| logger = logging.getLogger(__name__)
|
|
|
class EmergencyAbort:
    """Watch a sentinel file and terminate the process when it is touched.

    Constructing the monitor (re)creates ``abort_file`` with a short header.
    After ``start()``, a daemon thread polls the file's modification time
    every ``check_interval`` seconds.  If the mtime has moved forward since
    the monitor's own last write, ``SIGTERM`` is sent to the current process;
    otherwise the file is rewritten with a fresh "Last checked" line.

    Note: because the monitor rewrites the file on every poll, a touch that
    lands between the mtime check and the rewrite of the same poll is
    overwritten and goes unnoticed.
    """

    def __init__(self, abort_file="emergency_abort.txt", check_interval=5):
        """Store the settings and create (or overwrite) the abort file.

        Args:
            abort_file: Path of the sentinel file to create and watch.
            check_interval: Seconds between two polls of the file.

        Raises:
            OSError: If the abort file cannot be written.
        """
        self.abort_file = abort_file
        self.check_interval = check_interval
        self.running = False
        self.thread = None
        # Set by stop() so a waiting monitor thread wakes up immediately
        # instead of sleeping out the rest of its interval.
        self._stop_event = threading.Event()

        self._write_abort_file()

    def _write_abort_file(self):
        """Rewrite the abort file and return its resulting modification time.

        The returned mtime already includes the monitor's own write, so using
        it as the comparison baseline prevents the monitor from mistaking its
        heartbeat for a user-initiated touch.
        """
        with open(self.abort_file, "w") as f:
            f.write("# Emergency Abort File\n")
            f.write("# To abort training, update the timestamp of this file\n")
            f.write(f"# Last checked: {time.ctime()}\n")
        return os.path.getmtime(self.abort_file)

    def _abort_requested(self, last_modified):
        """Return True if the abort file's mtime is newer than ``last_modified``."""
        return os.path.getmtime(self.abort_file) > last_modified

    def _check_file(self):
        """Poll the abort file until stopped; send SIGTERM to self on a touch.

        Runs in the monitor thread created by ``start()``.
        """
        # Bind the event this thread was started with: start() installs a
        # fresh event per run, so a stale thread can never be revived.
        stop_event = self._stop_event
        last_modified = os.path.getmtime(self.abort_file)

        while self.running:
            # wait() is the poll delay; it returns True as soon as stop()
            # sets the event, so no further check happens after a stop.
            if stop_event.wait(self.check_interval) or not self.running:
                break

            try:
                if self._abort_requested(last_modified):
                    logger.warning(
                        "Emergency abort file modified! Initiating abort sequence."
                    )
                    os.kill(os.getpid(), signal.SIGTERM)
                    return

                # Refresh the "Last checked" line and take the baseline from
                # the file *after* this write.  Keeping the pre-write mtime
                # would make the next poll see our own write as a touch and
                # abort the process spuriously.
                last_modified = self._write_abort_file()
            except Exception as e:
                # Best effort: a transient file error must not kill the monitor.
                logger.error("Error checking abort file: %s", e)

    def start(self):
        """Start the abort file monitor.

        Calling this while a monitor thread is already running is a no-op.

        Returns:
            This instance, so the call can be chained.
        """
        if self.running and self.thread is not None and self.thread.is_alive():
            return self

        self._stop_event = threading.Event()
        self.running = True
        self.thread = threading.Thread(target=self._check_file, daemon=True)
        self.thread.start()
        logger.info(
            "Emergency abort monitor started. Modify %s to terminate training.",
            self.abort_file,
        )
        return self

    def stop(self):
        """Stop the abort file monitor."""
        self.running = False
        # Wake the monitor thread right away so the join below does not have
        # to outlast a full check_interval.
        self._stop_event.set()
        if self.thread:
            self.thread.join(timeout=2)
        logger.info("Emergency abort monitor stopped.")
|
|
|