"""Runner script for a Hugging Face Space that performs one GRPO training job.

At import time a tiny HTTP server is started on port 7860 in a daemon thread.
``main()`` then checks for a GPU, clones the project repository, installs the
training dependencies, runs the training module while streaming its output,
requests a hardware downgrade to ``cpu-basic`` and finally idles forever.
"""

import os
import subprocess
import sys
import threading
import time
from http.server import HTTPServer, SimpleHTTPRequestHandler

import torch
from huggingface_hub import HfApi

# File whose presence means a previous run finished training successfully.
DONE_MARKER = "training_done.txt"
# Directory the project repository is cloned into (relative to the cwd).
CHECKOUT_DIR = "LifeOS"
# NOTE(review): the repository name ends in "Agen" -- confirm this is not a
# truncated "Agent" before relying on the clone.
REPO_URL = "https://huggingface.co/spaces/SParsh003/LifeOS-Personal-Chaos-Agen"


# Start a dummy HTTP server on 7860 so Hugging Face marks the space as 'Healthy'.
def start_health_server():
    """Answer every GET on port 7860 with a fixed 200 response; blocks forever."""

    class HealthCheckHandler(SimpleHTTPRequestHandler):
        def do_GET(self):
            self.send_response(200)
            self.end_headers()
            self.wfile.write(b"Training in progress...")

    server = HTTPServer(('0.0.0.0', 7860), HealthCheckHandler)
    server.serve_forever()


# Daemon thread: the health server must never keep the process alive by itself.
threading.Thread(target=start_health_server, daemon=True).start()


def log(msg):
    """Print *msg* and flush immediately so it appears in the logs right away."""
    print(msg, flush=True)


def _idle_forever():
    """Sleep in one-hour steps forever; never returns."""
    while True:
        time.sleep(3600)


def _run_streaming(cmd, cwd=None):
    """Run *cmd*, echo its combined stdout/stderr line by line, return its exit code."""
    with subprocess.Popen(
        cmd,
        cwd=cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    ) as process:
        for line in process.stdout:
            print(line, end="", flush=True)
    # Leaving the ``with`` block waits for the child, so returncode is set here.
    return process.returncode


def _prepare_and_train():
    """Clone the repository, install dependencies and run training.

    Returns:
        The exit code of the training process.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero.
        OSError: if ``git`` or the interpreter cannot be started.
    """
    if os.path.isdir(CHECKOUT_DIR):
        # A second `git clone` into an existing directory would fail.
        log("Repository checkout already present; skipping clone.")
    else:
        log("Cloning repository...")
        subprocess.run(["git", "clone", REPO_URL, CHECKOUT_DIR], check=True)

    log("Installing Unsloth, TRL, and dependencies (this takes a few minutes)...")
    # `sys.executable -m pip` installs into the interpreter that is running
    # this script, rather than whichever `pip` happens to be first on PATH.
    pip_install = [sys.executable, "-m", "pip", "install"]
    package_sets = (
        ["unsloth"],
        ["--no-deps", "trl", "peft", "accelerate", "bitsandbytes"],
        ["datasets", "transformers", "sentencepiece", "pydantic", "fastapi",
         "requests", "matplotlib"],
    )
    for packages in package_sets:
        status = subprocess.run(pip_install + packages).returncode
        if status != 0:
            # Installation stays best-effort, but the failure is now visible.
            log(f"WARNING: pip install {' '.join(packages)} exited with code "
                f"{status}; continuing.")

    log("Starting GRPO Training (50 episodes)...")
    cmd = [
        sys.executable, "-m", "lifeos.training.train_grpo",
        "--real-gpu",
        "--episodes", "50",
        "--push-to-hub",
        "--hub-model-id", "SParsh003/LifeOS-Trained-Agent",
    ]
    # Run and stream output. `cwd=` replaces a process-wide os.chdir(), so the
    # marker file is always written relative to the original directory.
    return _run_streaming(cmd, cwd=CHECKOUT_DIR)


def main():
    """Run the training job once, then request a CPU downgrade and idle.

    The hardware downgrade is attempted whether or not setup and training
    succeeded. The done-marker file is written only when the training process
    exits with code 0.
    """
    repo_id = "SParsh003/LifeOS-Trainer"
    api = HfApi(token=os.environ.get("HF_TOKEN"))

    log("===== STARTING TRAINING RUNNER =====")

    # RELIABLE GPU DETECTION using PyTorch
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        log(f"GPU DETECTED: {gpu_name}. Proceeding with training setup...")
    else:
        log("No GPU detected by PyTorch! Idling to prevent loops on CPU.")
        _idle_forever()

    if os.path.exists(DONE_MARKER):
        log("Training already completed. Idling.")
        _idle_forever()

    try:
        exit_code = _prepare_and_train()
    except (OSError, subprocess.CalledProcessError) as e:
        # Do not crash here: the downgrade request below must still run.
        log(f"Training run aborted during setup: {e}")
        exit_code = None

    if exit_code == 0:
        log("Training complete!")
        with open(DONE_MARKER, "w") as f:
            f.write("done")
    elif exit_code is not None:
        log(f"Training FAILED with exit code {exit_code}.")

    log("DOWNGRADING HARDWARE TO CPU TO STOP BILLING...")
    try:
        api.request_space_hardware(repo_id=repo_id, hardware="cpu-basic")
        log("Hardware downgraded successfully.")
    except Exception as e:
        # Deliberately broad: any failure is reported and the process idles.
        log(f"Failed to downgrade hardware: {e}")

    _idle_forever()


if __name__ == '__main__':
    main()