sage-t2i / start_training.py
itriedcoding's picture
Upload folder using huggingface_hub
2d7087a verified
Raw
History Blame Contribute Delete
678 Bytes
"""Start training in background and write to log file."""
import subprocess, sys, os
# Everything is resolved relative to this script's own directory, so the
# launcher works no matter which directory it is invoked from.
BASE = os.path.dirname(os.path.abspath(__file__))
log = os.path.join(BASE, "training_log.txt")

# Remove partial download so torchvision can re-download cleanly.
# Attempt the removal directly instead of checking os.path.exists() first:
# that avoids a check-then-act race and a redundant filesystem lookup.
partial = os.path.join(BASE, "training_data", "stl10_binary.tar.gz")
try:
    os.remove(partial)
except FileNotFoundError:
    pass  # No leftover archive; nothing to clean up.

# Launch train_local.py with the same interpreter, unbuffered (-u), sending
# both stdout and stderr to the log file (truncated on every start).
# The child receives its own copy of the file descriptor, so the parent's
# handle is closed as soon as the process has been spawned rather than being
# leaked for the lifetime of this script.
with open(log, "w", buffering=1) as log_file:
    proc = subprocess.Popen(
        [sys.executable, "-u", os.path.join(BASE, "train_local.py")],
        stdout=log_file,
        stderr=subprocess.STDOUT,
        cwd=BASE,
    )

# Record the child's PID next to the script so it can be looked up later.
with open(os.path.join(BASE, "train.pid"), "w") as f:
    f.write(str(proc.pid))

print(f"Training started, PID: {proc.pid}, log: {log}")