Spaces:
Paused
Paused
File size: 1,531 Bytes
671ce97 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | import os
import sys
import time
from antigravity_sdk.client import RemoteGPU
# Config
# Path of the training script whose source main() reads and submits via RemoteGPU.run().
SCRIPT_PATH = "examples/deep_nanogpt_resumable.py"
# Maximum number of times main() submits the script in its run/check loop.
MAX_LOOPS = 20
def main():
    """Submit the training script to a RemoteGPU repeatedly until it reports completion.

    Reads the source at ``SCRIPT_PATH`` and passes it to ``RemoteGPU.run`` up
    to ``MAX_LOOPS`` times. After each run the captured output is scanned for
    a status marker, checked in this order:

    * ``TRAINING_COMPLETE``  -- stop; training finished.
    * ``CONTINUE_TRAINING``  -- pause two seconds, then submit the script again.
    * ``FATAL SCRIPT ERROR`` -- stop; the run reported a fatal error.
    * anything else          -- stop rather than resubmit blindly.

    If every loop ends with ``CONTINUE_TRAINING`` a warning is printed so the
    run does not end silently. Finally, reports success if
    ``comparison_loss_v2.png`` exists in the current directory.

    Exits the process with status 1 when ``SCRIPT_PATH`` does not exist.
    """
    if not os.path.exists(SCRIPT_PATH):
        print(f"❌ Script not found: {SCRIPT_PATH}")
        sys.exit(1)

    # Explicit encoding: without it the read depends on the platform's locale
    # default and can fail or garble non-ASCII characters in the script.
    with open(SCRIPT_PATH, encoding="utf-8") as f:
        code = f.read()

    print("🚀 Launching Deep-NanoGPT Phase 2 (Resumable Training)...")
    gpu = RemoteGPU()

    for i in range(MAX_LOOPS):
        print(f"\n🔄 Loop {i+1}/{MAX_LOOPS}...")

        # Downloads are requested on every run; per the original author's
        # note, the submitted script only copies its files to cwd at the end.
        result = gpu.run(code, download_files=True, verbose=True)

        # NOTE(review): guards against a missing/None output so it falls into
        # the "unknown status" branch instead of raising -- confirm against
        # the RemoteGPU.run contract.
        output = result.output or ""

        if "TRAINING_COMPLETE" in output:
            print("\n✅ Training Finished!")
            break
        elif "CONTINUE_TRAINING" in output:
            print("⏳ Chunk complete. Resuming next chunk...")
            time.sleep(2)  # Breathing room
        elif "FATAL SCRIPT ERROR" in output:
            print("❌ Fatal Error on server. Stopping.")
            break
        else:
            print("⚠️ Unknown status. Stopping safely.")
            break
    else:
        # Reached only when no iteration hit `break`, i.e. the loop budget ran
        # out while the script was still asking to continue.
        print(f"⚠️ Reached {MAX_LOOPS} loops without TRAINING_COMPLETE. Stopping.")

    # Final check
    if os.path.exists("comparison_loss_v2.png"):
        print("\n✅ Success! Saved comparison_loss_v2.png")
# Run the driver only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|