File size: 1,531 Bytes
671ce97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import sys
import time
from antigravity_sdk.client import RemoteGPU

# Config
# Path of the training script whose source main() reads and submits to the
# remote GPU. It is relative, so it is resolved against the current working
# directory at launch time.
SCRIPT_PATH = "examples/deep_nanogpt_resumable.py"
# Upper bound on the number of submit-and-check iterations main() performs.
MAX_LOOPS = 20

def main():
    """Drive the resumable training script on the remote GPU until it stops.

    Reads the source of SCRIPT_PATH and submits it through ``RemoteGPU.run``
    up to MAX_LOOPS times. After each run the captured output is scanned for
    a status marker, checked in this order:

    * ``TRAINING_COMPLETE``  -- training is done; stop looping.
    * ``CONTINUE_TRAINING``  -- the chunk finished; pause briefly, resubmit.
    * ``FATAL SCRIPT ERROR`` -- the remote script failed; stop looping.
    * anything else          -- unknown state; stop instead of resubmitting.

    Once training has completed, reports whether ``comparison_loss_v2.png``
    is present in the current working directory.

    Exits the process with status 1 if SCRIPT_PATH does not exist.
    """
    if not os.path.exists(SCRIPT_PATH):
        print(f"❌ Script not found: {SCRIPT_PATH}")
        sys.exit(1)

    # Python source is UTF-8 by default; do not depend on the platform's
    # locale encoding when reading the script that will be shipped remotely.
    with open(SCRIPT_PATH, encoding="utf-8") as f:
        code = f.read()

    print("🚀 Launching Deep-NanoGPT Phase 2 (Resumable Training)...")
    gpu = RemoteGPU()

    finished = False
    for i in range(MAX_LOOPS):
        print(f"\n🌀 Loop {i + 1}/{MAX_LOOPS}...")

        # Downloads are requested on every run; per the original author's
        # note, the remote script only copies its artifacts at the very end.
        result = gpu.run(code, download_files=True, verbose=True)

        # NOTE(review): RemoteGPU's contract is not visible from this file;
        # treat a missing/None output as "no marker" rather than crashing.
        output = result.output or ""

        if "TRAINING_COMPLETE" in output:
            print("\n✅ Training Finished!")
            finished = True
            break

        if "CONTINUE_TRAINING" in output:
            print("⏳ Chunk complete. Resuming next chunk...")
            time.sleep(2)  # Breathing room before resubmitting
            continue

        if "FATAL SCRIPT ERROR" in output:
            print("❌ Fatal Error on server. Stopping.")
        else:
            print("⚠️ Unknown status. Stopping safely.")
        break
    else:
        # Every iteration asked to continue: say so instead of ending silently.
        print(
            f"\n⚠️ Reached MAX_LOOPS ({MAX_LOOPS}) without TRAINING_COMPLETE. "
            "Stopping."
        )

    # Final check. Only claim success when this invocation actually completed
    # training; otherwise a stale file from an earlier run would be reported.
    if finished and os.path.exists("comparison_loss_v2.png"):
        print("\n✅ Success! Saved comparison_loss_v2.png")

# Run the launcher only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()