AdriBat1
Add Tower of Babel (V3) experiment: 120-layer, gradient monitoring, val loss
938275a
import os
import sys
import time
from antigravity_sdk.client import RemoteGPU
# Relative path of the training script; main() reads this file's source and
# submits it through RemoteGPU.run().
SCRIPT_PATH = "examples/deep_nanogpt_v3.py"
# Upper bound on the number of submit/resume iterations performed by main().
MAX_LOOPS = 30 # 400 steps / 25 per chunk ~ 16 loops, extra buffer
def main():
    """Drive the chunked remote run of the Tower of Babel training script.

    Reads the source of ``SCRIPT_PATH`` and submits it through
    ``RemoteGPU.run`` at most ``MAX_LOOPS`` times. After every run the
    captured output is scanned for a status marker, checked in this order:

    * ``TRAINING_COMPLETE`` -- report success and stop.
    * ``CONTINUE_TRAINING`` -- wait two seconds, then submit again.
    * ``FATAL`` -- report the error and stop.
    * no known marker -- stop.

    If every iteration asked to continue, a warning is printed so that an
    unfinished run is not mistaken for a completed one. Finally the presence
    of ``dashboard.png`` in the current working directory is reported.

    Exits with status 1 (via ``sys.exit``) when ``SCRIPT_PATH`` is missing.
    """
    if not os.path.exists(SCRIPT_PATH):
        print(f"❌ Script not found: {SCRIPT_PATH}")
        sys.exit(1)

    # Python source files are UTF-8 by default; decode explicitly instead of
    # relying on the platform's locale encoding.
    with open(SCRIPT_PATH, "r", encoding="utf-8") as f:
        code = f.read()

    print("🏔️ Tower of Babel Experiment: 120-Layer Deep-NanoGPT")
    print("📊 Features: Gradient Norm Tracking, Val Loss, Resumable")
    print("-" * 50)

    gpu = RemoteGPU()
    for i in range(MAX_LOOPS):
        print(f"\n🌀 Loop {i+1}/{MAX_LOOPS}...")
        result = gpu.run(code, download_files=True, verbose=True)
        # NOTE(review): RemoteGPU's result type is not visible here; treat a
        # missing output as empty so the marker checks below cannot raise.
        output = result.output or ""

        if "TRAINING_COMPLETE" in output:
            print("\n✅ Training Finished!")
            break
        elif "CONTINUE_TRAINING" in output:
            print("⏳ Chunk complete. Resuming...")
            time.sleep(2)
        elif "FATAL" in output:
            print("❌ Fatal Error. Stopping.")
            break
        else:
            print("⚠️ Unknown status. Stopping safely.")
            break
    else:
        # Reached only when no iteration hit a ``break``: the loop budget ran
        # out while the script was still asking to continue.
        print(f"⚠️ Reached MAX_LOOPS ({MAX_LOOPS}) without TRAINING_COMPLETE. Stopping.")

    if os.path.exists("dashboard.png"):
        print("\n📊 Success! Saved dashboard.png")
# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()