beanapologist committed on
Commit
549100e
·
1 Parent(s): 4f67ea2

Reduce GPU to 3min/15steps to fit remaining Zero GPU quota

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -24,7 +24,7 @@ def download_data():
24
 
25
  return os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model")
26
 
27
- @spaces.GPU(duration=600) # 10 minutes
28
  def train_model_gpu():
29
  """GPU training after data is ready"""
30
  log = []
@@ -39,7 +39,7 @@ def train_model_gpu():
39
  env.update({
40
  "NUM_LAYERS": "3", # Small but reasonable
41
  "MODEL_DIM": "96", # Fits in GPU memory
42
- "MAX_STEPS": "30", # Quick proof of concept
43
  "MICRO_BATCH_SIZE": "4", # Reasonable batch
44
  "SEQ_LENGTH": "256", # Moderate sequences
45
  "TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
@@ -47,7 +47,7 @@ def train_model_gpu():
47
  })
48
 
49
  yield log_step("🚀 GPU activated - starting training")
50
- yield log_step("Config: 3 layers, 96-dim, 30 steps (GPU optimized)")
51
  yield log_step("=" * 60)
52
 
53
  process = subprocess.Popen(
@@ -112,7 +112,7 @@ with gr.Blocks(title="μ⁸ Kernel") as demo:
112
  - **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
113
  - **μ⁸ = 1** eight-cycle attention
114
 
115
- **Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/30 steps).
116
  """)
117
 
118
  btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
 
24
 
25
  return os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model")
26
 
27
+ @spaces.GPU(duration=180) # 3 minutes (fits remaining quota)
28
  def train_model_gpu():
29
  """GPU training after data is ready"""
30
  log = []
 
39
  env.update({
40
  "NUM_LAYERS": "3", # Small but reasonable
41
  "MODEL_DIM": "96", # Fits in GPU memory
42
+ "MAX_STEPS": "15", # Fast run for remaining quota
43
  "MICRO_BATCH_SIZE": "4", # Reasonable batch
44
  "SEQ_LENGTH": "256", # Moderate sequences
45
  "TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
 
47
  })
48
 
49
  yield log_step("🚀 GPU activated - starting training")
50
+ yield log_step("Config: 3 layers, 96-dim, 15 steps (fits remaining quota)")
51
  yield log_step("=" * 60)
52
 
53
  process = subprocess.Popen(
 
112
  - **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
113
  - **μ⁸ = 1** eight-cycle attention
114
 
115
+ **Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/15 steps, 3 min).
116
  """)
117
 
118
  btn = gr.Button("🚀 Start Training", variant="primary", size="lg")