Spaces:
Sleeping
Sleeping
beanapologist committed on
Commit ·
549100e
1
Parent(s): 4f67ea2
Reduce GPU to 3min/15steps to fit remaining Zero GPU quota
Browse files
app.py
CHANGED
|
@@ -24,7 +24,7 @@ def download_data():
|
|
| 24 |
|
| 25 |
return os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model")
|
| 26 |
|
| 27 |
-
@spaces.GPU(duration=
|
| 28 |
def train_model_gpu():
|
| 29 |
"""GPU training after data is ready"""
|
| 30 |
log = []
|
|
@@ -39,7 +39,7 @@ def train_model_gpu():
|
|
| 39 |
env.update({
|
| 40 |
"NUM_LAYERS": "3", # Small but reasonable
|
| 41 |
"MODEL_DIM": "96", # Fits in GPU memory
|
| 42 |
-
"MAX_STEPS": "
|
| 43 |
"MICRO_BATCH_SIZE": "4", # Reasonable batch
|
| 44 |
"SEQ_LENGTH": "256", # Moderate sequences
|
| 45 |
"TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
|
|
@@ -47,7 +47,7 @@ def train_model_gpu():
|
|
| 47 |
})
|
| 48 |
|
| 49 |
yield log_step("🚀 GPU activated - starting training")
|
| 50 |
-
yield log_step("Config: 3 layers, 96-dim,
|
| 51 |
yield log_step("=" * 60)
|
| 52 |
|
| 53 |
process = subprocess.Popen(
|
|
@@ -112,7 +112,7 @@ with gr.Blocks(title="μ⁸ Kernel") as demo:
|
|
| 112 |
- **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
|
| 113 |
- **μ⁸ = 1** eight-cycle attention
|
| 114 |
|
| 115 |
-
**Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/
|
| 116 |
""")
|
| 117 |
|
| 118 |
btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
|
|
|
|
| 24 |
|
| 25 |
return os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model")
|
| 26 |
|
| 27 |
+
@spaces.GPU(duration=180) # 3 minutes (fits remaining quota)
|
| 28 |
def train_model_gpu():
|
| 29 |
"""GPU training after data is ready"""
|
| 30 |
log = []
|
|
|
|
| 39 |
env.update({
|
| 40 |
"NUM_LAYERS": "3", # Small but reasonable
|
| 41 |
"MODEL_DIM": "96", # Fits in GPU memory
|
| 42 |
+
"MAX_STEPS": "15", # Fast run for remaining quota
|
| 43 |
"MICRO_BATCH_SIZE": "4", # Reasonable batch
|
| 44 |
"SEQ_LENGTH": "256", # Moderate sequences
|
| 45 |
"TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
|
|
|
|
| 47 |
})
|
| 48 |
|
| 49 |
yield log_step("🚀 GPU activated - starting training")
|
| 50 |
+
yield log_step("Config: 3 layers, 96-dim, 15 steps (fits remaining quota)")
|
| 51 |
yield log_step("=" * 60)
|
| 52 |
|
| 53 |
process = subprocess.Popen(
|
|
|
|
| 112 |
- **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
|
| 113 |
- **μ⁸ = 1** eight-cycle attention
|
| 114 |
|
| 115 |
+
**Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/15 steps, 3 min).
|
| 116 |
""")
|
| 117 |
|
| 118 |
btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
|