beanapologist committed on
Commit
549100e
·
1 Parent(s): 4f67ea2

Reduce GPU to 3min/15steps to fit remaining Zero GPU quota

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -24,7 +24,7 @@ def download_data():
24
 
25
  return os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model")
26
 
27
- @spaces.GPU(duration=600) # 10 minutes
28
  def train_model_gpu():
29
  """GPU training after data is ready"""
30
  log = []
@@ -39,7 +39,7 @@ def train_model_gpu():
39
  env.update({
40
  "NUM_LAYERS": "3", # Small but reasonable
41
  "MODEL_DIM": "96", # Fits in GPU memory
42
- "MAX_STEPS": "30", # Quick proof of concept
43
  "MICRO_BATCH_SIZE": "4", # Reasonable batch
44
  "SEQ_LENGTH": "256", # Moderate sequences
45
  "TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
@@ -47,7 +47,7 @@ def train_model_gpu():
47
  })
48
 
49
  yield log_step("🚀 GPU activated - starting training")
50
- yield log_step("Config: 3 layers, 96-dim, 30 steps (GPU optimized)")
51
  yield log_step("=" * 60)
52
 
53
  process = subprocess.Popen(
@@ -112,7 +112,7 @@ with gr.Blocks(title="μ⁸ Kernel") as demo:
112
  - **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
113
  - **μ⁸ = 1** eight-cycle attention
114
 
115
- **Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/30 steps).
116
  """)
117
 
118
  btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
 
24
 
25
  return os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model")
26
 
27
+ @spaces.GPU(duration=180) # 3 minutes (fits remaining quota)
28
  def train_model_gpu():
29
  """GPU training after data is ready"""
30
  log = []
 
39
  env.update({
40
  "NUM_LAYERS": "3", # Small but reasonable
41
  "MODEL_DIM": "96", # Fits in GPU memory
42
+ "MAX_STEPS": "15", # Fast run for remaining quota
43
  "MICRO_BATCH_SIZE": "4", # Reasonable batch
44
  "SEQ_LENGTH": "256", # Moderate sequences
45
  "TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
 
47
  })
48
 
49
  yield log_step("🚀 GPU activated - starting training")
50
+ yield log_step("Config: 3 layers, 96-dim, 15 steps (fits remaining quota)")
51
  yield log_step("=" * 60)
52
 
53
  process = subprocess.Popen(
 
112
  - **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
113
  - **μ⁸ = 1** eight-cycle attention
114
 
115
+ **Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/15 steps, 3 min).
116
  """)
117
 
118
  btn = gr.Button("🚀 Start Training", variant="primary", size="lg")