# Hugging Face Space — ZeroGPU app (page chrome from the file-viewer scrape removed)
import gradio as gr
import subprocess
import sys
import os
import spaces
def download_data():
    """Clone the parameter-golf repo and build the tokenizer/dataset shard.

    Runs entirely outside the ``@spaces.GPU`` context so slow network and
    CPU work does not burn GPU quota.

    Returns:
        bool: True if the tokenizer model file exists afterwards.
    """
    repo_dir = "pg-data"
    tokenizer = os.path.join(repo_dir, "data", "tokenizers", "fineweb_1024_bpe.model")

    # Shallow-clone the repo once; subsequent calls reuse the checkout.
    if not os.path.exists(repo_dir):
        clone = subprocess.run(
            ["git", "clone", "--depth", "1",
             "https://github.com/openai/parameter-golf", repo_dir],
            capture_output=True, timeout=120,
        )
        if clone.returncode != 0 or not os.path.isdir(repo_dir):
            # Clone failed (network/repo problem) — report failure instead
            # of crashing later on a missing directory.
            return False

    # Build the SentencePiece tokenizer + one training shard if missing.
    # Use cwd= rather than os.chdir: the original chdir/chdir-back pair was
    # not exception-safe — a timeout would leave the whole process stranded
    # inside pg-data, breaking every later relative path.
    if not os.path.exists(tokenizer):
        subprocess.run(
            [sys.executable, "data/cached_challenge_fineweb.py",
             "--variant", "sp1024", "--train-shards", "1"],
            capture_output=True, timeout=180, cwd=repo_dir,
        )

    return os.path.exists(tokenizer)
@spaces.GPU(duration=180)  # 3-minute window (fits remaining ZeroGPU quota)
def train_model_gpu():
    """Run the training subprocess on GPU, streaming the cumulative log.

    Yields:
        str: the full log transcript so far (one yield per appended line),
        suitable for streaming into a ``gr.Textbox``.
    """
    log = []

    def log_step(msg):
        # Append and return the whole transcript — Gradio streaming
        # replaces the textbox content on every yield.
        log.append(msg)
        return "\n".join(log)

    try:
        # Hyper-parameters are passed via the environment so the training
        # script itself stays untouched.
        env = os.environ.copy()
        env.update({
            "NUM_LAYERS": "3",        # Small but reasonable
            "MODEL_DIM": "96",        # Fits in GPU memory
            "MAX_STEPS": "15",        # Fast run for remaining quota
            "MICRO_BATCH_SIZE": "4",  # Reasonable batch
            "SEQ_LENGTH": "256",      # Moderate sequences
            "TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
            "DATA_PATH": "pg-data/data/datasets/fineweb10B_sp1024",
        })

        yield log_step("🚀 GPU activated - starting training")
        yield log_step("Config: 3 layers, 96-dim, 15 steps (fits remaining quota)")
        yield log_step("=" * 60)

        # Context manager guarantees the stdout pipe is closed and the
        # child is reaped even if the consumer abandons this generator
        # early (the original leaked the pipe in that case).
        with subprocess.Popen(
            [sys.executable, "train_gpt_kernel.py"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # merge stderr into the streamed log
            text=True,
            bufsize=1,                 # line-buffered for live streaming
            env=env,
        ) as process:
            for line in process.stdout:
                if line:
                    yield log_step(line.rstrip())

        yield log_step("=" * 60)
        if process.returncode == 0:
            yield log_step("✅ Training complete!")
        else:
            yield log_step(f"⚠️ Exit code {process.returncode}")
    except Exception as e:
        # Boundary handler: surface the error in the UI log instead of
        # crashing the Gradio event.
        yield log_step(f"❌ Error: {str(e)}")
def train_model():
    """UI entry point: install deps, stage data on CPU, then train on GPU.

    Yields:
        str: cumulative log text for streaming into the output textbox.
    """
    log = []

    def log_step(msg):
        # Keep the full transcript; each yield replaces the textbox text.
        log.append(msg)
        return "\n".join(log)

    try:
        yield log_step("🔄 Installing dependencies...")
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q",
             "torch", "numpy", "tiktoken", "sentencepiece", "tqdm", "requests"],
            timeout=300,
        )
        yield log_step("✅ Dependencies ready")

        # Data prep happens before the @spaces.GPU call so GPU quota is
        # spent only on actual training.
        yield log_step("🔄 Preparing data (outside GPU to save time)...")
        if not download_data():
            yield log_step("❌ Data download failed")
            return
        yield log_step("✅ Data ready - activating GPU...")

        # Delegate the GPU phase; yield from streams its log lines through
        # (idiomatic replacement for the manual for/yield loop).
        yield from train_model_gpu()
    except Exception as e:
        # Boundary handler: show the failure in the UI rather than crash.
        yield log_step(f"❌ Error: {str(e)}")
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(title="μ⁸ Kernel") as demo:
    gr.Markdown("""
# μ⁸ Kernel Training - Parameter Golf
Formally verified LM architecture (464 Lean 4 proofs):
- **C(r) = 2r/(1+r²)** coherence activation
- **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
- **μ⁸ = 1** eight-cycle attention
**Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/15 steps, 3 min).
""")
    start_button = gr.Button("🚀 Start Training", variant="primary", size="lg")
    log_box = gr.Textbox(label="Training Log", lines=25, max_lines=40, autoscroll=True)
    # Stream train_model's yielded transcript into the textbox.
    start_button.click(fn=train_model, outputs=log_box)

# queue() enables generator streaming; launch() starts the server.
demo.queue()
demo.launch()
|