Spaces:
Running on Zero
| import gradio as gr | |
| import subprocess | |
| import sys | |
| import os | |
| import spaces | |
def download_data():
    """Fetch the parameter-golf repo and build the tokenizer on CPU (outside GPU context).

    Returns:
        bool: True when the tokenizer model file exists after the attempt.
    """
    # Shallow-clone the data repo only if it is not already present;
    # capture output so the Gradio log stays clean.
    if not os.path.exists("pg-data"):
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/openai/parameter-golf", "pg-data"],
            capture_output=True, timeout=120
        )
    # Build the tokenizer only when the clone did not ship it.
    if not os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model"):
        # Use cwd= instead of os.chdir: the original chdir("pg-data") was never
        # restored if subprocess.run raised (e.g. TimeoutExpired), leaving the
        # whole process stranded in pg-data for every later relative path.
        subprocess.run(
            [sys.executable, "data/cached_challenge_fineweb.py", "--variant", "sp1024", "--train-shards", "1"],
            capture_output=True, timeout=180, cwd="pg-data"
        )
    return os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model")
# ZeroGPU: attach the GPU only for this function, for up to 3 minutes
# (fits remaining quota). Without this decorator the `spaces` import is
# unused and the Space never actually acquires a GPU for training.
@spaces.GPU(duration=180)
def train_model_gpu():
    """Run the training subprocess inside the GPU context, streaming its log.

    Yields:
        str: the accumulated log text after every line of subprocess output,
        so the Gradio Textbox updates live.
    """
    log = []

    def log_step(msg):
        log.append(msg)
        return "\n".join(log)

    try:
        # Training hyper-parameters are passed to the script via environment
        # variables (the script's contract — confirm against train_gpt_kernel.py).
        env = os.environ.copy()
        env.update({
            "NUM_LAYERS": "3",        # Small but reasonable
            "MODEL_DIM": "96",        # Fits in GPU memory
            "MAX_STEPS": "15",        # Fast run for remaining quota
            "MICRO_BATCH_SIZE": "4",  # Reasonable batch
            "SEQ_LENGTH": "256",      # Moderate sequences
            "TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
            "DATA_PATH": "pg-data/data/datasets/fineweb10B_sp1024"
        })
        yield log_step("🚀 GPU activated - starting training")
        yield log_step("Config: 3 layers, 96-dim, 15 steps (fits remaining quota)")
        yield log_step("=" * 60)
        # Context-manage the Popen so the stdout pipe is closed even if the
        # consumer of this generator stops iterating early.
        with subprocess.Popen(
            [sys.executable, "train_gpt_kernel.py"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # merge streams so one reader sees everything
            text=True,
            bufsize=1,                 # line-buffered for live streaming
            env=env
        ) as process:
            for line in iter(process.stdout.readline, ''):
                if line:
                    yield log_step(line.rstrip())
            process.wait()
        yield log_step("=" * 60)
        if process.returncode == 0:
            yield log_step("✅ Training complete!")
        else:
            yield log_step(f"⚠️ Exit code {process.returncode}")
    except Exception as e:
        yield log_step(f"❌ Error: {str(e)}")
def train_model():
    """Main entry point: install deps and prepare data on CPU, then run GPU training.

    Yields:
        str: the accumulated log text so the Gradio Textbox updates live.
    """
    log = []

    def log_step(msg):
        log.append(msg)
        return "\n".join(log)

    try:
        yield log_step("🔄 Installing dependencies...")
        pip_result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q",
             "torch", "numpy", "tiktoken", "sentencepiece", "tqdm", "requests"],
            timeout=300
        )
        # Surface a failed install instead of silently continuing — the
        # training subprocess would otherwise die with a confusing ImportError.
        if pip_result.returncode != 0:
            yield log_step(f"⚠️ pip exited with code {pip_result.returncode} - continuing anyway")
        yield log_step("✅ Dependencies ready")
        yield log_step("🔄 Preparing data (outside GPU to save time)...")
        data_ready = download_data()
        if not data_ready:
            yield log_step("❌ Data download failed")
            return
        yield log_step("✅ Data ready - activating GPU...")
        # Delegate to the GPU generator, relaying its log updates verbatim.
        yield from train_model_gpu()
    except Exception as e:
        yield log_step(f"❌ Error: {str(e)}")
# Gradio UI: one button kicks off train_model(), whose yielded strings
# stream into the Textbox as a live training log.
with gr.Blocks(title="μ⁸ Kernel") as demo:
    gr.Markdown("""
# μ⁸ Kernel Training - Parameter Golf
Formally verified LM architecture (464 Lean 4 proofs):
- **C(r) = 2r/(1+r²)** coherence activation
- **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
- **μ⁸ = 1** eight-cycle attention
**Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/15 steps, 3 min).
""")
    btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
    out = gr.Textbox(label="Training Log", lines=25, max_lines=40, autoscroll=True)
    # train_model is a generator, so each yield incrementally updates `out`.
    btn.click(fn=train_model, outputs=out)
# Enable the request queue (needed for streaming generator outputs), then serve.
demo.queue()
demo.launch()