import gradio as gr
import subprocess
import sys
import os

import spaces

# Artifacts produced by the data-preparation step; the same paths are checked
# on disk and handed to the trainer via environment variables, so keep them in
# one place.
TOKENIZER_PATH = "pg-data/data/tokenizers/fineweb_1024_bpe.model"
DATA_PATH = "pg-data/data/datasets/fineweb10B_sp1024"


def download_data():
    """Download data outside GPU context to save GPU time.

    Clones the parameter-golf repo (shallow) and builds the tokenizer if
    either is missing. Returns True iff the tokenizer model file exists
    afterwards.
    """
    # Download parameter-golf repo if needed
    if not os.path.exists("pg-data"):
        subprocess.run(
            ["git", "clone", "--depth", "1",
             "https://github.com/openai/parameter-golf", "pg-data"],
            capture_output=True,
            timeout=120,
        )

    # Download tokenizer if not present.
    # NOTE: use cwd= rather than os.chdir()/os.chdir("..") — the original
    # chdir dance left the process CWD pointing at pg-data/ if the subprocess
    # raised (e.g. TimeoutExpired), breaking every later relative path.
    if not os.path.exists(TOKENIZER_PATH):
        subprocess.run(
            [sys.executable, "data/cached_challenge_fineweb.py",
             "--variant", "sp1024", "--train-shards", "1"],
            capture_output=True,
            timeout=180,
            cwd="pg-data",
        )

    return os.path.exists(TOKENIZER_PATH)


@spaces.GPU(duration=180)  # 3 minutes (fits remaining quota)
def train_model_gpu():
    """GPU training after data is ready.

    Generator: yields the cumulative log text after each step/output line so
    the Gradio Textbox updates live.
    """
    log = []

    def log_step(msg):
        log.append(msg)
        return "\n".join(log)

    try:
        # Run training with GPU-optimized config passed via environment.
        env = os.environ.copy()
        env.update({
            "NUM_LAYERS": "3",         # Small but reasonable
            "MODEL_DIM": "96",         # Fits in GPU memory
            "MAX_STEPS": "15",         # Fast run for remaining quota
            "MICRO_BATCH_SIZE": "4",   # Reasonable batch
            "SEQ_LENGTH": "256",       # Moderate sequences
            "TOKENIZER_PATH": TOKENIZER_PATH,
            "DATA_PATH": DATA_PATH,
        })

        yield log_step("🚀 GPU activated - starting training")
        yield log_step("Config: 3 layers, 96-dim, 15 steps (fits remaining quota)")
        yield log_step("=" * 60)

        # Line-buffered text pipe; stderr merged into stdout so everything
        # streams through one log.
        process = subprocess.Popen(
            [sys.executable, "train_gpt_kernel.py"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            env=env,
        )

        # Stream trainer output into the UI as it arrives.
        for line in iter(process.stdout.readline, ''):
            if line:
                yield log_step(line.rstrip())

        process.wait()
        yield log_step("=" * 60)
        if process.returncode == 0:
            yield log_step("✅ Training complete!")
        else:
            yield log_step(f"⚠️ Exit code {process.returncode}")

    except Exception as e:
        yield log_step(f"❌ Error: {str(e)}")


def train_model():
    """Main entry point - prepares data then runs GPU training.

    Generator wired to the button click: installs dependencies and downloads
    data on CPU first, then delegates to the @spaces.GPU-decorated generator
    so GPU quota is only consumed for the actual training.
    """
    log = []

    def log_step(msg):
        log.append(msg)
        return "\n".join(log)

    try:
        yield log_step("🔄 Installing dependencies...")
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q",
             "torch", "numpy", "tiktoken", "sentencepiece", "tqdm", "requests"],
            timeout=300,
        )
        yield log_step("✅ Dependencies ready")

        yield log_step("🔄 Preparing data (outside GPU to save time)...")
        data_ready = download_data()
        if not data_ready:
            yield log_step("❌ Data download failed")
            return
        yield log_step("✅ Data ready - activating GPU...")

        # Now run the GPU part, re-emitting its cumulative log updates.
        yield from train_model_gpu()

    except Exception as e:
        yield log_step(f"❌ Error: {str(e)}")


with gr.Blocks(title="μ⁸ Kernel") as demo:
    gr.Markdown("""
    # μ⁸ Kernel Training - Parameter Golf

    Formally verified LM architecture (464 Lean 4 proofs):
    - **C(r) = 2r/(1+r²)** coherence activation
    - **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
    - **μ⁸ = 1** eight-cycle attention

    **Zero GPU enabled** - downloads data on CPU, then activates GPU for
    training (3L/96d/15 steps, 3 min).
    """)

    btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
    out = gr.Textbox(label="Training Log", lines=25, max_lines=40, autoscroll=True)
    btn.click(fn=train_model, outputs=out)

demo.queue()
demo.launch()