# Mu / app.py — Hugging Face Space by beanapologist
# Commit 549100e: Reduce GPU to 3min/15steps to fit remaining Zero GPU quota
import gradio as gr
import subprocess
import sys
import os
import spaces
def download_data():
    """Fetch the parameter-golf repo and its tokenizer on CPU, outside the GPU context.

    Runs before GPU activation so GPU quota is not spent on downloads.
    Both subprocess calls are best-effort (output captured, no check=True);
    success is judged solely by whether the tokenizer file exists afterwards.

    Returns:
        bool: True if the tokenizer model file is present after the attempts.
    """
    # Shallow-clone the parameter-golf repo if we don't already have it.
    if not os.path.exists("pg-data"):
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/openai/parameter-golf", "pg-data"],
            capture_output=True, timeout=120
        )
    # Build the tokenizer if not present. Run inside the repo via cwd=
    # rather than os.chdir(): a TimeoutExpired (or any exception) in the
    # original chdir version skipped os.chdir("..") and stranded the whole
    # process inside pg-data/, breaking every later relative path.
    if not os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model"):
        subprocess.run(
            [sys.executable, "data/cached_challenge_fineweb.py", "--variant", "sp1024", "--train-shards", "1"],
            capture_output=True, timeout=180, cwd="pg-data"
        )
    return os.path.exists("pg-data/data/tokenizers/fineweb_1024_bpe.model")
@spaces.GPU(duration=180)  # 3-minute GPU budget (fits remaining Zero GPU quota)
def train_model_gpu():
    """Run GPU training and stream cumulative log text.

    Generator: each yield is the full log-so-far (suitable for a Gradio
    Textbox output). Assumes download_data() has already prepared the
    tokenizer and dataset under pg-data/.

    Yields:
        str: the accumulated training log after each new line/event.
    """
    log = []

    def log_step(msg):
        # Append one message and return the whole log as a single string.
        log.append(msg)
        return "\n".join(log)

    process = None
    try:
        # GPU-sized config passed to the training script via environment vars.
        env = os.environ.copy()
        env.update({
            "NUM_LAYERS": "3",        # Small but reasonable
            "MODEL_DIM": "96",        # Fits in GPU memory
            "MAX_STEPS": "15",        # Fast run for remaining quota
            "MICRO_BATCH_SIZE": "4",  # Reasonable batch
            "SEQ_LENGTH": "256",      # Moderate sequences
            "TOKENIZER_PATH": "pg-data/data/tokenizers/fineweb_1024_bpe.model",
            "DATA_PATH": "pg-data/data/datasets/fineweb10B_sp1024"
        })
        yield log_step("🚀 GPU activated - starting training")
        yield log_step("Config: 3 layers, 96-dim, 15 steps (fits remaining quota)")
        yield log_step("=" * 60)
        # Line-buffered, stderr merged into stdout so everything streams in order.
        process = subprocess.Popen(
            [sys.executable, "train_gpt_kernel.py"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            env=env
        )
        # Stream each output line to the UI as it arrives.
        for line in iter(process.stdout.readline, ''):
            if line:
                yield log_step(line.rstrip())
        process.wait()
        yield log_step("=" * 60)
        if process.returncode == 0:
            yield log_step("✅ Training complete!")
        else:
            yield log_step(f"⚠️ Exit code {process.returncode}")
    except Exception as e:
        yield log_step(f"❌ Error: {str(e)}")
    finally:
        # If the generator is abandoned early (client disconnect, GPU quota
        # expiry) or an exception fired mid-stream, make sure the training
        # subprocess does not keep running and burning GPU time.
        if process is not None and process.poll() is None:
            process.kill()
def train_model():
    """Main entry point: install deps, prepare data on CPU, then run GPU training.

    Generator wired to the Gradio button; each yield is the full log-so-far.

    Yields:
        str: the accumulated status/training log after each step.
    """
    log = []

    def log_step(msg):
        # Append one message and return the whole log as a single string.
        log.append(msg)
        return "\n".join(log)

    try:
        yield log_step("🔄 Installing dependencies...")
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q",
             "torch", "numpy", "tiktoken", "sentencepiece", "tqdm", "requests"],
            timeout=300
        )
        # Previously a pip failure was silently ignored; surface it but keep
        # going — the packages may already be installed in the Space image.
        if result.returncode != 0:
            yield log_step(f"⚠️ pip exited with code {result.returncode} - continuing")
        yield log_step("✅ Dependencies ready")
        yield log_step("🔄 Preparing data (outside GPU to save time)...")
        data_ready = download_data()
        if not data_ready:
            yield log_step("❌ Data download failed")
            return
        yield log_step("✅ Data ready - activating GPU...")
        # Delegate to the @spaces.GPU-decorated generator for the GPU phase.
        yield from train_model_gpu()
    except Exception as e:
        yield log_step(f"❌ Error: {str(e)}")
# --- Gradio UI: a single button that streams the training log into a textbox ---
with gr.Blocks(title="μ⁸ Kernel") as demo:
    # Static header describing the architecture and the GPU budget.
    gr.Markdown("""
# μ⁸ Kernel Training - Parameter Golf
Formally verified LM architecture (464 Lean 4 proofs):
- **C(r) = 2r/(1+r²)** coherence activation
- **δ_S = 1+√2 ≈ 2.414** silver MLP expansion
- **μ⁸ = 1** eight-cycle attention
**Zero GPU enabled** - downloads data on CPU, then activates GPU for training (3L/96d/15 steps, 3 min).
""")
    start_button = gr.Button("🚀 Start Training", variant="primary", size="lg")
    log_output = gr.Textbox(label="Training Log", lines=25, max_lines=40, autoscroll=True)
    # train_model is a generator, so the textbox updates live as it yields.
    start_button.click(fn=train_model, outputs=log_output)

# Queueing is required for streaming (generator) outputs.
demo.queue()
demo.launch()