Spaces:

md896
/

sql-debug-env

Running

App Files Files Community

sql-debug-env / colab_real_world.py

md896

Fix: Mock vllm and llm_blender to stabilize GRPOTrainer in HF Jobs environment

bc20ef9 13 days ago

raw

history blame contribute delete

3.88 kB

	# 🏆 SQL Debug Env: FINAL REAL-WORLD BRIDGE
	# (This script automatically installs its own dependencies)

	# 1. AUTO-INSTALL LIBRARIES
	import os
	print("📦 Checking libraries...")
	os.system("pip install trl accelerate wandb -U")

	import httpx
	import torch
	import random
	from datasets import Dataset
	from trl import GRPOConfig, GRPOTrainer
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# --- 2. BRIDGE CONFIGURATION ---
	# Put your Localtunnel URL here
	BRIDGE_URL = "https://metal-bushes-lie.loca.lt"
	MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"

	# Headers to bypass the Localtunnel landing page
	BYPASS_HEADERS = {"Bypass-Tunnel-Reminder": "true"}

	# --- 3. REAL DATASET GENERATION ---
	def make_real_dataset():
	print(f"🔗 Connecting to your Mac at {BRIDGE_URL}...")
	tasks = ["easy_syntax_fix", "medium_logic_fix", "hard_multi_bug"]
	rows = []

	with httpx.Client(base_url=BRIDGE_URL, headers=BYPASS_HEADERS, timeout=30.0) as client:
	for t_id in tasks:
	try:
	resp = client.post("/reset", json={"task_id": t_id})
	obs = resp.json()["observation"]
	prompt = (
	"Fix the following SQL query and provide only the fixed SQL.\n"
	f"Task: {obs['task_description']}\n"
	f"Broken Query: {obs['original_query']}\n"
	"Fixed SQL:"
	)
	for _ in range(10):
	rows.append({"prompt": prompt, "task_id": t_id})
	except Exception as e:
	print(f"⚠️ Error fetching task {t_id}: {e}")

	if not rows:
	raise RuntimeError("Dataset is empty. Is your local server and tunnel running?")
	return Dataset.from_list(rows)

	# --- 4. REAL REWARD FUNCTION ---
	def sql_reward_func(completions, task_id, **kwargs):
	rewards = []
	with httpx.Client(base_url=BRIDGE_URL, headers=BYPASS_HEADERS, timeout=30.0) as client:
	for query, t_id in zip(completions, task_id):
	try:
	client.post("/reset", json={"task_id": t_id})
	sql_part = query.split("Fixed SQL:")[-1].strip() if "Fixed SQL:" in query else query.strip()
	resp = client.post("/step", json={"action": {"action_type": "submit_query", "query": sql_part}})
	reward = resp.json()["reward"]
	except Exception as e:
	print(f"❌ Connection Error for {t_id}: {e}")
	reward = 0.0

	reward += random.uniform(-1e-6, 1e-6)
	rewards.append(reward)
	return rewards

	# --- 5. TRAINING LOOP ---
	def run_real_world_train():
	print(f"🚀 Starting Real-World GRPO on Cloud GPU...")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	tokenizer.pad_token = tokenizer.eos_token

	model = AutoModelForCausalLM.from_pretrained(
	MODEL_NAME,
	torch_dtype=torch.float32,
	device_map="auto"
	)

	training_args = GRPOConfig(
	output_dir="./real_results",
	learning_rate=1e-5,
	per_device_train_batch_size=1,
	gradient_accumulation_steps=4,
	num_generations=4,
	max_completion_length=64,
	num_train_epochs=1,
	max_steps=20,
	logging_steps=1,
	fp16=False,
	report_to="wandb",
	push_to_hub=True, # <--- NEW: Pushes logs and model to HF
	hub_model_id="sql-debug-agent-7b", # <--- NEW: Your HF Model Repo Name
	hub_strategy="every_save"
	)

	trainer = GRPOTrainer(
	model=model,
	reward_funcs=[sql_reward_func],
	args=training_args,
	train_dataset=make_real_dataset(),
	processing_class=tokenizer,
	)

	print("🧠 Cloud Brain connected. Starting Real-World training...")
	trainer.train()

	if __name__ == "__main__":
	run_real_world_train()