# 🏁 SQL Debug Env: Google Colab Training Starter
# 1. RUN THIS FIRST TO INSTALL
!pip install trl transformers torch datasets httpx accelerate wandb -U
# 2. THE TRAINING SCRIPT
import os
import torch
from datasets import Dataset
from trl import GRPOConfig, GRPOTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM
# --- Configuration ---
# Hugging Face model ID for the policy model: a 0.5B instruct-tuned Qwen coder.
MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
# --- Mock Dataset (For quick test without the local server) ---
def make_simple_dataset(num_rows: int = 10):
    """Build a tiny mock dataset of identical SQL-repair prompts.

    Stand-in for the local reward server's data: every row carries the same
    fixed prompt so the training loop can be smoke-tested quickly.

    Args:
        num_rows: Number of duplicate rows to generate (default 10,
            matching the original hard-coded count).

    Returns:
        A ``datasets.Dataset`` with ``prompt`` and ``task_id`` columns.
    """
    prompt = (
        "Fix the following SQL query: SELECT * FROM userss; "
        "Provide only the fixed SQL."
    )
    return Dataset.from_list(
        [{"prompt": prompt, "task_id": "easy_syntax_fix"} for _ in range(num_rows)]
    )
# --- Mock Reward (Proves the math works on GPU) ---
def mock_reward_func(completions, **kwargs):
    """Score each completion 1.0 if it contains SQL, else 0.0.

    A mock reward used to prove the GRPO reward plumbing works end to end:
    any completion containing a case-insensitive "SELECT" earns full reward.

    Args:
        completions: Generated completion strings from the model.
        **kwargs: Extra trainer-supplied fields (ignored).

    Returns:
        A list of floats, one reward per completion.
    """
    return [1.0 if "SELECT" in text.upper() else 0.0 for text in completions]
# --- Training Loop ---
def run_colab_train():
    """Run a short GRPO smoke-test training job on a single Colab GPU.

    Loads the tokenizer and model, builds the mock dataset, and runs 10
    optimizer steps with the mock reward so the full pipeline (generation,
    reward, policy update, wandb logging) can be verified end to end.
    """
    # Plain string — the original used an f-string with no placeholders.
    print("🚀 Starting GRPO on Colab T4 GPU...")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Some tokenizers ship without a pad token; reuse EOS for padding, but
    # don't clobber a pad token that already exists.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # NOTE(review): the T4 (compute capability 7.5) has no native bfloat16
        # support — float16 is its accelerated half-precision dtype. bfloat16
        # still runs (emulated, slower); confirm the target GPU before relying
        # on this choice.
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    training_args = GRPOConfig(
        output_dir="./colab_results",
        learning_rate=1e-5,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,   # effective batch of 4 on a 16 GB T4
        num_generations=4,               # GRPO group size per prompt
        max_completion_length=64,
        num_train_epochs=1,
        max_steps=10,                    # overrides num_train_epochs — smoke test only
        logging_steps=1,
        report_to="wandb",               # requires a wandb login in the Colab session
    )

    trainer = GRPOTrainer(
        model=model,
        reward_funcs=[mock_reward_func],
        args=training_args,
        train_dataset=make_simple_dataset(),
        processing_class=tokenizer,
    )
    trainer.train()
# Entry point: only train when executed as a script, not on import.
if __name__ == "__main__":
    run_colab_train()