"""
========================================
COLAB CODING AGENT TRAINING SCRIPT
Fine-tune Gemma-3-1B-IT as a mini Claude Code-style coding agent
Optimized for Google Colab T4 GPU (16GB VRAM)
========================================

INSTRUCTIONS:
1. Open https://colab.research.google.com
2. Change runtime to GPU (Runtime > Change runtime type > T4 GPU)
3. Run the install cell below (this script needs torch, transformers,
   datasets, peft, trl and bitsandbytes)
4. Authenticate with Hugging Face (Gemma requires license acceptance)
5. Run the training script
"""
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| import torch |
| import gc |
| from datasets import load_dataset |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig |
| from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training |
| from trl import SFTTrainer, SFTConfig |
|
|
# Startup banner: marks the beginning of the run in the notebook output.
banner_rule = "=" * 70
print(banner_rule)
print(" MINI CODING AGENT - Fine-tune Gemma-3-1B-IT")
print(" Target: ~1B params | Dataset: Coding instruction pairs")
print(banner_rule)
|
|
| |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Base checkpoint to fine-tune.
MODEL_ID = "google/gemma-3-1b-it"

# Instruction dataset. Names containing "Magicoder" are formatted from the
# problem/solution columns; any other name is formatted from input/output.
DATASET_NAME = "ise-uiuc/Magicoder-OSS-Instruct-75K"

# Where checkpoints and the final adapter are written locally, and the Hub
# repository id handed to the trainer configuration.
OUTPUT_DIR = "./gemma-code-agent"
HUB_MODEL_ID = "YOUR_USERNAME/gemma-3-1b-code-agent"

# Training hyperparameters. The trainer accumulates GRAD_ACCUM micro-batches
# of BATCH_SIZE examples before each optimizer step.
MAX_SEQ_LENGTH = 1024
NUM_EPOCHS = 2
BATCH_SIZE = 1
GRAD_ACCUM = 16
LEARNING_RATE = 5e-5
WARMUP_STEPS = 50

# LoRA adapter settings.
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05
LORA_MODULES = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Upper bound on how many (shuffled) training examples are used.
MAX_SAMPLES = 50_000
| |
|
|
| |
# ---- Step 1: tokenizer ----------------------------------------------------
print("\n[1/7] Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# If the checkpoint ships without a pad token, reuse EOS for padding.
needs_pad_token = tokenizer.pad_token is None
if needs_pad_token:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Record the configured sequence-length cap on the tokenizer itself.
tokenizer.model_max_length = MAX_SEQ_LENGTH
print(f" Vocab size: {len(tokenizer)}, max_length: {tokenizer.model_max_length}")
|
|
| |
# ---- Step 2: 4-bit quantized base model -----------------------------------
print("\n[2/7] Loading model with 4-bit quantization (NF4)...")

# Use the same precision rule the trainer configuration applies (bf16 when the
# GPU supports it, fp16 otherwise) so the quantized layers do not compute in
# bfloat16 while training runs in fp16.
compute_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# Trade compute for memory, then prepare the quantized model for k-bit
# training before adapters are attached.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
print(f" Model loaded on: {next(model.parameters()).device}")
|
|
| |
# ---- Step 3: LoRA adapters ------------------------------------------------
print("\n[3/7] Attaching LoRA adapters...")

# Adapter hyperparameters are taken from the LORA_* configuration constants.
lora_cfg = LoraConfig(
    task_type="CAUSAL_LM",
    bias="none",
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=LORA_MODULES,
)

# Wrap the base model with the adapters and report the trainable parameters.
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()
|
|
| |
# ---- Step 4: dataset ------------------------------------------------------
print(f"\n[4/7] Loading dataset: {DATASET_NAME}...")
ds = load_dataset(DATASET_NAME, split="train")

# Use at most MAX_SAMPLES rows, drawn from a reproducibly shuffled copy.
total_available = len(ds)
use_samples = total_available if total_available < MAX_SAMPLES else MAX_SAMPLES
ds = (
    ds.shuffle(seed=42)
    .select(range(use_samples))
)
print(f" Using {use_samples:,} / {total_available:,} samples")
|
|
|
|
def format_magicoder(example):
    """Turn a Magicoder-style record into a two-turn chat conversation.

    The record's ``problem`` field becomes the user turn and its ``solution``
    field becomes the assistant turn. The result is a dict with a single
    ``messages`` key holding both turns in that order.
    """
    user_turn = {"role": "user", "content": example["problem"]}
    assistant_turn = {"role": "assistant", "content": example["solution"]}
    return {"messages": [user_turn, assistant_turn]}
|
|
|
|
def format_opencode(example):
    """Turn an input/output record into a two-turn chat conversation.

    The record's ``input`` field becomes the user turn and its ``output``
    field becomes the assistant turn. The result is a dict with a single
    ``messages`` key holding both turns in that order.
    """
    user_turn = {"role": "user", "content": example["input"]}
    assistant_turn = {"role": "assistant", "content": example["output"]}
    return {"messages": [user_turn, assistant_turn]}
|
|
|
|
# Pick the formatter that matches the dataset's column names, then replace
# every original column with the single "messages" column it produces.
formatter = format_magicoder if "Magicoder" in DATASET_NAME else format_opencode
ds = ds.map(formatter, remove_columns=ds.column_names)

# Show the first conversation (truncated) as a quick sanity check.
print(f" Dataset ready: {len(ds):,} examples")
print(" Sample:")
sample_turns = ds[0]["messages"]
print(f" User: {sample_turns[0]['content'][:80]}...")
print(f" Assistant: {sample_turns[1]['content'][:80]}...")
|
|
| |
# ---- Step 5: trainer ------------------------------------------------------
print("\n[5/7] Configuring trainer...")

# Decide the mixed-precision mode once so exactly one of bf16/fp16 is enabled.
use_bf16 = torch.cuda.is_bf16_supported()

# MAX_SEQ_LENGTH was previously never handed to the trainer configuration, so
# changing it had no effect on truncation. TRL releases name the argument
# either `max_length` or `max_seq_length`; use whichever this installed
# SFTConfig declares.
sft_fields = getattr(SFTConfig, "__dataclass_fields__", {})
length_kwarg = "max_length" if "max_length" in sft_fields else "max_seq_length"

args = SFTConfig(
    output_dir=OUTPUT_DIR,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM,
    learning_rate=LEARNING_RATE,
    warmup_steps=WARMUP_STEPS,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    bf16=use_bf16,
    fp16=not use_bf16,
    gradient_checkpointing=True,
    push_to_hub=False,
    hub_model_id=HUB_MODEL_ID,
    report_to="none",
    dataloader_num_workers=2,
    remove_unused_columns=False,
    **{length_kwarg: MAX_SEQ_LENGTH},
)

# The dataset rows carry a "messages" list of chat turns; the tokenizer is
# passed as the processing class.
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=ds,
    processing_class=tokenizer,
)
|
|
| |
# ---- Step 6: training -----------------------------------------------------
# Print the key hyperparameters before training starts.
training_summary = (
    "\n[6/7] Starting training...",
    f" Epochs: {NUM_EPOCHS} | Batch: {BATCH_SIZE} x {GRAD_ACCUM}",
    f" LR: {LEARNING_RATE} | Warmup: {WARMUP_STEPS} | Max length: {MAX_SEQ_LENGTH}",
    "-" * 70,
)
for summary_line in training_summary:
    print(summary_line)

trainer.train()
|
|
| |
# ---- Step 7: save adapter and merged model --------------------------------
print("\n[7/7] Saving model...")
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

# Fold the LoRA weights into the base model and save it, with the tokenizer,
# next to the adapter directory.
# NOTE(review): the base model was loaded in 4-bit, so this merge happens on
# quantized weights - confirm the merged checkpoint's precision is acceptable.
merged_dir = f"{OUTPUT_DIR}-merged"
print(" Merging LoRA adapters into base model...")
merged_model = model.merge_and_unload()
merged_model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)

# OUTPUT_DIR already carries its own "./" prefix, so print the paths as-is
# (the previous messages showed "././gemma-code-agent/").
print("\n" + "=" * 70)
print(" TRAINING COMPLETE!")
print(f" LoRA model: {OUTPUT_DIR}/")
print(f" Merged model: {merged_dir}/")
print("=" * 70)

# Release Python garbage and cached CUDA blocks before running inference.
gc.collect()
torch.cuda.empty_cache()
|
|
| |
| |
| |
|
|
|
|
def chat_with_agent(prompt: str, max_new_tokens: int = 512) -> str:
    """Send a coding task to the fine-tuned agent and return its reply.

    The prompt is wrapped as a single user turn, rendered with the
    tokenizer's chat template, and completed by ``merged_model`` using
    sampling (temperature 0.7, top-p 0.95).

    Args:
        prompt: The user's request, in plain text.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text of the newly generated tokens only, with the prompt
        and special tokens removed.
    """
    messages = [{"role": "user", "content": prompt}]

    # Training leaves the module in train mode; switch to eval for inference.
    # NOTE(review): presumably this also lets generation use the KV cache when
    # gradient checkpointing was enabled for training - confirm.
    merged_model.eval()

    # Generation runs on merged_model, so the inputs go on its device rather
    # than the device of the separate `model` handle.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        return_tensors="pt",
        add_generation_prompt=True,
        return_dict=True,
    ).to(merged_model.device)

    with torch.no_grad():
        outputs = merged_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            pad_token_id=tokenizer.pad_token_id,
        )

    # The output begins with the prompt tokens; keep only what follows them.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
|
|
|
|
# Smoke-test prompts run through the fine-tuned agent after training.
test_prompts = [
    "Write a Python function to find the longest common subsequence of two strings.",
    "Create a function that checks if a linked list has a cycle using Floyd's algorithm.",
    "Write a Python script that fetches weather data from a public API and prints the temperature.",
    "Implement a simple LRU cache in Python using a dictionary and a doubly linked list.",
]

section_rule = "=" * 70
print("\n" + section_rule)
print(" TESTING CODING AGENT")
print(section_rule)

# Print each prompt followed by the first 500 characters of the agent's reply.
for test_number, task in enumerate(test_prompts, start=1):
    print(f"\n--- Test {test_number} ---")
    print(f"User: {task}")
    reply_preview = chat_with_agent(task)[:500]
    print(f"\nAgent: {reply_preview}...")
    print("-" * 70)
|
|