"""
Fine-tuning Llama-3.2-3B-Instruct with Unsloth for the Garbage Collecting Robot.

Training data: fixed_dataset.jsonl  (generated by code2.py + fixer.py)
Format: {"user": "### Instruction:\n...\n\n### Input:\nENVIRONMENT STATUS:\n...",
         "assistant": "UP|DOWN|LEFT|RIGHT|COLLECT"}

Base model: unsloth/llama-3.2-3b-instruct-bnb-4bit  (same as Unsloth Studio run)
Export:     lora_garbage_robot/  (LoRA adapter)
"""

import os
import json
from typing import Optional

from datasets import Dataset

max_seq_length = 512   # Prompts are short; 512 is well above the longest sample
dtype = None           # Auto-detect (float16 on T4, bfloat16 on Ampere+)
load_in_4bit = True

# ── Alpaca prompt — MUST match fixed_dataset.jsonl / code2.py / app.py ──────
ALPACA_TEMPLATE = (
    "### Instruction:\n{instruction}\n\n"
    "### Input:\nENVIRONMENT STATUS:\n{input}\n\n"
    "### Response:\n{response}"
)

INSTRUCTION = (
    "You are an AI brain controlling a garbage collecting robot.\n"
    "Reply with EXACTLY ONE of: UP DOWN LEFT RIGHT COLLECT"
)

EOS_TOKEN = None  # filled in after tokenizer loads

# Marker that precedes the environment description inside each row's "user" field.
_ENV_MARKER = "ENVIRONMENT STATUS:\n"


def _format_sample(row, eos_suffix: str) -> Optional[str]:
    """Turn one decoded JSONL row into a full Alpaca training string.

    Args:
        row: The decoded JSON value for one line; expected to be a dict with
            string "user" and "assistant" fields.
        eos_suffix: Text appended after the response (the EOS token, or "").

    Returns:
        The formatted training text, or None when the row is malformed
        (wrong type, missing fields, or no ENVIRONMENT STATUS section).
    """
    if not isinstance(row, dict):
        return None
    user_text = row.get("user")        # already contains ### Instruction + ### Input
    assistant = row.get("assistant")   # e.g. "RIGHT"
    if not isinstance(user_text, str) or not isinstance(assistant, str):
        return None

    # Extract the environment status message from the user field.
    parts = user_text.split(_ENV_MARKER)
    if len(parts) < 2:
        return None
    env_status = parts[1].strip()

    return ALPACA_TEMPLATE.format(
        instruction=INSTRUCTION,
        input=env_status,
        response=assistant,
    ) + eos_suffix


def load_fixed_dataset(
    path: str = "fixed_dataset.jsonl",
    eos_token: Optional[str] = None,
) -> Dataset:
    """
    Load fixed_dataset.jsonl produced by fixer.py.

    Each row: {"user": "<### Instruction:...### Input:...>", "assistant": "<ACTION>"}
    We re-format into the full Alpaca text so the model sees input + target
    in one string.

    Args:
        path: Path to the JSONL file.
        eos_token: End-of-sequence token appended to every sample. When None,
            the module-level EOS_TOKEN is used; if that is also None, nothing
            is appended.

    Returns:
        A Dataset with a single "text" column.

    Blank lines are ignored. Rows that are not valid JSON, lack the expected
    fields, or have no ENVIRONMENT STATUS section are skipped, and the number
    of skipped rows is printed.
    """
    eos_suffix = (EOS_TOKEN if eos_token is None else eos_token) or ""

    rows = []
    skipped = 0
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            text = _format_sample(row, eos_suffix)
            if text is None:
                skipped += 1  # skip malformed rows
                continue
            rows.append({"text": text})

    print(f"[Dataset] Loaded {len(rows):,} samples from {path}")
    if skipped:
        print(f"[Dataset] Skipped {skipped:,} malformed rows")
    return Dataset.from_list(rows)


def main():
    """Run the full pipeline: load base model, attach LoRA, train, save adapter."""
    # Heavy imports stay local so importing this module does not require a GPU
    # stack. unsloth is imported before trl/transformers.
    from unsloth import FastLanguageModel, is_bfloat16_supported
    from trl import SFTTrainer
    from transformers import TrainingArguments

    global EOS_TOKEN

    print("=" * 60)
    print(" Fine-tuning Llama-3.2-3B-Instruct — Garbage Robot")
    print("=" * 60)

    # ── 1. Load base model (same as Unsloth Studio session) ──────────────────
    print("\n[1/4] Loading base model …")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/llama-3.2-3b-instruct-bnb-4bit",
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )
    EOS_TOKEN = tokenizer.eos_token  # fill in for dataset formatting

    # ── 2. Add LoRA adapters ─────────────────────────────────────────────────
    print("[2/4] Attaching LoRA adapters …")
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
    )

    # ── 3. Load dataset ──────────────────────────────────────────────────────
    print("[3/4] Loading fixed_dataset.jsonl …")
    # Pass the EOS token explicitly rather than relying only on the global.
    dataset = load_fixed_dataset("fixed_dataset.jsonl", eos_token=EOS_TOKEN)

    # ── 4. Train ─────────────────────────────────────────────────────────────
    print("[4/4] Starting fine-tuning …")
    # Query hardware support once; fp16 and bf16 must be mutually exclusive.
    use_bf16 = is_bfloat16_supported()
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=True,  # efficient for short sequences
        args=TrainingArguments(
            per_device_train_batch_size=4,
            gradient_accumulation_steps=4,
            warmup_ratio=0.03,
            num_train_epochs=1,
            learning_rate=2e-4,
            fp16=not use_bf16,
            bf16=use_bf16,
            logging_steps=10,
            optim="adamw_8bit",
            weight_decay=0.01,
            lr_scheduler_type="cosine",
            seed=3407,
            output_dir="outputs",
            save_strategy="epoch",
        ),
    )

    trainer_stats = trainer.train()
    print(f"\nTraining complete. Loss: {trainer_stats.training_loss:.4f}")

    # ── Save LoRA adapter ────────────────────────────────────────────────────
    model.save_pretrained("lora_garbage_robot")
    tokenizer.save_pretrained("lora_garbage_robot")
    print("\nLoRA adapter saved to: lora_garbage_robot/")
    print("To export a merged model, use Unsloth Studio → Export → Merged Model.")


if __name__ == "__main__":
    main()