# Page residue captured with this file (not part of the script):
# "Spaces: Build error"
"""
Fine-tuning Llama-3.2-3B-Instruct with Unsloth for the Garbage Collecting Robot.
Training data: fixed_dataset.jsonl (generated by code2.py + fixer.py)
Format: {"user": "### Instruction:\n...\n\n### Input:\nENVIRONMENT STATUS:\n...", "assistant": "UP|DOWN|LEFT|RIGHT|COLLECT"}
Base model: unsloth/llama-3.2-3b-instruct-bnb-4bit (same as Unsloth Studio run)
Export: lora_garbage_robot/ (LoRA adapter)
"""
import json
import os

from datasets import Dataset
# ── Model loading configuration (passed to FastLanguageModel.from_pretrained) ─
# Prompts are short; 512 is well above the longest sample.
max_seq_length = 512
# None = auto-detect (float16 on T4, bfloat16 on Ampere+).
dtype = None
# Forwarded as the loader's `load_in_4bit` argument.
load_in_4bit = True

# ── Alpaca prompt — MUST match fixed_dataset.jsonl / code2.py / app.py ──────
# Placeholders: {instruction}, {input} (the environment status text) and
# {response} (the target action).
ALPACA_TEMPLATE = (
    "### Instruction:\n{instruction}\n\n"
    "### Input:\nENVIRONMENT STATUS:\n{input}\n\n"
    "### Response:\n{response}"
)

# Fixed instruction text substituted for {instruction} in every sample.
INSTRUCTION = (
    "You are an AI brain controlling a garbage collecting robot.\n"
    "Reply with EXACTLY ONE of: UP DOWN LEFT RIGHT COLLECT"
)

# Filled in after the tokenizer loads; while it is None no end-of-sequence
# token is appended to formatted samples.
EOS_TOKEN = None
def load_fixed_dataset(path: str = "fixed_dataset.jsonl") -> Dataset:
    """
    Load fixed_dataset.jsonl produced by fixer.py and build the training set.

    Each input row is expected to look like
    {"user": "<### Instruction:...### Input:...>", "assistant": "<ACTION>"}.
    The environment status is cut out of the "user" field and re-rendered with
    ALPACA_TEMPLATE / INSTRUCTION so the model sees input + target in one string.

    EOS_TOKEN is read when this function is called: if it is still None, no
    end-of-sequence token is appended to the samples.

    Args:
        path: Path to the JSON Lines file (one JSON object per line, UTF-8).

    Returns:
        A Dataset with a single "text" column, one entry per usable row.

    Raises:
        OSError: If the file cannot be opened.
    """
    marker = "ENVIRONMENT STATUS:\n"
    eos = EOS_TOKEN or ""  # loop-invariant, so resolve it once
    rows = []
    skipped = 0

    # JSON Lines files are UTF-8; do not depend on the platform default encoding.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # blank lines carry no sample and are not errors

            try:
                row = json.loads(line)
                user_text = row["user"]  # already contains ### Instruction + ### Input
                assistant = row["assistant"]  # e.g. "RIGHT"
                # Extract the environment status message from the user field.
                env_status = user_text.split(marker)[1].strip()
            except (ValueError, KeyError, IndexError, TypeError, AttributeError):
                # Invalid JSON, missing keys, wrong types or no status marker:
                # skip the malformed row instead of aborting the whole load.
                skipped += 1
                continue

            text = ALPACA_TEMPLATE.format(
                instruction=INSTRUCTION,
                input=env_status,
                response=assistant,
            ) + eos
            rows.append({"text": text})

    print(f"[Dataset] Loaded {len(rows):,} samples from {path}")
    if skipped:
        print(f"[Dataset] Skipped {skipped:,} malformed rows")
    return Dataset.from_list(rows)
def main():
    """
    Fine-tune the base model with LoRA on fixed_dataset.jsonl and save the adapter.

    Steps: load the 4-bit base model and tokenizer, attach LoRA adapters,
    build the dataset via load_fixed_dataset(), train for one epoch with
    SFTTrainer, then write the adapter and tokenizer to lora_garbage_robot/.
    Trainer output (including the per-epoch checkpoint) goes to outputs/.

    Side effects: sets the module-level EOS_TOKEN from the loaded tokenizer so
    that load_fixed_dataset() appends it to every sample.
    """
    # unsloth is imported first: its documentation asks for it to be imported
    # before trl / transformers.
    # is_bfloat16_supported is exported by the unsloth package itself, so it is
    # imported from there rather than looked up on FastLanguageModel.
    from unsloth import FastLanguageModel, is_bfloat16_supported
    from trl import SFTTrainer
    from transformers import TrainingArguments

    global EOS_TOKEN

    print("=" * 60)
    print(" Fine-tuning Llama-3.2-3B-Instruct — Garbage Robot")
    print("=" * 60)

    # ── 1. Load base model (same as Unsloth Studio session) ──────────────────
    print("\n[1/4] Loading base model …")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/llama-3.2-3b-instruct-bnb-4bit",
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    EOS_TOKEN = tokenizer.eos_token  # fill in for dataset formatting

    # ── 2. Add LoRA adapters ─────────────────────────────────────────────────
    print("[2/4] Attaching LoRA adapters …")
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj"],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

    # ── 3. Load dataset ──────────────────────────────────────────────────────
    # Must run after EOS_TOKEN is set above, otherwise samples get no EOS.
    print("[3/4] Loading fixed_dataset.jsonl …")
    dataset = load_fixed_dataset("fixed_dataset.jsonl")

    # ── 4. Train ─────────────────────────────────────────────────────────────
    print("[4/4] Starting fine-tuning …")
    # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
    use_bf16 = is_bfloat16_supported()
    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset,
        dataset_text_field = "text",
        max_seq_length = max_seq_length,
        dataset_num_proc = 2,
        packing = True,  # efficient for short sequences
        args = TrainingArguments(
            per_device_train_batch_size = 4,
            gradient_accumulation_steps = 4,
            warmup_ratio = 0.03,
            num_train_epochs = 1,
            learning_rate = 2e-4,
            fp16 = not use_bf16,
            bf16 = use_bf16,
            logging_steps = 10,
            optim = "adamw_8bit",
            weight_decay = 0.01,
            lr_scheduler_type = "cosine",
            seed = 3407,
            output_dir = "outputs",
            save_strategy = "epoch",
        ),
    )
    trainer_stats = trainer.train()
    print(f"\nTraining complete. Loss: {trainer_stats.training_loss:.4f}")

    # ── Save LoRA adapter ────────────────────────────────────────────────────
    model.save_pretrained("lora_garbage_robot")
    tokenizer.save_pretrained("lora_garbage_robot")
    print("\nLoRA adapter saved to: lora_garbage_robot/")
    print("To export a merged model, use Unsloth Studio → Export → Merged Model.")
# Run the fine-tuning pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()