import os

print("Installing dependencies...")
os.system("pip install -q -U torch datasets trl peft transformers unsloth")

import json
from datasets import load_dataset

print("1. Building the dataset locally from GSM8K (no API limits)...")
dataset = load_dataset("gsm8k", "main", split="train")

def format_gsm8k(example):
    # GSM8K answers separate the step-by-step reasoning from the final answer with "####".
    parts = example["answer"].split("####")
    reasoning = [step.strip() for step in parts[0].strip().split('\n') if step.strip()]
    final_answer = parts[1].strip() if len(parts) > 1 else ""

    json_data = {
        "final_answer": final_answer,
        "reasoning_trace": reasoning,
        "confidence_explanation": "Deterministic symbolic steps logically verified."
    }

    return {
        "messages": [
            {"role": "system", "content": "You are an MVM2 math reasoning agent. You strictly output JSON triplets: {final_answer, reasoning_trace, confidence_explanation}."},
            {"role": "user", "content": example["question"]},
            {"role": "assistant", "content": json.dumps(json_data)}
        ]
    }

print("Mapping dataset to MVM2 triplets...")
formatted_dataset = dataset.map(format_gsm8k, remove_columns=["question", "answer"])

# Train on a 1,000-example subset to keep the demo run short.
small_dataset = formatted_dataset.select(range(1000))
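
# Optional sanity check (not in the original script): print one formatted record to
# confirm the chat messages and the JSON triplet look as expected before training.
print(json.dumps(small_dataset[0]["messages"], indent=2))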

print("2. Starting Unsloth training on a T4 GPU...")
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit",
    max_seq_length = 2048,
    dtype = None,  # auto-detect; a T4 falls back to float16
    load_in_4bit = True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # LoRA rank
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",  # Unsloth's memory-efficient checkpointing
    random_state = 3407,
)
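
# Optional (assumption: the returned object is a standard PEFT model, so this helper is
# available): report how many parameters the LoRA adapter actually trains.
model.print_trainable_parameters()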

def format_chatml(examples):
    # Render each conversation with the tokenizer's chat template into one training string.
    texts = []
    for messages in examples["messages"]:
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
        texts.append(text)
    return {"text": texts}

train_data = small_dataset.map(format_chatml, batched=True)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_data,
    dataset_text_field = "text",
    max_seq_length = 2048,
    dataset_num_proc = 2,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # effective batch size of 8
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = True,  # the T4 has no bfloat16 support
        logging_steps = 10,
        optim = "adamw_8bit",
        output_dir = "outputs",
    ),
)

trainer.train()

model.save_pretrained("mvm2_lora_model")
tokenizer.save_pretrained("mvm2_lora_model")
print("\n✅ Training complete! The LoRA adapter is saved to 'mvm2_lora_model'.")