| """ |
| tool_trainer_intensive.py - Intensive Training for 80% Target |
| |
| This trainer implements: |
| 1. 10+ epochs (vs 3 before) |
| 2. Better learning rate schedule |
| 3. Optimized training parameters |
| 4. Progress monitoring for 80% target |
| """ |
|
|
| import torch |
| from transformers import ( |
| AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, |
| DataCollatorForLanguageModeling |
| ) |
| from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training |
| from datasets import Dataset |
| import json |
| import time |
|
|
def load_training_data(file_path="tool_pairs_massive.jsonl"):
    """Load training pairs from a JSON Lines file.

    Every non-blank line of the file is parsed as one JSON document.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A list holding one parsed JSON value per non-blank line, in file
        order. An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with the file path and 1-based line number.
    """
    pairs = []
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            record = line.strip()
            if not record:
                # json.loads("") raises, so a stray blank line would
                # otherwise abort the whole load.
                continue
            try:
                pairs.append(json.loads(record))
            except json.JSONDecodeError as err:
                # Same exception type as before, but say where it happened.
                raise json.JSONDecodeError(
                    f"{file_path}, line {line_number}: {err.msg}",
                    err.doc,
                    err.pos,
                ) from err
    return pairs
|
|
def format_training_data(pairs, tokenizer):
    """Turn prompt/completion pairs into single training strings.

    Args:
        pairs: Iterable of mappings that each provide "prompt" and "chosen".
        tokenizer: Object exposing an ``eos_token`` attribute.

    Returns:
        A list of ``{"text": ...}`` dicts, one per pair and in the same
        order, where the text is prompt + chosen + the tokenizer's EOS token.
    """
    return [
        {"text": example["prompt"] + example["chosen"] + tokenizer.eos_token}
        for example in pairs
    ]
|
|
def tokenize_function(examples, tokenizer, max_length=400):
    """Tokenize the "text" field and attach labels for language modeling.

    Args:
        examples: Mapping with a "text" entry to pass to the tokenizer.
        tokenizer: Callable invoked with the text plus truncation/padding
            options; its result must support item access and assignment.
        max_length: Length that inputs are truncated and padded to.

    Returns:
        The tokenizer's result with an added "labels" entry.
    """
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": max_length,
        "return_tensors": None,
    }
    encoded = tokenizer(examples["text"], **encode_options)

    # Labels are the very same object as the input ids (no copy is made).
    encoded["labels"] = encoded["input_ids"]
    return encoded
|
|
def main():
    """Fine-tune SmolLM3-3B with LoRA on the tool-call pairs and smoke-test it.

    The run proceeds in order: pick a device, load the tokenizer and base
    model, wrap the model with LoRA adapters, load and tokenize the training
    pairs, train with ``Trainer``, save the model and tokenizer to
    ``./smollm3_intensive``, then generate one reply and report whether it
    parses as JSON.

    Returns:
        tuple: ``(model, tokenizer)`` - the LoRA-wrapped model after training
        and the tokenizer that was used.
    """
    # NOTE(review): the emoji prefixes in the messages below look mis-encoded
    # (e.g. a leading "π"). They are kept as found, except for the four
    # messages whose literal was broken across two lines; those are rejoined
    # with "✅", the most likely original character - confirm.
    print("π INTENSIVE Training: SmolLM3-3B for 80% Target")
    print("=" * 60)

    # --- Device selection -------------------------------------------------
    device = "mps" if torch.backends.mps.is_available() else "cpu"
    print(f"✅ Using device: {device}")

    start_time = time.time()

    # --- Tokenizer and base model ----------------------------------------
    print("π₯ Loading SmolLM3-3B...")
    model_name = "HuggingFaceTB/SmolLM3-3B"
    output_dir = "./smollm3_intensive"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        # Padding to max_length needs a pad token; fall back to EOS.
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        device_map={"": device} if device == "mps" else "auto"
    )

    print(f"✅ Model loaded: {model.num_parameters() / 1e9:.1f}B params")

    # Gradient checkpointing is switched on in TrainingArguments below while
    # the base weights (including the embeddings) are frozen by LoRA. Making
    # the embedding outputs require grad keeps the checkpointed layers
    # connected to the autograd graph so the adapters receive gradients.
    model.enable_input_require_grads()

    # --- LoRA adapters ------------------------------------------------------
    print("π© Setting up enhanced LoRA (rank 32)...")
    lora_config = LoraConfig(
        r=32,
        lora_alpha=64,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM"
    )

    model = get_peft_model(model, lora_config)
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"π― Trainable: {trainable_params:,} parameters ({100 * trainable_params / total_params:.2f}%)")

    # --- Training data ------------------------------------------------------
    print("π Loading massive training data...")
    pairs = load_training_data()
    print(f"✅ {len(pairs)} training examples ready")

    print("π€ Tokenizing massive dataset...")
    formatted_data = format_training_data(pairs, tokenizer)
    dataset = Dataset.from_list(formatted_data)

    tokenized_dataset = dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=dataset.column_names
    )
    print(f"π Tokenized {len(tokenized_dataset)} examples")

    # --- Training configuration -------------------------------------------
    print("βοΈ Configuring intensive training...")
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=12,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        learning_rate=3e-5,
        lr_scheduler_type="cosine",
        weight_decay=0.01,
        logging_steps=10,
        save_steps=100,
        save_total_limit=3,
        push_to_hub=False,
        # The string "none" is the documented way to disable logging
        # integrations; passing None leaves the library default in effect.
        report_to="none",
        dataloader_pin_memory=False,
        fp16=False,
        gradient_checkpointing=True,
        max_grad_norm=1.0,
        adam_epsilon=1e-8,
        adam_beta1=0.9,
        adam_beta2=0.999,
    )

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
        pad_to_multiple_of=8,
    )

    # --- Training -----------------------------------------------------------
    print("ποΈ Initializing intensive trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )

    print("π― Starting INTENSIVE training...")
    print(f"π Dataset: {len(pairs)} examples")
    print("π Epochs: 12 (vs 3 before)")
    print("π Learning rate: 3e-5 with cosine schedule")
    print("β±οΈ Expected time: ~10-15 minutes")
    print("π Monitoring for dramatic improvement...")

    train_result = trainer.train()

    # Measured from start_time, so this also covers model loading and
    # tokenization, not only trainer.train().
    training_time = time.time() - start_time
    print("\nπ INTENSIVE Training completed!")
    print(f"π Final loss: {train_result.training_loss:.4f}")
    print(f"β±οΈ Training time: {training_time:.1f}s")

    # --- Save ---------------------------------------------------------------
    print("πΎ Saving intensively trained model...")
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    # --- Quick validation ---------------------------------------------------
    print("π§ͺ Quick validation test...")
    model.eval()
    test_input = "Get weather for New York"
    # Send the inputs to wherever the model's weights actually are: off MPS
    # the model is loaded with device_map="auto", so its placement need not
    # match the `device` string chosen above.
    model_device = next(model.parameters()).device
    inputs = tokenizer(test_input, return_tensors="pt").to(model_device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    prompt_length = inputs['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    print(f"π€ Model response: {response}")

    try:
        parsed = json.loads(response.strip())
    except json.JSONDecodeError as e:
        print(f"β JSON error: {e}")
    else:
        print(f"✅ Valid JSON! {parsed}")

    print("\nπ Intensive training complete!")
    print("π Ready for 80% target evaluation")

    return model, tokenizer
|
|
if __name__ == "__main__":
    # Script entry point: run the whole pipeline and keep the returned model
    # and tokenizer bound to module-level names.
    (model, tokenizer) = main()