"""
WebScrapeAgent — Standalone Training Script
=============================================
Run this on any machine with a 16GB+ GPU.

Usage:
    pip install unsloth trl peft transformers accelerate datasets bitsandbytes
    python train.py

    # Or with custom settings:
    python train.py --epochs 3 --lr 5e-5 --lora-r 64 --output my-org/my-model
"""

# CRITICAL: import unsloth FIRST
import unsloth

import os
import sys
import argparse

import torch
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template, train_on_responses_only


def parse_args():
    """Parse the command-line options that configure a training run.

    Returns:
        argparse.Namespace with attributes ``model``, ``dataset``, ``output``,
        ``max_seq_len``, ``lora_r``, ``lora_alpha``, ``lr``, ``epochs``,
        ``batch_size``, ``grad_accum``, ``no_push`` and ``save_local``.
    """
    p = argparse.ArgumentParser(description="Train WebScrapeAgent")
    p.add_argument("--model", default="unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
                   help="Base model")
    p.add_argument("--dataset", default="sukritvemula/webscrape-agent-training-data",
                   help="Training dataset")
    p.add_argument("--output", default="sukritvemula/WebScrapeAgent-7B-v1",
                   help="Output model name on Hub")
    p.add_argument("--max-seq-len", type=int, default=4096, help="Max sequence length")
    p.add_argument("--lora-r", type=int, default=32, help="LoRA rank")
    p.add_argument("--lora-alpha", type=int, default=32, help="LoRA alpha")
    p.add_argument("--lr", type=float, default=1e-4, help="Learning rate")
    p.add_argument("--epochs", type=int, default=2, help="Number of epochs")
    p.add_argument("--batch-size", type=int, default=1, help="Per-device batch size")
    p.add_argument("--grad-accum", type=int, default=16,
                   help="Gradient accumulation steps")
    p.add_argument("--no-push", action="store_true", help="Don't push to Hub")
    p.add_argument("--save-local", default="./webscrape-agent-local",
                   help="Local save path")
    return p.parse_args()


def main():
    """Run the full pipeline: load model, apply LoRA, prepare data, train, save.

    Steps, in order:
      1. Load the base model and tokenizer in 4-bit.
      2. Wrap the model with LoRA adapters.
      3. Render the dataset's ``messages`` column to text with the chat
         template and drop examples longer than ``--max-seq-len`` tokens.
      4. Build an ``SFTTrainer`` that computes loss on assistant turns only.
      5. Train, save the adapter locally and (unless ``--no-push``) push both
         a merged 16-bit model and the LoRA adapter to the Hub.

    Raises:
        SystemExit: if no training examples remain after the length filter.
    """
    args = parse_args()

    print("=" * 60)
    print("WebScrapeAgent — Training")
    print("=" * 60)
    print(f" GPU: {torch.cuda.get_device_name() if torch.cuda.is_available() else 'CPU'}")
    if torch.cuda.is_available():
        # The device-properties attribute is `total_memory` (bytes);
        # `total_mem` does not exist and raised AttributeError.
        vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f" VRAM: {vram_gb:.1f} GB")

    # 1. Load model
    print(f"\n[1/5] Loading: {args.model}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model,
        max_seq_length=args.max_seq_len,
        dtype=None,
        load_in_4bit=True,
    )

    # 2. LoRA
    print(f"[2/5] Applying LoRA (r={args.lora_r}, alpha={args.lora_alpha})")
    model = FastLanguageModel.get_peft_model(
        model,
        r=args.lora_r,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=args.lora_alpha,
        lora_dropout=0.0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=42,
    )
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f" Trainable: {trainable:,} / {total:,} ({trainable/total*100:.2f}%)")

    # 3. Chat template + dataset
    tokenizer = get_chat_template(tokenizer, chat_template="qwen-2.5")
    print(f"[3/5] Loading dataset: {args.dataset}")
    dataset = load_dataset(args.dataset)
    train_ds = dataset["train"]

    def format_to_text(examples):
        """Render each conversation in a batch to a single training string."""
        texts = []
        for msgs in examples["messages"]:
            try:
                text = tokenizer.apply_chat_template(
                    msgs, tokenize=False, add_generation_prompt=False
                )
            except Exception:
                # Deliberate best-effort: if the template rejects a
                # conversation, fall back to hand-built ChatML markup rather
                # than dropping the example.
                text = "".join(
                    f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
                    for msg in msgs
                )
            texts.append(text)
        return {"text": texts}

    train_ds = train_ds.map(
        format_to_text,
        batched=True,
        num_proc=2,
        remove_columns=train_ds.column_names,
    )

    def filter_length(example):
        """Keep only examples whose tokenized length fits in max_seq_len."""
        tokens = tokenizer(example["text"], truncation=False)
        return len(tokens["input_ids"]) <= args.max_seq_len

    orig = len(train_ds)
    train_ds = train_ds.filter(filter_length, num_proc=2)
    kept = len(train_ds)
    if kept == 0:
        # Fail fast with a clear message instead of a ZeroDivisionError below
        # (empty dataset) or an obscure trainer failure (everything filtered).
        raise SystemExit(
            f"No training examples remain ({kept} / {orig}) after the length "
            f"filter (max_seq_len={args.max_seq_len})."
        )
    print(f" {kept} / {orig} examples ({kept/orig*100:.1f}% kept after length filter)")

    # 4. Trainer
    print("[4/5] Setting up trainer")
    training_args = SFTConfig(
        output_dir="./webscrape-checkpoints",
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.batch_size,
        gradient_accumulation_steps=args.grad_accum,
        optim="adamw_8bit",
        learning_rate=args.lr,
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        warmup_ratio=0.03,
        max_grad_norm=0.3,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        max_seq_length=args.max_seq_len,
        dataset_text_field="text",
        packing=False,
        logging_steps=10,
        logging_first_step=True,
        disable_tqdm=False,
        save_strategy="steps",
        save_steps=500,
        save_total_limit=2,
        push_to_hub=not args.no_push,
        hub_model_id=args.output if not args.no_push else None,
        hub_strategy="end",
        seed=42,
        dataset_num_proc=2,
    )
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_ds,
        args=training_args,
    )
    # Mask everything except assistant turns. The ChatML turn markers used by
    # the qwen-2.5 template are passed explicitly so the call does not depend
    # on the tokenizer carrying pre-recorded instruction/response markers.
    trainer = train_on_responses_only(
        trainer,
        instruction_part="<|im_start|>user\n",
        response_part="<|im_start|>assistant\n",
    )

    # 5. Train
    print("[5/5] Training...")
    print(f" Effective batch: {args.batch_size * args.grad_accum}")
    print(f" LR: {args.lr}, Epochs: {args.epochs}")
    stats = trainer.train()
    print(f"\n✅ Done! Loss: {stats.training_loss:.4f}")

    # Save
    model.save_pretrained(args.save_local)
    tokenizer.save_pretrained(args.save_local)
    print(f" Saved locally: {args.save_local}")

    if not args.no_push:
        print(f" Pushing merged model to Hub: {args.output}")
        model.push_to_hub_merged(args.output, tokenizer, save_method="merged_16bit")
        model.push_to_hub(args.output + "-lora", tokenizer)
        print(f" ✅ https://huggingface.co/{args.output}")
        print(f" ✅ https://huggingface.co/{args.output}-lora")


if __name__ == "__main__":
    main()