| """ |
| OncoAgent — QLoRA Fine-Tuning Script (Unsloth-Accelerated). |
| |
| Uses Unsloth's FastLanguageModel for 2x faster training with 60% less VRAM |
| on AMD Instinct MI300X (ROCm). 4-bit NF4 quantization via bitsandbytes. |
| |
| Key features: |
| - Unsloth-optimized kernels for Attention + Loss computation |
| - Tier-adaptive hyperparameters (batch size, LoRA rank, seq length) |
| - Automatic checkpoint resume on crash recovery |
| - Eval split monitoring to detect overfitting |
| - Training metadata saved for reproducibility audits |
| |
| Hardware Target: AMD Instinct MI300X (gfx942) via ROCm 7.0 + HIP. |
| Rule Compliance: #3 (Qwen 3.5/3.6, format ChatML), #14 (QLoRA + bitsandbytes + PEFT), |
| #22 (reproducibility seeds), #24 (.env), #26 (type hints). |
| """ |
|
|
| import argparse |
| import json |
| import os |
| import time |
| import logging |
| from dataclasses import dataclass |
| from typing import Dict, List, Optional, Any |
|
|
| |
| |
| os.environ["HSA_OVERRIDE_GFX_VERSION"] = "9.4.2" |
| os.environ["HF_HUB_DISABLE_XET"] = "1" |
|
|
| from dotenv import load_dotenv |
| load_dotenv() |
|
|
| import torch |
| from datasets import Dataset |
|
|
| |
| from unsloth import FastLanguageModel |
| from trl import SFTTrainer, SFTConfig |
|
|
| |
| SEED: int = 42 |
| torch.manual_seed(SEED) |
| if torch.cuda.is_available(): |
| torch.cuda.manual_seed_all(SEED) |
|
|
| |
| logging.basicConfig( |
| level=os.getenv("LOG_LEVEL", "INFO"), |
| format="%(asctime)s [%(levelname)s] %(message)s", |
| ) |
| logger = logging.getLogger(__name__) |
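# This script reads HF_TOKEN (for gated model downloads) and LOG_LEVEL from the .env
# file loaded above. Illustrative .env contents (the token value is a placeholder):
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
#   LOG_LEVEL=INFO
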
|
|
|
|
| |
|
|
@dataclass
class TierConfig:
    """Tier-specific hyperparameters tuned for MI300X 192GB HBM3."""

    model_id: str
    lora_r: int
    lora_alpha: int
    max_seq_length: int
    batch_size: int
    gradient_accumulation: int
    learning_rate: float
    num_epochs: int
    save_steps: int


TIER_CONFIGS: Dict[int, TierConfig] = {
    1: TierConfig(
        model_id="Qwen/Qwen3.5-9B",
        lora_r=16,
        lora_alpha=32,
        max_seq_length=2048,
        batch_size=8,
        gradient_accumulation=2,
        learning_rate=2e-4,
        num_epochs=3,
        save_steps=500,
    ),
    2: TierConfig(
        model_id="Qwen/Qwen3.6-27B",
        lora_r=32,
        lora_alpha=64,
        max_seq_length=2048,
        batch_size=4,
        gradient_accumulation=4,
        learning_rate=1e-4,
        num_epochs=1,
        save_steps=250,
    ),
}

OUTPUT_DIR: str = os.path.join("models", "oncoagent_adapters")
TRAIN_FILE: str = os.path.join("data", "final", "train_oncoagent.jsonl")
EVAL_FILE: str = os.path.join("data", "final", "train_oncoagent_eval.jsonl")

LORA_DROPOUT: float = 0.0
LORA_TARGET_MODULES: List[str] = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

WARMUP_RATIO: float = 0.03
WEIGHT_DECAY: float = 0.01
MAX_GRAD_NORM: float = 1.0

os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(os.path.join("data", "final"), exist_ok=True)

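# For reference, both tier configs above share the same derived settings:
#   effective batch size = batch_size × gradient_accumulation = 8 × 2 = 4 × 4 = 16
#   LoRA scaling factor  = lora_alpha / lora_r = 32/16 = 64/32 = 2.0
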
def load_jsonl_dataset(path: str, label: str = "data") -> Dataset:
    """Load a JSONL file into a HuggingFace Dataset.

    Args:
        path: Path to the JSONL file.
        label: Human-readable label for logging.

    Returns:
        HuggingFace Dataset object.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(
            f"{label} file not found: {path}. "
            "Run download_hf_datasets.py and synthetic_generator.py first, "
            "then unify with dataset_builder.py."
        )

    texts: List[str] = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            try:
                entry = json.loads(line.strip())
                text = entry.get("text", "")
                if text:
                    texts.append(text)
            except json.JSONDecodeError:
                # Skip blank or malformed lines rather than aborting the run.
                continue

    logger.info(f"📊 Loaded {len(texts):,} {label} samples from {path}")
    return Dataset.from_dict({"text": texts})

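# Each JSONL line is expected to carry a single pre-formatted "text" field.
# Illustrative record (hedged; the exact ChatML content is produced upstream by
# dataset_builder.py and may differ):
#   {"text": "<|im_start|>system\nYou are OncoAgent...<|im_end|>\n<|im_start|>user\n...<|im_end|>\n<|im_start|>assistant\n...<|im_end|>"}
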
def setup_model_and_tokenizer(
    config: TierConfig,
) -> tuple:
    """Load model with Unsloth's FastLanguageModel (4-bit, LoRA-ready).

    Args:
        config: The tier-specific configuration.

    Returns:
        Tuple of (model, tokenizer).
    """
    hf_token: Optional[str] = os.getenv("HF_TOKEN")

    logger.info(f"📦 Loading model via Unsloth: {config.model_id}")
    logger.info("   Device: cuda (ROCm maps cuda→hip, gfx942 via HSA_OVERRIDE)")
    logger.info("   Quantization: 4-bit NF4 (Unsloth managed)")
    logger.info(f"   LoRA rank: {config.lora_r}, alpha: {config.lora_alpha}")

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=config.model_id,
        max_seq_length=config.max_seq_length,
        load_in_4bit=True,
        token=hf_token,
    )

    # Unsloth may return a processor-style wrapper; unwrap to the underlying tokenizer.
    actual_tokenizer = tokenizer.tokenizer if hasattr(tokenizer, "tokenizer") else tokenizer
    logger.info(f"   Tokenizer type: {type(tokenizer).__name__} → inner: {type(actual_tokenizer).__name__}")
    logger.info(f"   EOS token: {actual_tokenizer.eos_token!r}")

    if actual_tokenizer.pad_token is None:
        actual_tokenizer.pad_token = actual_tokenizer.eos_token

    # Attach LoRA adapters on top of the 4-bit base model.
    model = FastLanguageModel.get_peft_model(
        model,
        r=config.lora_r,
        lora_alpha=config.lora_alpha,
        lora_dropout=LORA_DROPOUT,
        target_modules=LORA_TARGET_MODULES,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=SEED,
    )

    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    logger.info(
        f"🧠 LoRA applied: {trainable:,} trainable / {total:,} total "
        f"({100 * trainable / total:.2f}%)"
    )

    return model, actual_tokenizer

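# Optional sanity check (a hedged sketch; assumes a single visible GPU): on ROCm builds
# of PyTorch the CUDA API is backed by HIP, so this should report the MI300X:
#
#   if torch.cuda.is_available():
#       logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
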
def _save_training_metadata(
    output_dir: str,
    config: TierConfig,
    train_samples: int,
    eval_samples: int,
    duration_seconds: float,
    final_metrics: Optional[Dict[str, Any]] = None,
) -> None:
    """Save a JSON metadata file alongside the adapter for audit trails.

    Args:
        output_dir: Directory to save to.
        config: The tier config used.
        train_samples: Number of training samples.
        eval_samples: Number of eval samples.
        duration_seconds: Wall-clock training time.
        final_metrics: Optional metrics from trainer.
    """
    meta: Dict[str, Any] = {
        "project": "OncoAgent",
        "model_id": config.model_id,
        "tier": 1 if "9B" in config.model_id else 2,
        "lora_r": config.lora_r,
        "lora_alpha": config.lora_alpha,
        "max_seq_length": config.max_seq_length,
        "effective_batch_size": config.batch_size * config.gradient_accumulation,
        "learning_rate": config.learning_rate,
        "num_epochs": config.num_epochs,
        "train_samples": train_samples,
        "eval_samples": eval_samples,
        "duration_seconds": round(duration_seconds, 1),
        "duration_human": time.strftime("%Hh %Mm %Ss", time.gmtime(duration_seconds)),
        "hardware": "AMD Instinct MI300X (192GB HBM3)",
        "framework": "Unsloth 2026.5.2 + ROCm 7.0 + bitsandbytes (NF4) + PEFT/LoRA",
        "seed": SEED,
    }
    if final_metrics:
        meta["final_metrics"] = {
            k: round(v, 6) if isinstance(v, float) else v
            for k, v in final_metrics.items()
        }

    path = os.path.join(output_dir, "training_metadata.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2, ensure_ascii=False)
    logger.info(f"📝 Training metadata saved to {path}")

def train(
    tier: int,
    max_steps: int = -1,
    resume: bool = False,
) -> str:
    """Execute the Unsloth-accelerated QLoRA training pipeline.

    Args:
        tier: The architectural tier to train (1 for 9B, 2 for 27B).
        max_steps: If > 0, stop training after this many steps (useful for dry runs).
        resume: If True, resume from the last checkpoint in the output directory.

    Returns:
        Path to the saved adapter directory.
    """
    config = TIER_CONFIGS.get(tier)
    if not config:
        raise ValueError(f"Invalid tier: {tier}. Must be 1 or 2.")

    logger.info(f"🚀 Starting OncoAgent Unsloth Training Pipeline (Tier {tier})")
    logger.info("=" * 60)
    logger.info(f"   Model: {config.model_id}")
    logger.info("   Engine: Unsloth FastLanguageModel (2x speedup)")
    logger.info(f"   LoRA rank: {config.lora_r}")
    logger.info(
        f"   Batch size: {config.batch_size} × {config.gradient_accumulation} "
        f"= {config.batch_size * config.gradient_accumulation}"
    )
    logger.info(f"   Learning rate: {config.learning_rate}")
    logger.info(f"   Epochs: {config.num_epochs}")
    logger.info(f"   Seq length: {config.max_seq_length}")
    logger.info(f"   HSA_GFX: {os.environ.get('HSA_OVERRIDE_GFX_VERSION', 'NOT SET')}")
    logger.info("=" * 60)

    model, tokenizer = setup_model_and_tokenizer(config)
    train_dataset = load_jsonl_dataset(TRAIN_FILE, "training")

    # Optional eval split for overfitting detection.
    eval_dataset: Optional[Dataset] = None
    eval_samples: int = 0
    try:
        eval_dataset = load_jsonl_dataset(EVAL_FILE, "evaluation")
        eval_samples = len(eval_dataset)
    except FileNotFoundError:
        logger.warning("⚠️ Eval file not found — training without evaluation metrics")

    tier_output_dir: str = os.path.join(OUTPUT_DIR, f"tier{tier}")
    os.makedirs(tier_output_dir, exist_ok=True)

    # Locate the latest checkpoint if resuming after a crash.
    resume_checkpoint: Optional[str] = None
    if resume:
        checkpoints = sorted([
            os.path.join(tier_output_dir, d)
            for d in os.listdir(tier_output_dir)
            if d.startswith("checkpoint-")
        ])
        if checkpoints:
            resume_checkpoint = checkpoints[-1]
            logger.info(f"♻️ Resuming from checkpoint: {resume_checkpoint}")
        else:
            logger.warning("⚠️ --resume requested but no checkpoint found, starting fresh")

    sft_config = SFTConfig(
        output_dir=tier_output_dir,
        num_train_epochs=config.num_epochs,
        per_device_train_batch_size=config.batch_size,
        gradient_accumulation_steps=config.gradient_accumulation,
        learning_rate=config.learning_rate,
        weight_decay=WEIGHT_DECAY,
        warmup_ratio=WARMUP_RATIO,
        lr_scheduler_type="cosine",
        logging_steps=10,
        max_steps=max_steps,
        save_steps=config.save_steps,
        save_total_limit=3,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        max_grad_norm=MAX_GRAD_NORM,
        seed=SEED,
        report_to="none",
        optim="adamw_8bit",
        max_length=config.max_seq_length,
        packing=True,
        dataset_text_field="text",
        eos_token=None,
        # Evaluate periodically only when an eval split is available.
        eval_strategy="steps" if eval_dataset is not None else "no",
        eval_steps=config.save_steps,
        per_device_eval_batch_size=config.batch_size,
    )

    trainer = SFTTrainer(
        model=model,
        processing_class=tokenizer,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        args=sft_config,
    )

    t0 = time.time()
    logger.info("🏋️ Training started...")
    train_result = trainer.train(resume_from_checkpoint=resume_checkpoint)
    duration = time.time() - t0

    final_path: str = os.path.join(tier_output_dir, "final")
    model.save_pretrained(final_path)
    tokenizer.save_pretrained(final_path)

    _save_training_metadata(
        output_dir=final_path,
        config=config,
        train_samples=len(train_dataset),
        eval_samples=eval_samples,
        duration_seconds=duration,
        final_metrics=train_result.metrics if train_result else None,
    )

    logger.info("=" * 60)
    logger.info(f"🏁 TRAINING COMPLETE FOR TIER {tier} ({config.model_id})")
    logger.info(f"   Adapter saved to: {final_path}")
    logger.info(f"   Train samples: {len(train_dataset):,}")
    logger.info(f"   Eval samples: {eval_samples:,}")
    logger.info(f"   Duration: {time.strftime('%Hh %Mm %Ss', time.gmtime(duration))}")
    logger.info(f"   Final train loss: {train_result.metrics.get('train_loss', 'N/A')}")
    if eval_dataset is not None:
        eval_results = trainer.evaluate()
        logger.info(f"   Final eval loss: {eval_results.get('eval_loss', 'N/A')}")
    logger.info("=" * 60)

    # Free GPU memory before returning (useful when both tiers run in one process).
    del model, trainer
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return final_path

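# After training, the adapter can be reloaded for inference with Unsloth
# (a hedged sketch; the path and settings below are illustrative):
#
#   from unsloth import FastLanguageModel
#   model, tokenizer = FastLanguageModel.from_pretrained(
#       model_name="models/oncoagent_adapters/tier1/final",  # saved LoRA adapter dir
#       max_seq_length=2048,
#       load_in_4bit=True,
#   )
#   FastLanguageModel.for_inference(model)  # switch Unsloth kernels to generation mode
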
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="OncoAgent Unsloth QLoRA Fine-Tuning (Dual-Tier Qwen Architecture)"
    )
    parser.add_argument(
        "--tier",
        type=int,
        choices=[1, 2],
        required=True,
        help="Select the architectural tier to train (1 = Qwen 3.5 9B Speed, 2 = Qwen 3.6 27B Reasoning)",
    )
    parser.add_argument(
        "--max_steps",
        type=int,
        default=-1,
        help="Maximum number of training steps (useful for validation/dry runs)",
    )
    parser.add_argument(
        "--resume",
        action="store_true",
        help="Resume from the last checkpoint (critical for crash recovery on cloud instances)",
    )
    args = parser.parse_args()

    train(args.tier, max_steps=args.max_steps, resume=args.resume)
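# Example invocations (the script filename below is illustrative):
#   python finetune_oncoagent.py --tier 1 --max_steps 10   # quick Tier 1 dry run
#   python finetune_oncoagent.py --tier 2                  # full Tier 2 training run
#   python finetune_oncoagent.py --tier 2 --resume         # resume Tier 2 after a crash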