Spaces:

DGXAI
/

driftcall

Runtime error

App Files Files Community

driftcall / cells /step_12_gemma_boot.py

saumilyajj

Upload folder using huggingface_hub

b43d8da verified about 1 month ago

raw

history blame contribute delete

7.74 kB

	"""Gemma 3n E2B boot via Unsloth FastModel (docs/modules/training.md §3.1).

	Contract:
	- Base model: ``unsloth/gemma-3n-E2B-it`` (4-bit Dynamic
	NF4 quantization).
	- Precision: hardware-aware.
	V100 (sm_70) — explicit FP16 (``dtype=torch.float16``); Gemma 3n is
	BF16-native, so we force FP16 on V100 to avoid BF16 software-emulation
	slowdown / numerical instability.
	H100 (sm_90) — BF16 (``dtype=torch.bfloat16``); uses native tensor cores.
	- LoRA: r=16, α=32, dropout=0.05, vision towers frozen, language + attention
	+ MLP trainable via Unsloth's multimodal API (``finetune_vision_layers=False,
	finetune_language_layers=True, finetune_attention_modules=True,
	finetune_mlp_modules=True``), Unsloth gradient checkpointing,
	``random_state=3407``.
	- V100 halt: ``next(model.parameters()).dtype`` MUST be ``torch.float16``
	after FP16 load; any other dtype on that first parameter triggers
	:class:`BF16SlippageError` before LoRA attach and optimizer build.
	- H100 halt: ``next(model.parameters()).dtype`` MUST be ``torch.bfloat16``
	after BF16 load; any other dtype on that first parameter triggers
	:class:`FP16SlippageError` before LoRA attach and optimizer build.

	Heavy imports (``unsloth``, ``torch``) are deferred inside functions so this
	cell loads on CPU-only CI runners where Unsloth is not installed. Tests mock
	``FastModel.from_pretrained`` and ``FastModel.get_peft_model``.
	"""

	from __future__ import annotations

	from dataclasses import dataclass
	from typing import Any, Literal

	# --- Supported GPU targets ---------------------------------------------------
	# The precision used at load time is chosen from this closed set of names.
	HardwareT = Literal["v100", "h100"]
	ALLOWED_HARDWARE: tuple[HardwareT, ...] = ("v100", "h100")

	# --- Base checkpoint and context window --------------------------------------
	BASE_MODEL_ID: str = "unsloth/gemma-3n-E2B-it"
	MAX_SEQ_LENGTH: int = 4096

	# --- LoRA adapter hyper-parameters -------------------------------------------
	LORA_R: int = 16
	LORA_ALPHA: int = 32
	LORA_DROPOUT: float = 0.05
	LORA_RANDOM_STATE: int = 3407

	# --- Which Gemma 3n sub-stacks receive adapters ------------------------------
	# Vision layers are excluded; language layers, attention modules and MLP
	# modules are included, so GRPO trains only the language stack (per the
	# Unsloth Gemma 3N notebook, fine-tune section).
	FINETUNE_VISION_LAYERS: bool = False
	FINETUNE_LANGUAGE_LAYERS: bool = True
	FINETUNE_ATTENTION_MODULES: bool = True
	FINETUNE_MLP_MODULES: bool = True


	class BF16SlippageError(AssertionError):
	    """Halt signal for the V100 precision check.

	    Raised by the dtype check when the model's first parameter is not
	    ``torch.float16`` on the V100 path, and also when the model exposes no
	    parameters at all.

	    Why it matters: V100 (sm_70) has no BF16 tensor cores, so BF16 would run
	    through software emulation -- roughly a 10x slowdown plus the
	    numerical-instability patterns described in
	    ``docs/modules/training.md §7a``. Training must stop before the optimizer
	    is built.
	    """


	class FP16SlippageError(AssertionError):
	    """Halt signal for the H100 precision check.

	    Raised by the dtype check when the model's first parameter is not
	    ``torch.bfloat16`` on the H100 path.

	    Why it matters: H100 (sm_90) has native BF16 tensor cores. Running FP16
	    there leaves that hardware capability unused and may cause gradient
	    underflow at large batch sizes. Training must stop before the optimizer
	    is built.
	    """


	@dataclass(frozen=True)
	class BootConfig:
	    """Arguments to :func:`boot_gemma`. Frozen per DriftCall immutability rule.

	    Every field defaults to the matching module-level constant, so
	    ``BootConfig()`` reproduces the documented training setup on a V100.

	    Raises:
	        ValueError: If ``hardware`` is not one of ``ALLOWED_HARDWARE``.
	    """

	    # Checkpoint to load and the sequence length passed to the loader.
	    base_model_id: str = BASE_MODEL_ID
	    max_seq_length: int = MAX_SEQ_LENGTH
	    load_in_4bit: bool = True

	    # LoRA adapter hyper-parameters.
	    lora_r: int = LORA_R
	    lora_alpha: int = LORA_ALPHA
	    lora_dropout: float = LORA_DROPOUT
	    lora_random_state: int = LORA_RANDOM_STATE

	    # Which sub-stacks of the multimodal model receive adapters.
	    finetune_vision_layers: bool = FINETUNE_VISION_LAYERS
	    finetune_language_layers: bool = FINETUNE_LANGUAGE_LAYERS
	    finetune_attention_modules: bool = FINETUNE_ATTENTION_MODULES
	    finetune_mlp_modules: bool = FINETUNE_MLP_MODULES

	    # Forwarded verbatim to ``FastModel.get_peft_model``.
	    use_gradient_checkpointing: str = "unsloth"

	    # GPU target; selects FP16 ("v100") or BF16 ("h100") at load time.
	    hardware: HardwareT = "v100"

	    def __post_init__(self) -> None:
	        """Reject hardware names outside ``ALLOWED_HARDWARE``.

	        Without this check a typo such as ``"V100"`` is not equal to
	        ``"v100"`` and would therefore be treated as the BF16 target when
	        the model is loaded.
	        """
	        if self.hardware not in ALLOWED_HARDWARE:
	            raise ValueError(
	                f"Unsupported hardware {self.hardware!r}; "
	                f"expected one of {ALLOWED_HARDWARE!r}."
	            )


	def assert_dtype_for_hardware(model: Any, hardware: HardwareT) -> None:
	    """Check that the model's first parameter has the dtype ``hardware`` requires.

	    Only ``next(iter(model.parameters()))`` is inspected; later parameters are
	    not examined. ``boot_gemma`` calls this right after the base model is
	    loaded and before LoRA adapters are attached.

	    Args:
	        model: Object exposing ``parameters()``, which yields objects with a
	            ``dtype`` attribute.
	        hardware: ``"v100"`` (requires ``torch.float16``) or ``"h100"``
	            (requires ``torch.bfloat16``).

	    Raises:
	        ValueError: If ``hardware`` is neither ``"v100"`` nor ``"h100"``.
	        BF16SlippageError: On ``"v100"`` when the first parameter is not
	            ``torch.float16``; also when the model has no parameters at all,
	            regardless of ``hardware``.
	        FP16SlippageError: On ``"h100"`` when the first parameter is not
	            ``torch.bfloat16``.
	    """
	    # Reject unknown targets explicitly. A bare ``else`` would route every
	    # value other than "v100" (including typos such as "V100") to the H100
	    # rule and report a misleading FP16 slippage.
	    if hardware not in ("v100", "h100"):
	        raise ValueError(
	            f"Unsupported hardware {hardware!r}; expected 'v100' or 'h100'."
	        )

	    # Deferred so the module stays importable where torch is not installed.
	    import torch

	    try:
	        # iter() lets parameters() return any iterable, not only an iterator.
	        first_param = next(iter(model.parameters()))
	    except StopIteration as exc:  # pragma: no cover - defensive
	        raise BF16SlippageError(
	            "Model has no parameters; cannot verify dtype."
	        ) from exc

	    dtype = first_param.dtype
	    if hardware == "v100":
	        if dtype != torch.float16:
	            raise BF16SlippageError(
	                "BF16 slipped through: V100 unsafe. "
	                f"next(model.parameters()).dtype == {dtype}, expected torch.float16. "
	                "Root cause: Unsloth auto-picked BF16 despite dtype=torch.float16 kwarg. "
	                "Halt training; do NOT proceed on V100."
	            )
	    elif dtype != torch.bfloat16:  # hardware == "h100"
	        raise FP16SlippageError(
	            "FP16 slipped through: H100 should use BF16. "
	            f"next(model.parameters()).dtype == {dtype}, expected torch.bfloat16. "
	            "Root cause: dtype kwarg may have forced FP16 on H100. "
	            "Halt training; do NOT proceed on H100 with FP16."
	        )


	def assert_fp16_dtype(model: Any) -> None:
	    """Run the V100 precision check on ``model``.

	    Legacy entry point kept for call sites written before the hardware-aware
	    API existed. It delegates to :func:`assert_dtype_for_hardware` with the
	    target pinned to ``"v100"``, so the model's first parameter must be
	    ``torch.float16``.

	    Raises:
	        BF16SlippageError: Propagated from the delegated check.
	    """
	    v100_target: HardwareT = "v100"
	    assert_dtype_for_hardware(model, v100_target)


	def boot_gemma(config: BootConfig | None = None) -> tuple[Any, Any]:
	    """Load Gemma 3n E2B in 4-bit + attach LoRA; return (model, tokenizer).

	    Steps (training.md §3.1):
	    1. Pick the load dtype from ``config.hardware``: ``torch.float16`` for
	       ``"v100"``, ``torch.bfloat16`` for ``"h100"``. Any other value is
	       rejected before Unsloth is imported or any weights are loaded.
	    2. ``FastModel.from_pretrained(base_model_id, max_seq_length=...,
	       load_in_4bit=..., dtype=...)``.
	    3. ``assert_dtype_for_hardware(model, hardware)`` -- raises
	       :class:`BF16SlippageError` or :class:`FP16SlippageError` if the dtype
	       does not match the hardware.
	    4. ``FastModel.get_peft_model(model, r=..., lora_alpha=...,
	       lora_dropout=..., finetune_*=..., use_gradient_checkpointing=...,
	       random_state=...)`` with every value taken from ``config``.
	    5. Return ``(peft_model, tokenizer)``.

	    All heavy imports are lazy so the module is importable on CPU-only CI.

	    Args:
	        config: Boot settings; ``None`` means ``BootConfig()`` defaults.

	    Returns:
	        The LoRA-wrapped model and the tokenizer returned by the loader.

	    Raises:
	        ValueError: If ``config.hardware`` is not ``"v100"`` or ``"h100"``.
	        BF16SlippageError: If the V100 dtype check fails after loading.
	        FP16SlippageError: If the H100 dtype check fails after loading.
	    """
	    cfg = config if config is not None else BootConfig()

	    import torch

	    # Explicit table instead of a ternary: an unknown or misspelled target
	    # must not silently fall back to BF16.
	    dtype_by_hardware = {"v100": torch.float16, "h100": torch.bfloat16}
	    if cfg.hardware not in dtype_by_hardware:
	        raise ValueError(
	            f"Unsupported hardware {cfg.hardware!r}; "
	            f"expected one of {tuple(dtype_by_hardware)!r}."
	        )
	    dtype = dtype_by_hardware[cfg.hardware]

	    # Imported only after validation so bad configs fail without Unsloth.
	    from unsloth import FastModel

	    model, tokenizer = FastModel.from_pretrained(
	        cfg.base_model_id,
	        max_seq_length=cfg.max_seq_length,
	        load_in_4bit=cfg.load_in_4bit,
	        dtype=dtype,
	    )

	    # Halt before adapters are attached if the loader ignored the dtype.
	    assert_dtype_for_hardware(model, cfg.hardware)

	    peft_model = FastModel.get_peft_model(
	        model,
	        r=cfg.lora_r,
	        lora_alpha=cfg.lora_alpha,
	        lora_dropout=cfg.lora_dropout,
	        finetune_vision_layers=cfg.finetune_vision_layers,
	        finetune_language_layers=cfg.finetune_language_layers,
	        finetune_attention_modules=cfg.finetune_attention_modules,
	        finetune_mlp_modules=cfg.finetune_mlp_modules,
	        use_gradient_checkpointing=cfg.use_gradient_checkpointing,
	        random_state=cfg.lora_random_state,
	    )

	    return peft_model, tokenizer


	# Public API of this cell: the names exported by ``from ... import *``.
	# Entries are listed in ASCII order (uppercase names sort before lowercase).
	__all__ = [
	"ALLOWED_HARDWARE",
	"BASE_MODEL_ID",
	"BF16SlippageError",
	"BootConfig",
	"FINETUNE_ATTENTION_MODULES",
	"FINETUNE_LANGUAGE_LAYERS",
	"FINETUNE_MLP_MODULES",
	"FINETUNE_VISION_LAYERS",
	"FP16SlippageError",
	"HardwareT",
	"LORA_ALPHA",
	"LORA_DROPOUT",
	"LORA_R",
	"LORA_RANDOM_STATE",
	"MAX_SEQ_LENGTH",
	"assert_dtype_for_hardware",
	"assert_fp16_dtype",
	"boot_gemma",
	]