gemma3-faces_ft-2 / README.md
idhant
added readme
2bddafa verified

⚠️ Warning: this model may generate potentially harmful content — for research purposes only.

This model is a fine-tune of `google/gemma-3-4b-it` on the `idhant297/faces-vision-alignment` dataset.

For fine-tuning, the first 1,500 rows of the dataset are used.

The weights are from the last checkpoint of training, i.e. step 188.

Logs of other training runs:

Config for fine-tuning: https://api.wandb.ai/links/moe-research/ifm40pje

# Attach LoRA adapters to the base model: all linear layers are targeted,
# across both the vision tower and the language model, with rank-stabilized
# scaling (rsLoRA) enabled.
model = FastVisionModel.get_peft_model(
    model,
    # Adapter placement — every linear layer in every component.
    target_modules="all-linear",
    finetune_vision_layers=True,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    # LoRA hyperparameters.
    r=128,
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_rslora=True,
    loftq_config=None,
    # Reproducibility.
    random_state=3407,
)
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig

# Put the model into training mode (enables gradient flow on the adapters).
FastVisionModel.for_training(model)

# Supervised fine-tuning run for the vision-language model.
# Effective batch size is 2 x 4 = 8 samples per optimizer step.
trainer = SFTTrainer(
    model=model,
    train_dataset=converted_dataset,
    # NOTE(review): the collator receives the full processor while
    # processing_class only gets its tokenizer — confirm this is intended.
    processing_class=processor.tokenizer,
    data_collator=UnslothVisionDataCollator(model, processor),
    args=SFTConfig(
        # Batching.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,

        # Training duration.
        num_train_epochs=1,

        # Optimization.
        learning_rate=1e-4,
        max_grad_norm=0.3,
        optim="adamw_torch_fused",
        weight_decay=0.001,

        # Schedule: 30% of steps warm up, then cosine decay.
        warmup_ratio=0.3,
        lr_scheduler_type="cosine",

        # Logging and checkpoints: log every step, save every 100 steps.
        logging_steps=1,
        save_strategy="steps",
        save_steps=100,
        load_best_model_at_end=False,

        # Data loading.
        dataloader_num_workers=0,

        # Experiment tracking.
        report_to="wandb",
        run_name="gemma3-faces-vl-lora",

        # Vision-dataset handling: keep image columns and skip trl's
        # text-only dataset preparation; the collator does the packing.
        remove_unused_columns=False,
        dataset_text_field="",
        dataset_kwargs={"skip_prepare_dataset": True},
        max_length=2048,

        # Reproducibility and output.
        seed=3407,
        output_dir="gemma-vl-ft-faces/outputs-gemma3-faces-1ep-4",
        gradient_checkpointing=True,
    ),
)