YAML Metadata
Warning:
empty or missing yaml metadata in repo card
(https://huggingface.co/docs/hub/model-cards#model-card-metadata)
⚠️ Warning: this model might generate potentially harmful content — for research purposes only.
This model is a finetune of google/gemma-3-4b-it on the idhant297/faces-vision-alignment dataset.
For finetuning, the first 1500 rows of the dataset are used.
The weights are taken from the last checkpoint of the training run, i.e. step 375.
The finetuning config and logs of the training runs are available here: https://api.wandb.ai/links/moe-research/ifm40pje
# Wrap the base model with LoRA adapters via Unsloth's FastVisionModel helper.
# Both the vision tower and the language model are made trainable, covering
# attention and MLP projections in every linear layer ("all-linear").
model = FastVisionModel.get_peft_model(
model,
finetune_vision_layers = True,       # train the vision encoder, not just the LM
finetune_language_layers = True,
finetune_attention_modules = True,
finetune_mlp_modules = True,
r = 32,                              # LoRA rank
lora_alpha = 32,                     # alpha == r, i.e. scaling factor of 1 (pre-rsLoRA)
lora_dropout = 0,
bias = "none",                       # bias terms stay frozen
random_state = 3407,
use_rslora = True,                   # rank-stabilized LoRA: effective scale alpha/sqrt(r)
loftq_config = None,                 # no LoftQ quantized initialization
target_modules = "all-linear",       # attach adapters to every linear layer
)
from unsloth.trainer import UnslothVisionDataCollator
import weave  # NOTE(review): imported but not used in this snippet — presumably enabled elsewhere for wandb/weave tracing
from trl import SFTTrainer, SFTConfig

FastVisionModel.for_training(model)  # Enable for training!

# Supervised finetuning on the converted vision dataset. The Unsloth vision
# collator handles image + text batching, so TRL's own dataset preparation is
# skipped (skip_prepare_dataset=True, empty dataset_text_field).
trainer = SFTTrainer(
model = model,
train_dataset = converted_dataset,
processing_class = processor.tokenizer,
data_collator = UnslothVisionDataCollator(model, processor),
args=SFTConfig(
# COMPLETION-ONLY TRAINING (Default behavior for prompt-completion datasets)
# completion_only_loss=True is DEFAULT - no need to set explicitly
# BATCH SIZE
per_device_train_batch_size = 1,
gradient_accumulation_steps = 4,     # effective batch = 1 * 4 = 4 per device (original comment claimed 8)
# TRAINING DURATION
num_train_epochs = 1,                # start with 1 epoch, then try more
# LEARNING RATE
learning_rate = 2e-4,
max_grad_norm = 0.3,                 # gradient clipping (0.3–1.0 works; 0.3 is safe)
# OPTIMIZER
optim = "adamw_torch_fused",         # stable & memory-efficient (or adamw_torch if you prefer)
weight_decay = 0.001,                # NOTE(review): original comment said "no decay for LoRA matrices" but a nonzero decay is set — confirm intent
# SCHEDULING
warmup_steps = 100,                  # fixed warmup; alternatively warmup_ratio ~5% of total steps
# warmup_ratio=0.01, # ~5% of total steps; OR set warmup_steps=int(0.05*total_steps)
lr_scheduler_type = "cosine",
# LOGGING & CHECKPOINTS
logging_steps = 1,
save_strategy = "steps",
save_steps = 100,
# save_total_limit = 5,
load_best_model_at_end = False,      # no eval set is configured, so there is no "best" checkpoint to restore
# DATA LOADING
dataloader_num_workers = 0,          # 0 = load in the main process (no worker subprocesses)
# WANDB
report_to = "wandb",
run_name = "gemma3-faces-vl-lora-r-32",
# VISION CONFIG - Keep as is
remove_unused_columns = False,       # keep image columns; the collator needs them
dataset_text_field = "",             # unused: the collator builds inputs itself
dataset_kwargs = {"skip_prepare_dataset": True},
# max_length = max_seq_length,
max_length = 2048,
# RANDOM SEED
seed = 3407,
output_dir = "outputs-gemma3-faces-1ep-r-32",
gradient_checkpointing = True,       # trade compute for memory
),
)
- Downloads last month
- 21
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support