YAML Metadata Warning: the repo card has empty or missing YAML metadata
(see https://huggingface.co/docs/hub/model-cards#model-card-metadata).
⚠️ Warning: this model might generate potentially harmful content — for research purposes only.
This model is a finetune of google/gemma-3-4b-it on the idhant297/faces-vision-alignment dataset.
For finetuning, the first 1500 rows of the dataset were used.
The weights are from the last checkpoint of the training run, i.e. step 188.
The finetuning config, along with logs of other training runs, is available at: https://api.wandb.ai/links/moe-research/ifm40pje
# Attach LoRA adapters to both the vision tower and the language model.
# Kwargs are gathered in a dict first so the configuration is easy to log/inspect.
lora_settings = dict(
    finetune_vision_layers=True,      # adapt the vision encoder
    finetune_language_layers=True,    # adapt the language model
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    r=128,                            # LoRA rank
    lora_alpha=32,                    # LoRA scaling factor
    lora_dropout=0,
    bias="none",
    random_state=3407,
    use_rslora=True,                  # rank-stabilized LoRA scaling
    loftq_config=None,
    target_modules="all-linear",
)
model = FastVisionModel.get_peft_model(model, **lora_settings)
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
# Put the model into training mode (enables gradients on the adapters).
FastVisionModel.for_training(model)

# Hyperparameters for the supervised fine-tuning run.
# NOTE: completion-only loss is the default behavior for prompt-completion
# datasets, so it is not set explicitly here.
training_args = SFTConfig(
    # batch size: 2 per device x 4 accumulation steps = effective batch of 8
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # training duration
    num_train_epochs=1,
    # learning rate / clipping
    learning_rate=1e-4,
    max_grad_norm=0.3,
    # optimizer
    optim="adamw_torch_fused",
    weight_decay=0.001,
    # scheduling
    warmup_ratio=0.3,
    lr_scheduler_type="cosine",
    # logging & checkpoints
    logging_steps=1,
    save_strategy="steps",
    save_steps=100,
    load_best_model_at_end=False,
    # data loading
    dataloader_num_workers=0,
    # experiment tracking
    report_to="wandb",
    run_name="gemma3-faces-vl-lora",
    # vision-specific: keep raw columns and skip the text dataset preparation
    remove_unused_columns=False,
    dataset_text_field="",
    dataset_kwargs={"skip_prepare_dataset": True},
    max_length=2048,
    # reproducibility / output
    seed=3407,
    output_dir="gemma-vl-ft-faces/outputs-gemma3-faces-1ep-4",
    gradient_checkpointing=True,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=converted_dataset,
    processing_class=processor.tokenizer,
    data_collator=UnslothVisionDataCollator(model, processor),
    args=training_args,
)
Downloads last month: 9
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support