# /// script
# dependencies = [
#     "trl>=0.12.0",
#     "transformers>=4.36.0",
#     "accelerate>=0.24.0",
#     "trackio",
# ]
# ///
"""Supervised fine-tuning of LFM2.5-1.2B-Instruct on the UNESCO thesaurus SFT data.

The script loads the ``train`` and ``validation`` splits of the dataset,
fine-tunes the base model with TRL's ``SFTTrainer``, and pushes the result to
the Hugging Face Hub. Dependencies are declared in the inline script metadata
block above, which must keep its one-entry-per-line layout to stay parseable.
"""

# NOTE(review): `datasets` is not listed in the inline dependencies; presumably
# it is installed transitively via `trl` -- confirm before pinning versions.
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

DATASET_ID = "unesco-data-ai/unesco-thesaurus-sft"
BASE_MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
# Single source of truth for the target repo: used both for the push
# configuration and for the URL printed once training finishes.
HUB_MODEL_ID = "unesco-data-ai/lfm2.5-1.2b-unesco-tagger-v1"


def main() -> None:
    """Load the dataset, run SFT training, and push the model to the Hub."""
    print("Loading dataset...")
    dataset = load_dataset(DATASET_ID)
    train_dataset = dataset["train"]
    eval_dataset = dataset["validation"]
    print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")

    config = SFTConfig(
        # Output / Hub upload
        output_dir="lfm2.5-1.2b-unesco-tagger",
        push_to_hub=True,
        hub_model_id=HUB_MODEL_ID,
        hub_strategy="every_save",
        # Optimisation: 4 samples x 4 accumulation steps = 16 samples per
        # optimizer step on each device.
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-5,
        max_length=1024,
        warmup_ratio=0.1,
        lr_scheduler_type="cosine",
        bf16=True,
        # Logging, checkpointing and evaluation cadence; saving and evaluating
        # share the same 200-step interval.
        logging_steps=10,
        save_strategy="steps",
        save_steps=200,
        save_total_limit=2,
        eval_strategy="steps",
        eval_steps=200,
        # Experiment tracking
        report_to="trackio",
        project="unesco-keyword-extraction",
        run_name="lfm2.5-1.2b-sft-v1",
    )

    print("Initializing trainer...")
    trainer = SFTTrainer(
        model=BASE_MODEL_ID,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        args=config,
    )

    print("Starting training...")
    trainer.train()

    print("Pushing to Hub...")
    trainer.push_to_hub()

    print(f"Complete! Model at: https://huggingface.co/{HUB_MODEL_ID}")


if __name__ == "__main__":
    main()