| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | """## Import libraries""" |
| |
|
| | import torch |
| | from datasets import load_dataset |
| | from transformers import AutoModelForCausalLM, AutoTokenizer |
| | from trl import SFTConfig, SFTTrainer, setup_chat_format |
| | from peft import LoraConfig |
| |
|
| | """# Load Dataset""" |
| |
|
# Hub identifier of the dataset used for supervised fine-tuning.
dataset_name = "allenai/tulu-3-sft-personas-code"

# Only the "train" split is requested; a held-out split is carved out below.
dataset = load_dataset(dataset_name, split="train")
print(f"Dataset loaded: {dataset}")

# Print one raw record so the schema can be checked by eye.
print("\nSample data:")
print(dataset[0])

# Drop the "prompt" column before training.
# NOTE(review): presumably the conversation the trainer consumes lives in
# another column -- confirm against the dataset card.
dataset = dataset.remove_columns("prompt")

# Hold out 20% of the rows for evaluation. The fixed seed pins the shuffle so
# the train/test membership is identical on every run.
dataset = dataset.train_test_split(test_size=0.2, seed=42)

print(
    f"Train Samples: {len(dataset['train'])}\nTest Samples: {len(dataset['test'])}"
)
| |
|
| | """## Configuration |
| | |
| | Set up the configuration parameters for the fine-tuning process. |
| | """ |
| |
|
| | |
# --- Run configuration ------------------------------------------------------
# All values below are read by the model-loading and trainer-setup code
# further down in this script.

# Checkpoint that is fine-tuned (and reloaded later for the smoke test).
model_name = "Qwen/Qwen3-30B-A3B"

# Directory that receives checkpoints and the final saved model.
output_dir = "./tmp/sft-model"

# Optimisation hyper-parameters.
learning_rate = 2e-4
num_train_epochs = 1

# Batch shape: samples per device per step, and steps accumulated per update.
per_device_train_batch_size = 1
gradient_accumulation_steps = 1
| |
|
| | """## Load model and tokenizer""" |
| |
|
| | |
# Tokenizer published alongside the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Base causal LM with bfloat16 weights. Weight placement is delegated to the
# loader via device_map="auto", and use_cache=False is passed through for the
# training run (gradient checkpointing is enabled in the trainer config).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    use_cache=False,
    torch_dtype=torch.bfloat16,
)
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | """## Configure PEFT (if enabled)""" |
| |
|
| | |
# LoRA adapter settings handed to the SFT trainer.
peft_config = LoraConfig(
    # What is adapted: a causal LM, with every linear layer targeted.
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    # Adapter shape: rank 32 with alpha 16.
    r=32,
    lora_alpha=16,
    # Regularisation; bias terms are not trained.
    lora_dropout=0.05,
    bias="none",
)
| |
|
| | """## Configure SFT Trainer""" |
| |
|
| | |
# Trainer configuration, grouped by concern. Values come from the
# configuration constants defined earlier in this script.
training_args = SFTConfig(
    # Output and checkpointing.
    output_dir=output_dir,
    save_strategy="epoch",
    # Run length.
    # NOTE(review): both max_steps and num_train_epochs are set; per the
    # TrainingArguments docs a positive max_steps takes precedence -- confirm
    # that the 1000-step cap is the intended stopping rule.
    max_steps=1000,
    num_train_epochs=num_train_epochs,
    # Batch shape and memory.
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=True,
    # Optimiser and learning-rate schedule.
    optim="adamw_torch",
    learning_rate=learning_rate,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    # Sequence handling: samples are packed into sequences of up to 1024 tokens.
    max_length=1024,
    packing=True,
    eos_token=tokenizer.eos_token,
    # Numeric precision: bfloat16 on, float16 off.
    bf16=True,
    fp16=False,
    # Logging.
    logging_steps=25,
    report_to="wandb",
)
| |
|
| | """## Initialize and run the SFT Trainer""" |
| |
|
| | |
# Resolve the splits first so the trainer call below stays flat.
train_split = dataset["train"]
eval_split = dataset["test"] if "test" in dataset else None

# Wire the model, LoRA settings, data and tokenizer into the SFT trainer.
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    peft_config=peft_config,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Run fine-tuning.
trainer.train()
| |
|
| | """## Save the fine-tuned model""" |
| |
|
| | |
# Persist the trained model to the configured output directory; the inference
# section below loads the adapter from this same path.
trainer.save_model(output_dir=output_dir)
| |
|
| | """## Test the fine-tuned model""" |
| |
|
from peft import PeftModel, PeftConfig

# Reload a clean copy of the base checkpoint for the inference smoke test.
# - device_map="auto" matches the training-time load; without it the weights
#   are loaded with default placement and generation below runs from there.
# - trust_remote_code is intentionally not passed: the training-time load of
#   this same checkpoint succeeds without it, so enabling execution of
#   repository-supplied code here would only add risk.
# NOTE(review): the training copy of the model may still occupy accelerator
# memory at this point -- free it first if this load runs out of memory.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach the fine-tuned adapter saved in `output_dir` to the base model.
model = PeftModel.from_pretrained(base_model, output_dir)

# Fresh tokenizer for the base checkpoint, loaded the same way as for training.
tokenizer = AutoTokenizer.from_pretrained(model_name)
| | |
# Task description used to smoke-test the fine-tuned model.
prompt = """Write a function called is_palindrome that takes a single string as input and returns True if the string is a palindrome, and False otherwise.

Palindrome Definition:

A palindrome is a word, phrase, number, or other sequence of characters that reads the same forward and backward, ignoring spaces, punctuation, and capitalization.

Example:
```
is_palindrome("racecar") # Returns True
is_palindrome("hello") # Returns False
is_palindrome("A man, a plan, a canal: Panama") # Returns True
```
"""

# Two-turn conversation: a generic system instruction, then the task itself.
system_turn = {"role": "system", "content": "You are a helpful assistant."}
user_turn = {"role": "user", "content": prompt}
messages = [system_turn, user_turn]

# Render the conversation to text with the tokenizer's chat template,
# requesting the generation prompt and leaving tokenization to the next step.
formatted_prompt = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)
print(f"Formatted prompt: {formatted_prompt}")
| |
|
| | |
# Switch to inference mode and move the tokenized prompt to the model's device.
model.eval()
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

# Number of prompt tokens; used below to separate the prompt from the reply.
prompt_length = inputs["input_ids"].shape[1]

# Sample up to 500 new tokens without tracking gradients.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

# The generated sequence starts with the prompt tokens, so decode only what
# follows them; otherwise the prompt is echoed under "Generated Response:".
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print("\nGenerated Response:")
print(response)

# Publish the adapter-wrapped model to the Hub repository.
model.push_to_hub("burtenshaw/Qwen3-30B-A3B-python-code")