Duplicate from burtenshaw/Qwen3-30B-A3B-python-coder

435d779 28 days ago

5.33 kB

	# /// script
	# requires-python = ">=3.10"
	# dependencies = [
	# "datasets",
	# "httpx",
	# "huggingface - hub",
	# "setuptools",
	# "transformers",
	# "torch",
	# "accelerate",
	# "trl",
	# "peft",
	# "wandb",
	# "torchvision",
	# "torchaudio"
	# ]
	# ///


	"""## Import libraries"""

	import torch
	from datasets import load_dataset
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from trl import SFTConfig, SFTTrainer, setup_chat_format
	from peft import LoraConfig

	"""# Load Dataset"""

	dataset_name = "allenai/tulu-3-sft-personas-code" # Example dataset

	# Load dataset
	dataset = load_dataset(dataset_name, split="train")
	print(f"Dataset loaded: {dataset}")

	# Let's look at a sample
	print("\nSample data:")
	print(dataset[0])

	dataset = dataset.remove_columns("prompt")
	dataset = dataset.train_test_split(test_size=0.2)

	print(
	f"Train Samples: {len(dataset['train'])}\nTest Samples: {len(dataset['test'])}"
	)

	"""## Configuration

	Set up the configuration parameters for the fine-tuning process.
	"""

	# Model configuration
	model_name = "Qwen/Qwen3-30B-A3B" # You can change this to any model you want to fine-tune

	# # Other compatible Qwen3 models
	# model_name = "Qwen/Qwen3-32B"
	# model_name = "Qwen/Qwen3-14B"
	# model_name = "Qwen/Qwen3-8B"
	# model_name = "Qwen/Qwen3-4B"
	# model_name = "Qwen/Qwen3-1.7B"
	# model_name = "Qwen/Qwen3-0.6B"

	# Training configuration
	output_dir = "./tmp/sft-model"
	num_train_epochs = 1
	per_device_train_batch_size = 1
	gradient_accumulation_steps = 1
	learning_rate = 2e-4

	"""## Load model and tokenizer"""

	# Load model
	model = AutoModelForCausalLM.from_pretrained(
	model_name,
	torch_dtype=torch.bfloat16,
	use_cache=False, # Disable KV cache during training
	device_map="auto",
	)

	# Load tokenizer
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# # Set up chat formatting (if the model doesn't have a chat template)
	# if tokenizer.chat_template is None:
	# model, tokenizer = setup_chat_format(model, tokenizer, format="chatml")

	# # Set padding token
	# if tokenizer.pad_token is None:
	# tokenizer.pad_token = tokenizer.eos_token

	"""## Configure PEFT (if enabled)"""

	# Set up PEFT configuration if enabled
	peft_config = LoraConfig(
	r=32, # Rank
	lora_alpha=16, # Alpha parameter for LoRA scaling
	lora_dropout=0.05,
	bias="none",
	task_type="CAUSAL_LM",
	target_modules="all-linear",
	)

	"""## Configure SFT Trainer"""

	# Training arguments
	training_args = SFTConfig(
	output_dir=output_dir,
	num_train_epochs=num_train_epochs,
	per_device_train_batch_size=per_device_train_batch_size,
	gradient_accumulation_steps=gradient_accumulation_steps,
	learning_rate=learning_rate,
	gradient_checkpointing=True,
	logging_steps=25,
	save_strategy="epoch",
	optim="adamw_torch",
	lr_scheduler_type="cosine",
	warmup_ratio=0.1,
	max_length=1024,
	packing=True, # Enable packing to increase training efficiency
	eos_token=tokenizer.eos_token,
	bf16=True,
	fp16=False,
	max_steps=1000,
	report_to="wandb", # Disable reporting to avoid wandb prompts
	)

	"""## Initialize and run the SFT Trainer"""

	# Create SFT Trainer
	trainer = SFTTrainer(
	model=model,
	args=training_args,
	train_dataset=dataset["train"],
	eval_dataset=dataset["test"] if "test" in dataset else None,
	peft_config=peft_config,
	processing_class=tokenizer,
	)

	# Train the model
	trainer.train()

	"""## Save the fine-tuned model"""

	# Save the model
	trainer.save_model(output_dir)

	"""## Test the fine-tuned model"""

	from peft import PeftModel, PeftConfig

	# Load the base model
	base_model = AutoModelForCausalLM.from_pretrained(
	model_name, trust_remote_code=True, torch_dtype=torch.bfloat16
	)

	# Load the fine-tuned PEFT model
	model = PeftModel.from_pretrained(base_model, output_dir)
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
	# Test the model with an example
	prompt = """Write a function called is_palindrome that takes a single string as input and returns True if the string is a palindrome, and False otherwise.

	Palindrome Definition:

	A palindrome is a word, phrase, number, or other sequence of characters that reads the same forward and backward, ignoring spaces, punctuation, and capitalization.

	Example:
	```
	is_palindrome("racecar") # Returns True
	is_palindrome("hello") # Returns False
	is_palindrome("A man, a plan, a canal: Panama") # Returns True
	```
	"""

	# Format the chat prompt using the tokenizer's chat template
	messages = [
	{"role": "system", "content": "You are a helpful assistant."},
	{"role": "user", "content": prompt},
	]
	formatted_prompt = tokenizer.apply_chat_template(
	messages, tokenize=False, add_generation_prompt=True
	)
	print(f"Formatted prompt: {formatted_prompt}")

	# Generate response
	model.eval()
	inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
	with torch.no_grad():
	outputs = model.generate(
	**inputs,
	max_new_tokens=500,
	temperature=0.7,
	top_p=0.9,
	do_sample=True,
	pad_token_id=tokenizer.eos_token_id,
	)
	response = tokenizer.decode(outputs[0], skip_special_tokens=True)
	print("\nGenerated Response:")
	print(response)

	model.push_to_hub("burtenshaw/Qwen3-30B-A3B-python-code")