Instructions to use Cyanex/BioGPT-X with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use Cyanex/BioGPT-X with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Cyanex/BioGPT-X")

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Cyanex/BioGPT-X", dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps Settings

vLLM

How to use Cyanex/BioGPT-X with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "Cyanex/BioGPT-X"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "Cyanex/BioGPT-X",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

docker model run hf.co/Cyanex/BioGPT-X

SGLang

How to use Cyanex/BioGPT-X with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "Cyanex/BioGPT-X" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "Cyanex/BioGPT-X",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "Cyanex/BioGPT-X" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "Cyanex/BioGPT-X",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Unsloth Studio

How to use Cyanex/BioGPT-X with Unsloth Studio:

Install Unsloth Studio (macOS, Linux, WSL)

curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Cyanex/BioGPT-X to start chatting

Install Unsloth Studio (Windows)

irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Cyanex/BioGPT-X to start chatting

Using HuggingFace Spaces for Unsloth

# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Cyanex/BioGPT-X to start chatting

Load model with FastModel

pip install unsloth
from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name="Cyanex/BioGPT-X",
    max_seq_length=2048,
)

Docker Model Runner
How to use Cyanex/BioGPT-X with Docker Model Runner:
```
docker model run hf.co/Cyanex/BioGPT-X
```

BioGPT-X / unsloth_compiled_cache / UnslothAlignPropTrainer.py

Cyanex

Upload folder using huggingface_hub

87b76ef verified about 21 hours ago

raw

history blame contribute delete

41.8 kB

	"""
	2026.5.4
	2026.5.8
	4.56.2
	0.22.2
	__UNSLOTH_VERSIONING__
	"""

	# Unsloth auto generated code
	# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU Lesser General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/>.

	from torch import Tensor
	import torch
	import torch.nn as nn
	from torch.nn import functional as F
	from unsloth_zoo.temporary_patches.common import torch_compile
	from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable
	from trl.trainer.alignprop_trainer import (Accelerator, AlignPropConfig, AlignPropTrainer, Any, Callable, DDPOStableDiffusionPipeline, Optional, Path, ProjectConfiguration, PyTorchModelHubMixin, Union, defaultdict, generate_model_card, get_comet_experiment_url, is_wandb_available, logger, logging, os, set_seed, textwrap, torch, wandb, warnings, Accelerator, AlignPropConfig, AlignPropTrainer, Any, Callable, DDPOStableDiffusionPipeline, Optional, ProjectConfiguration, logger, os, set_seed, torch, warnings)


	import os
	import math
	import logging
	from typing import *
	from dataclasses import dataclass, field
	from packaging.version import Version
	import torch
	import numpy as np
	from contextlib import nullcontext
	from torch.nn import functional as F
	import inspect
	from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling
	from transformers.training_args import ParallelMode
	from unsloth_zoo.device_type import DEVICE_TYPE, device_synchronize

	# Wrap trainer with padding to right and enable training mode
	import functools
	from types import MethodType
	try:
	    from unsloth_zoo.gradient_checkpointing import reset_unsloth_gradient_checkpointing_buffers
	except Exception:
	    # Best-effort import: if unsloth_zoo (or this helper inside it) is not
	    # usable, fall back to a no-op so callers can invoke it unconditionally.
	    # `Exception` rather than a bare `except` keeps KeyboardInterrupt and
	    # SystemExit propagating.
	    def reset_unsloth_gradient_checkpointing_buffers():
	        """No-op stand-in used when the unsloth_zoo helper cannot be imported."""
	        pass
	def prepare_for_training_mode(f):
	    """Decorate a trainer method so the model is in training mode while it runs.

	    The returned wrapper:
	      1. on a repeated call (``self._unsloth_training_completed`` is truthy),
	         finishes the active W&B run and un-initialises any ``WandbCallback``
	         found in ``self.callback_handler.callbacks``;
	      2. remembers ``self.model.training`` and calls ``self.model.for_training``
	         (when those attributes exist);
	      3. calls ``f(self, *args, **kwargs)``;
	      4. restores the previous mode, resets the gradient checkpointing buffers
	         and sets ``self._unsloth_training_completed = True``.

	    Args:
	        f: The method to wrap; it is called as ``f(self, *args, **kwargs)``.

	    Returns:
	        The wrapping function, which returns whatever ``f`` returns.
	    """
	    @functools.wraps(f)
	    def wrapper(self, *args, **kwargs):
	        # Finish the previous W&B run if this is a subsequent train() call.
	        # We do this at the START of train() (not the end) so that
	        # evaluate() / log() still work after train() completes.
	        # HF's WandbCallback.setup() will call wandb.init() for the new run.
	        # See: https://github.com/unslothai/unsloth/issues/3954
	        if getattr(self, '_unsloth_training_completed', False):
	            try:
	                import wandb
	                if wandb.run is not None:
	                    wandb.finish()
	                    # Reset HF's WandbCallback so it calls wandb.init() for the new run
	                    for cb in self.callback_handler.callbacks:
	                        if type(cb).__name__ == 'WandbCallback':
	                            cb._initialized = False
	                            break
	            except Exception:
	                # W&B housekeeping is best-effort (wandb may not be installed,
	                # the trainer may have no callback handler); never block training.
	                pass
	        # Enable training mode
	        _was_training = None
	        # Get gradient checkpointing setting from training arguments
	        use_gc = getattr(self.args, 'gradient_checkpointing', True)
	        if hasattr(self, 'model') and hasattr(self.model, "training"):
	            _was_training = self.model.training
	        if hasattr(self, 'model') and hasattr(self.model, "for_training"):
	            self.model.for_training(use_gradient_checkpointing=use_gc)
	        # Forward positional AND keyword arguments unchanged to the wrapped method.
	        output = f(self, *args, **kwargs)
	        # Restore previous mode when possible
	        if hasattr(self, 'model') and hasattr(self.model, "for_inference"):
	            if _was_training is False:
	                self.model.for_inference()
	            elif _was_training is True and hasattr(self.model, "for_training"):
	                self.model.for_training(use_gradient_checkpointing=use_gc)
	        # Reset gradient checkpointing buffers to free memory while staying ready for next run
	        try:
	            reset_unsloth_gradient_checkpointing_buffers()
	        except Exception:
	            pass
	        # Mark that training completed so the next train() call can
	        # finish this W&B run before starting a new one
	        self._unsloth_training_completed = True
	        return output
	    return wrapper

	# Option dictionary passed as `options=` to the @torch.compile decorators
	# used by the chunked log-softmax helpers in this file.
	torch_compile_options = {
	    "epilogue_fusion": True,
	    "max_autotune": False,
	    "shape_padding": True,
	    "trace.enabled": False,
	    "triton.cudagraphs": False,
	}

	@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,)
	def chunked_hidden_states_selective_log_softmax(
	    hidden_states: torch.Tensor,
	    lm_head: torch.Tensor,
	    index: torch.Tensor,
	    chunks: int = 4,
	    logit_scale_multiply: float = 0.0,
	    logit_scale_divide: float = 0.0,
	    logit_softcapping: float = 0.0,
	    temperature: float = 1.0,
	) -> torch.Tensor:
	    """Project hidden states to logits chunk by chunk and return the log-prob of `index`.

	    The leading dimensions of `hidden_states` are flattened into rows, the rows
	    are split into `chunks` pieces, and for each piece the logits
	    ``rows @ lm_head.t()`` are computed, optionally scaled / soft-capped,
	    cast to float32, divided by `temperature`, and reduced to
	    ``logit[index] - logsumexp(logits)``.

	    Args:
	        hidden_states: Tensor whose last dimension is the hidden size; the
	            result is reshaped to its first two dimensions.
	        lm_head: Projection weight; its transpose is right-multiplied onto the rows.
	        index: Token ids to select, one per flattened row.
	        chunks: Number of pieces passed to ``torch.chunk``.
	        logit_scale_multiply: Multiplier applied to the logits; skipped when 0.0.
	        logit_scale_divide: Divisor applied to the logits; skipped when 0.0.
	        logit_softcapping: ``cap * tanh(logits / cap)`` is applied; skipped when 0.0.
	        temperature: Divisor applied after the float32 cast; skipped when 1.0.

	    Returns:
	        Tensor of shape ``(hidden_states.shape[0], hidden_states.shape[1])``
	        holding the selected log-probabilities.
	    """
	    # All Unsloth Zoo code licensed under AGPL3
	    rows = hidden_states.reshape(-1, hidden_states.shape[-1])
	    targets = index.reshape(-1)

	    row_pieces = torch.chunk(rows, chunks=chunks, dim=0)
	    target_pieces = torch.chunk(targets, chunks=chunks, dim=0)

	    collected = []
	    for piece_rows, piece_targets in zip(row_pieces, target_pieces):
	        logits = piece_rows.to(lm_head.dtype) @ lm_head.t()

	        # A value of 0.0 means "feature disabled" for each of the three knobs below.
	        if logit_scale_multiply != 0.0:
	            logits = logits * logit_scale_multiply
	        if logit_scale_divide != 0.0:
	            logits = logits / logit_scale_divide
	        if logit_softcapping != 0.0:
	            logits = logit_softcapping * torch.tanh(logits / logit_softcapping)

	        logits = logits.to(torch.float32)

	        if temperature != 1.0:
	            logits = logits / temperature

	        picked = torch.gather(logits, dim=-1, index=piece_targets.unsqueeze(-1)).squeeze(-1)
	        normaliser = torch.logsumexp(logits, dim=-1)
	        collected.append(picked - normaliser)

	    flat_logps = torch.concat(collected)
	    return flat_logps.reshape((hidden_states.shape[0], hidden_states.shape[1]))

	@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,)
	def chunked_selective_log_softmax(
	    logits,
	    index,
	    temperature: float = 1.0,
	    chunks: int = 4,
	):
	    """Return the log-softmax value at `index` for every row of `logits`, chunk by chunk.

	    `logits` is flattened to ``(-1, logits.shape[-1])`` and `index` to 1-D; both
	    are split into `chunks` pieces. Each piece is cast to float32, divided by
	    `temperature` (unless it is 1.0) and reduced to
	    ``logit[index] - logsumexp(logits)``.

	    Returns:
	        Tensor of shape ``(logits.shape[0], logits.shape[1])``.
	    """
	    logit_pieces = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = chunks, dim = 0)
	    index_pieces = torch.chunk(index.reshape(-1), chunks = chunks, dim = 0)
	    collected = []
	    # Each iteration is equivalent to selective_log_softmax(piece, piece_index)
	    for piece, piece_index in zip(logit_pieces, index_pieces):
	        piece = piece.to(torch.float32)
	        if temperature != 1.0:
	            piece = piece / temperature
	        picked = torch.gather(piece, dim = -1, index = piece_index.unsqueeze(-1)).squeeze(-1)
	        collected.append(picked - torch.logsumexp(piece, dim = -1))
	    flat_logps = torch.concat(collected)
	    return flat_logps.reshape((logits.shape[0], logits.shape[1]))

	def calculate_pad_tokens_in_prompt(
	    input_ids: torch.Tensor,
	    logits_to_keep: int,
	    pad_token_id: int
	) -> torch.Tensor:
	    """Count the pad tokens in the prompt part of every row.

	    The prompt part is everything except the last `logits_to_keep` columns of
	    `input_ids`. Every position equal to `pad_token_id` in that slice is
	    counted (e.g. ``[pad, pad, pad, cat]`` gives 3).

	    Args:
	        input_ids: 2-D tensor indexed as ``[row, position]``.
	        logits_to_keep: Number of trailing columns that are not part of the prompt.
	        pad_token_id: Token id treated as padding.

	    Returns:
	        1-D tensor with one pad count per row.

	    Raises:
	        ValueError: If `logits_to_keep` is not smaller than the sequence length.
	    """
	    if logits_to_keep >= input_ids.shape[1]:
	        raise ValueError("logits_to_keep must be smaller than the sequence length.")

	    if logits_to_keep == 0:
	        # `input_ids[:, :-0]` would be an EMPTY slice; with nothing to drop the
	        # whole sequence is the prompt.
	        prompt_section = input_ids
	    else:
	        prompt_section = input_ids[:, :-logits_to_keep]

	    padding_mask = (prompt_section == pad_token_id)
	    return padding_mask.sum(dim=1)

	def create_completion_attention_mask(
	    completion_input_ids: torch.Tensor,
	    left_pad_tokens_per_prompt: torch.Tensor,
	    max_left_pad: int,
	    pad_token_id: int
	) -> torch.Tensor:
	    """Build a boolean mask that keeps only real completion tokens.

	    For a row laid out as ``[p, p, p, c, c, c, pad, pad, pad]`` - where `p` are
	    leftover prompt tokens picked up by slicing, `c` are completion tokens and
	    `pad` is padding - the result is ``[0, 0, 0, 1, 1, 1, 0, 0, 0]``.

	    The first ``max_left_pad - left_pad_tokens_per_prompt[row]`` positions of
	    each row are masked out, as is every position equal to `pad_token_id`.

	    Returns:
	        Boolean tensor with the same shape as `completion_input_ids`.
	    """
	    _, width = completion_input_ids.shape

	    # Per-row count of leading positions to drop, as a column vector.
	    leading_to_drop = (max_left_pad - left_pad_tokens_per_prompt).unsqueeze(1)

	    positions = torch.arange(width, device=completion_input_ids.device).unsqueeze(0)
	    past_prompt = positions >= leading_to_drop
	    is_real_token = completion_input_ids != pad_token_id

	    return past_prompt & is_real_token

	def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
	    """Pack non-pad tokens to the left of each row, pushing `pad_id` entries right.

	    The relative order of the non-pad tokens within a row is preserved.
	    """
	    keep = tensor.ne(pad_id)
	    # The sort key is a two-valued mask, so the sort must be stable to keep
	    # the original token order inside each group.
	    order = keep.argsort(dim=1, descending=True, stable=True)
	    return tensor.gather(1, order)

	def align_logprobs_with_mask(
	    logprob_tensor: torch.Tensor,
	    attention_mask: torch.Tensor,
	    pad_value: float = 0.0
	) -> torch.Tensor:
	    """Shift each row of `logprob_tensor` right so it starts where the mask starts.

	    A tensor shaped like `attention_mask` is filled with `pad_value`; row `i`
	    of `logprob_tensor` is then written into it starting at column
	    ``argmax(attention_mask[i])``. Values that would land beyond the mask
	    width are dropped.

	    Returns:
	        Tensor with the shape of `attention_mask` and the dtype of `logprob_tensor`.
	    """
	    dev = logprob_tensor.device
	    n_rows, n_logps = logprob_tensor.shape
	    target_width = attention_mask.shape[1]

	    aligned = torch.full(
	        attention_mask.shape,
	        fill_value=pad_value,
	        dtype=logprob_tensor.dtype,
	        device=dev,
	    )

	    # Column offset per row: index of the maximum mask entry.
	    offsets = torch.argmax(attention_mask, dim=1)

	    # Destination column for every (row, logprob position) pair.
	    target_cols = offsets.unsqueeze(1) + torch.arange(n_logps, device=dev)
	    # Matching row index for every destination column, same shape as target_cols.
	    target_rows = torch.arange(n_rows, device=dev).unsqueeze(1).expand_as(target_cols)

	    # Only positions that fall inside the target width may be written.
	    in_bounds = target_cols < target_width

	    # Boolean indexing flattens the surviving entries to 1-D, so the three
	    # index/value tensors line up element for element.
	    aligned[target_rows[in_bounds], target_cols[in_bounds]] = logprob_tensor[in_bounds]

	    return aligned

	def autotune_batch_and_chunks(
	    total_input_rows,
	    seq_len,
	    hidden_size,
	    vocab_size,
	    dtype_bytes=16,
	    multiplier=None
	):
	    """Pick the largest batch size whose estimated memory fits the device budget.

	    The budget is 80% of the free device memory (CUDA, then XPU), or 8 GB when
	    neither device is available. Candidate batch sizes run from
	    `total_input_rows` down to 1; for each one the hidden-state and (chunked)
	    logit footprints are estimated in GB and the first candidate that fits
	    is returned.

	    Args:
	        total_input_rows: Largest batch size to consider.
	        seq_len: Sequence length used in the estimate.
	        hidden_size: Hidden dimension used in the estimate.
	        vocab_size: Vocabulary size used in the estimate.
	        dtype_bytes: Per-element size factor used in the estimate.
	        multiplier: Logit chunk multiplier; when None it is
	            ``max(4, seq_len // 4096)``.

	    Returns:
	        ``(batch_size, multiplier)``. When no candidate fits, the batch size
	        falls back to 4.
	    """
	    bytes_to_gb = 1024**3

	    if multiplier is None:
	        final_m = max(4, seq_len // 4096)
	    else:
	        final_m = multiplier

	    if torch.cuda.is_available():
	        free_bytes, _ = torch.cuda.mem_get_info()
	        limit_gb = (free_bytes / bytes_to_gb) * 0.80
	    elif hasattr(torch, "xpu") and torch.xpu.is_available():
	        # For XPU: estimate free memory from total - reserved
	        total_mem = torch.xpu.get_device_properties(0).total_memory
	        reserved_mem = torch.xpu.memory_reserved()
	        free_bytes = total_mem - reserved_mem
	        limit_gb = (free_bytes / bytes_to_gb) * 0.80
	    else:
	        # Fallback: assume 8GB available
	        limit_gb = 8.0

	    # Candidate batch sizes, largest first, so the first fit is the best one.
	    b_vals = torch.arange(total_input_rows, 0, -1, device='cpu', dtype=torch.float32)

	    hidden_gb = (b_vals * seq_len * hidden_size * dtype_bytes) / bytes_to_gb

	    base_logits = ((b_vals/total_input_rows) * b_vals * seq_len * vocab_size * dtype_bytes) / bytes_to_gb
	    logits_gb = base_logits / final_m

	    total_mem_gb = hidden_gb + logits_gb

	    valid_mask = total_mem_gb <= limit_gb
	    valid_indices = torch.nonzero(valid_mask, as_tuple=False)

	    if valid_indices.shape[0] == 0:
	        # No candidate fits the budget: the run is expected to OOM.
	        return 4, final_m

	    best_idx = valid_indices[0].item()
	    final_b = int(b_vals[best_idx].item())

	    return final_b, final_m

	def sanitize_logprob(logprob):
	    """Return ``logprob.logprob``, or None (with a warning) when it is NaN.

	    Local port of trl.scripts.vllm_serve.sanitize_logprob, used to filter NaN
	    logprobs out of vLLM outputs.
	    """
	    value = logprob.logprob
	    if not math.isnan(value):
	        return value
	    logging.getLogger(__name__).warning(
	        f"Generated NaN logprob, token logprob '{logprob}' will be ignored"
	    )
	    return None
	@dataclass
	class UnslothAlignPropConfig(AlignPropConfig):
	    """
	    Configuration class for the [`AlignPropTrainer`], extended with Unsloth-specific fields.

	    Using [`~transformers.HfArgumentParser`] we can turn this class into
	    [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
	    command line.

	    The defaults listed below are the ones of this class's `__init__`. Parameters marked "via `**kwargs`" are
	    not named in the signature and are forwarded to the parent configuration unchanged.

	    Parameters:
	        exp_name (`str`, optional, defaults to `"colab_kernel_launcher"`):
	            Name of this experiment.
	        run_name (`str`, optional, defaults to `""`):
	            Name of this run.
	        seed (`int`, optional, defaults to `3407`):
	            Random seed for reproducibility.
	        log_with (`str` or `None`, optional, defaults to `None`):
	            Log with either `"wandb"` or `"tensorboard"`. Check
	            [tracking](https://huggingface.co/docs/accelerate/usage_guides/tracking) for more details.
	        log_image_freq (`int`, optional, defaults to `1`):
	            Frequency for logging images.
	        tracker_kwargs (`dict[str, Any]`, optional, via `**kwargs`):
	            Keyword arguments for the tracker (e.g., `wandb_project`).
	        accelerator_kwargs (`dict[str, Any]`, optional, via `**kwargs`):
	            Keyword arguments for the accelerator.
	        project_kwargs (`dict[str, Any]`, optional, via `**kwargs`):
	            Keyword arguments for the accelerator project config (e.g., `logging_dir`).
	        tracker_project_name (`str`, optional, defaults to `"trl"`):
	            Name of project to use for tracking.
	        logdir (`str`, optional, defaults to `"logs"`):
	            Top-level logging directory for checkpoint saving.
	        num_epochs (`int`, optional, defaults to `100`):
	            Number of epochs to train.
	        save_freq (`int`, optional, defaults to `1`):
	            Number of epochs between saving model checkpoints.
	        num_checkpoint_limit (`int`, optional, defaults to `5`):
	            Number of checkpoints to keep before overwriting old ones.
	        mixed_precision (`str`, optional, defaults to `"fp16"`):
	            Mixed precision training.
	        allow_tf32 (`bool`, optional, defaults to `True`):
	            Allow `tf32` on Ampere GPUs.
	        resume_from (`str`, optional, defaults to `""`):
	            Path to resume training from a checkpoint.
	        sample_num_steps (`int`, optional, defaults to `50`):
	            Number of sampler inference steps.
	        sample_eta (`float`, optional, defaults to `1.0`):
	            Eta parameter for the DDIM sampler.
	        sample_guidance_scale (`float`, optional, defaults to `5.0`):
	            Classifier-free guidance weight.
	        train_batch_size (`int`, optional, defaults to `1`):
	            Batch size for training.
	        train_use_8bit_adam (`bool`, optional, defaults to `False`):
	            Whether to use the 8bit Adam optimizer from `bitsandbytes`.
	        train_learning_rate (`float`, optional, defaults to `5e-5`):
	            Learning rate.
	        train_adam_beta1 (`float`, optional, defaults to `0.9`):
	            Beta1 for Adam optimizer.
	        train_adam_beta2 (`float`, optional, defaults to `0.999`):
	            Beta2 for Adam optimizer.
	        train_adam_weight_decay (`float`, optional, defaults to `1e-3`):
	            Weight decay for Adam optimizer.
	        train_adam_epsilon (`float`, optional, defaults to `1e-8`):
	            Epsilon value for Adam optimizer.
	        train_gradient_accumulation_steps (`int`, optional, defaults to `2`):
	            Number of gradient accumulation steps.
	        train_max_grad_norm (`float`, optional, defaults to `1.0`):
	            Maximum gradient norm for gradient clipping.
	        negative_prompts (`str` or `None`, optional, defaults to `None`):
	            Comma-separated list of prompts to use as negative examples.
	        truncated_backprop_rand (`bool`, optional, defaults to `True`):
	            If `True`, randomized truncation to different diffusion timesteps is used.
	        truncated_backprop_timestep (`int`, optional, defaults to `49`):
	            Absolute timestep to which the gradients are backpropagated. Used only if `truncated_backprop_rand=False`.
	        truncated_rand_backprop_minmax (`tuple[int, int]`, optional, via `**kwargs`):
	            Range of diffusion timesteps for randomized truncated backpropagation.
	        push_to_hub (`bool`, optional, defaults to `False`):
	            Whether to push the final model to the Hub.
	        vllm_sampling_params (`Any` or `None`, optional, defaults to `None`):
	            vLLM SamplingParams.
	        unsloth_num_chunks (`int`, optional, defaults to `-1`):
	            Chunk size to reduce memory usage. -1 is most efficient.
	        unsloth_logit_chunk_multiplier (`int` or `None`, optional, defaults to `None`):
	            Multiplier for chunked logit computations.
	        unsloth_grpo_mini_batch (`int` or `None`, optional, defaults to `None`):
	            Mini batch size for GRPO hidden state accumulation.

	    Raises:
	        ValueError: If `unsloth_grpo_mini_batch` is larger than `self.generation_batch_size`.
	    """
	    vllm_sampling_params: Optional[Any] = field(
	        default = None,
	        metadata = {'help': 'vLLM SamplingParams'},
	    )
	    unsloth_num_chunks : Optional[int] = field(
	        default = -1,
	        metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
	    )
	    unsloth_logit_chunk_multiplier : Optional[int] = field(
	        default = None,
	        metadata = {'help': 'Multiplier for chunked logit computations.'},
	    )
	    unsloth_grpo_mini_batch : Optional[int] = field(
	        default = None,
	        metadata = {'help': 'Mini batch size for GRPO hidden state accumulation. Default is None unless user defines it.'},
	    )

	    def __init__(
	        self,
	        exp_name = 'colab_kernel_launcher',
	        run_name = '',
	        seed = 3407,
	        log_with = None,
	        log_image_freq = 1,
	        tracker_project_name = 'trl',
	        logdir = 'logs',
	        num_epochs = 100,
	        save_freq = 1,
	        num_checkpoint_limit = 5,
	        mixed_precision = 'fp16',
	        allow_tf32 = True,
	        resume_from = '',
	        sample_num_steps = 50,
	        sample_eta = 1.0,
	        sample_guidance_scale = 5.0,
	        train_batch_size = 1,
	        train_use_8bit_adam = False,
	        train_learning_rate = 5e-05,
	        train_adam_beta1 = 0.9,
	        train_adam_beta2 = 0.999,
	        train_adam_weight_decay = 0.001,
	        train_adam_epsilon = 1e-08,
	        train_gradient_accumulation_steps = 2,
	        train_max_grad_norm = 1.0,
	        negative_prompts = None,
	        truncated_backprop_rand = True,
	        truncated_backprop_timestep = 49,
	        push_to_hub = False,
	        vllm_sampling_params = None,
	        unsloth_num_chunks = -1,
	        unsloth_logit_chunk_multiplier = None,
	        unsloth_grpo_mini_batch = None,

	        **kwargs,
	    ):
	        # Sanity-check the learning rate. This config names it
	        # `train_learning_rate`; there is no `learning_rate` parameter.
	        if train_learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{train_learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
	        if train_learning_rate > 1: print(f'Unsloth: Your learning rate of `{train_learning_rate}` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')

	        super().__init__(
	            exp_name = exp_name,
	            run_name = run_name,
	            seed = seed,
	            log_with = log_with,
	            log_image_freq = log_image_freq,
	            tracker_project_name = tracker_project_name,
	            logdir = logdir,
	            num_epochs = num_epochs,
	            save_freq = save_freq,
	            num_checkpoint_limit = num_checkpoint_limit,
	            mixed_precision = mixed_precision,
	            allow_tf32 = allow_tf32,
	            resume_from = resume_from,
	            sample_num_steps = sample_num_steps,
	            sample_eta = sample_eta,
	            sample_guidance_scale = sample_guidance_scale,
	            train_batch_size = train_batch_size,
	            train_use_8bit_adam = train_use_8bit_adam,
	            train_learning_rate = train_learning_rate,
	            train_adam_beta1 = train_adam_beta1,
	            train_adam_beta2 = train_adam_beta2,
	            train_adam_weight_decay = train_adam_weight_decay,
	            train_adam_epsilon = train_adam_epsilon,
	            train_gradient_accumulation_steps = train_gradient_accumulation_steps,
	            train_max_grad_norm = train_max_grad_norm,
	            negative_prompts = negative_prompts,
	            truncated_backprop_rand = truncated_backprop_rand,
	            truncated_backprop_timestep = truncated_backprop_timestep,
	            push_to_hub = push_to_hub,**kwargs)
	        self.vllm_sampling_params = vllm_sampling_params
	        self.unsloth_num_chunks = unsloth_num_chunks
	        if unsloth_grpo_mini_batch is not None:
	            # NOTE(review): `generation_batch_size` is not assigned anywhere in this
	            # class; presumably it must come from the parent config - confirm.
	            if self.generation_batch_size >= unsloth_grpo_mini_batch:
	                self.unsloth_grpo_mini_batch = unsloth_grpo_mini_batch
	            else:
	                raise ValueError(
	                    f"Unsloth GRPO mini batch size needs to be less than or equal to the effective generation batch size, "
	                    f"which is self.per_device_train_batch_size * gradient_accumulation_steps."
	                )
	        self.unsloth_logit_chunk_multiplier = unsloth_logit_chunk_multiplier

	class _UnslothAlignPropTrainer(PyTorchModelHubMixin):
	""""""

	_tag_names = ["trl", "alignprop"]

	def __init__(
	    self,
	    config: AlignPropConfig,
	    reward_function: Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor],
	    prompt_function: Callable[[], tuple[str, Any]],
	    sd_pipeline: DDPOStableDiffusionPipeline,
	    image_samples_hook: Optional[Callable[[Any, Any, Any], Any]] = None,
	):
	    """Set up the accelerator, trackers, pipeline dtypes, optimizer and resume state.

	    Args:
	        config: Trainer configuration; stored as `self.config`.
	        reward_function: Stored as `self.reward_fn`; called by `compute_rewards`
	            with images, prompts and prompt metadata.
	        prompt_function: Stored as `self.prompt_fn`; called without arguments to
	            obtain a `(prompt, metadata)` pair.
	        sd_pipeline: Diffusion pipeline whose `vae`, `text_encoder` and `unet` are
	            moved to the accelerator device.
	        image_samples_hook: Optional callback stored as `self.image_samples_callback`;
	            a warning is logged when it is None.

	    Raises:
	        ValueError: If `config.resume_from` points to a directory that contains
	            no `checkpoint_*` entries.
	    """
	    warnings.warn(
	        "AlignPropTrainer is deprecated and will be removed in version 0.23.0.",
	        DeprecationWarning,
	    )
	    if image_samples_hook is None:
	        logger.warning("No image_samples_hook provided; no images will be logged")

	    self.prompt_fn = prompt_function
	    self.reward_fn = reward_function
	    self.config = config
	    self.image_samples_callback = image_samples_hook

	    accelerator_project_config = ProjectConfiguration(**self.config.project_kwargs)

	    if self.config.resume_from:
	        self.config.resume_from = os.path.normpath(os.path.expanduser(self.config.resume_from))
	        if "checkpoint_" not in os.path.basename(self.config.resume_from):
	            # get the most recent checkpoint in this directory
	            checkpoints = list(
	                filter(
	                    lambda x: "checkpoint_" in x,
	                    os.listdir(self.config.resume_from),
	                )
	            )
	            if len(checkpoints) == 0:
	                raise ValueError(f"No checkpoints found in {self.config.resume_from}")
	            # Checkpoint directories are ordered by the integer after the last "_".
	            checkpoint_numbers = sorted([int(x.split("_")[-1]) for x in checkpoints])
	            self.config.resume_from = os.path.join(
	                self.config.resume_from,
	                f"checkpoint_{checkpoint_numbers[-1]}",
	            )

	            # Continue numbering after the checkpoint that is being resumed.
	            accelerator_project_config.iteration = checkpoint_numbers[-1] + 1

	    self.accelerator = Accelerator(
	        log_with=self.config.log_with,
	        mixed_precision=self.config.mixed_precision,
	        project_config=accelerator_project_config,
	        # we always accumulate gradients across timesteps; we want config.train.gradient_accumulation_steps to be the
	        # number of samples we accumulate across, so we need to multiply by the number of training timesteps to get
	        # the total number of optimizer steps to accumulate across.
	        # NOTE(review): no multiplication happens here; the configured value is passed through as-is.
	        gradient_accumulation_steps=self.config.train_gradient_accumulation_steps,
	        **self.config.accelerator_kwargs,
	    )

	    is_using_tensorboard = config.log_with is not None and config.log_with == "tensorboard"

	    if self.accelerator.is_main_process:
	        # The config dict is passed flat for tensorboard and nested under
	        # "alignprop_trainer_config" for every other tracker.
	        self.accelerator.init_trackers(
	            self.config.tracker_project_name,
	            config=dict(alignprop_trainer_config=config.to_dict())
	            if not is_using_tensorboard
	            else config.to_dict(),
	            init_kwargs=self.config.tracker_kwargs,
	        )

	    logger.info(f"\n{config}")

	    set_seed(self.config.seed, device_specific=True)

	    self.sd_pipeline = sd_pipeline

	    self.sd_pipeline.set_progress_bar_config(
	        position=1,
	        disable=not self.accelerator.is_local_main_process,
	        leave=False,
	        desc="Timestep",
	        dynamic_ncols=True,
	    )

	    # For mixed precision training we cast all non-trainable weights [vae, non-lora text_encoder and non-lora unet] to half-precision
	    # as these weights are only used for inference, keeping weights in full precision is not required.
	    if self.accelerator.mixed_precision == "fp16":
	        inference_dtype = torch.float16
	    elif self.accelerator.mixed_precision == "bf16":
	        inference_dtype = torch.bfloat16
	    else:
	        inference_dtype = torch.float32

	    self.sd_pipeline.vae.to(self.accelerator.device, dtype=inference_dtype)
	    self.sd_pipeline.text_encoder.to(self.accelerator.device, dtype=inference_dtype)
	    self.sd_pipeline.unet.to(self.accelerator.device, dtype=inference_dtype)

	    trainable_layers = self.sd_pipeline.get_trainable_layers()

	    # Checkpoint saving/loading is delegated to the pipeline through these hooks.
	    self.accelerator.register_save_state_pre_hook(self._save_model_hook)
	    self.accelerator.register_load_state_pre_hook(self._load_model_hook)

	    # Enable TF32 for faster training on Ampere GPUs,
	    # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
	    if self.config.allow_tf32 and torch.cuda.is_available():
	        torch.backends.cuda.matmul.allow_tf32 = True

	    # `get_trainable_layers()` may return either a module-like object or a list of parameters.
	    self.optimizer = self._setup_optimizer(
	        trainable_layers.parameters() if not isinstance(trainable_layers, list) else trainable_layers
	    )

	    # Embedding of the negative prompt(s) (the empty string when none are configured),
	    # computed once here and repeated per batch in `_generate_samples`.
	    self.neg_prompt_embed = self.sd_pipeline.text_encoder(
	        self.sd_pipeline.tokenizer(
	            [""] if self.config.negative_prompts is None else self.config.negative_prompts,
	            return_tensors="pt",
	            padding="max_length",
	            truncation=True,
	            max_length=self.sd_pipeline.tokenizer.model_max_length,
	        ).input_ids.to(self.accelerator.device)
	    )[0]

	    # NOTE: for some reason, autocast is necessary for non-lora training but for lora training it isn't necessary and it uses
	    # more memory
	    self.autocast = self.sd_pipeline.autocast or self.accelerator.autocast

	    if hasattr(self.sd_pipeline, "use_lora") and self.sd_pipeline.use_lora:
	        unet, self.optimizer = self.accelerator.prepare(trainable_layers, self.optimizer)
	        # With LoRA only the parameters that require gradients are kept (as a list).
	        self.trainable_layers = list(filter(lambda p: p.requires_grad, unet.parameters()))
	    else:
	        self.trainable_layers, self.optimizer = self.accelerator.prepare(trainable_layers, self.optimizer)

	    if config.resume_from:
	        logger.info(f"Resuming from {config.resume_from}")
	        self.accelerator.load_state(config.resume_from)
	        # The epoch to start from is one past the number encoded in the checkpoint name.
	        self.first_epoch = int(config.resume_from.split("_")[-1]) + 1
	    else:
	        self.first_epoch = 0

	def compute_rewards(self, prompt_image_pairs):
	    """Score a batch with `self.reward_fn` and return only the reward.

	    `self.reward_fn` is called with the "images", "prompts" and
	    "prompt_metadata" entries of `prompt_image_pairs`; the second element
	    of its result (the reward metadata) is discarded.
	    """
	    images = prompt_image_pairs["images"]
	    prompts = prompt_image_pairs["prompts"]
	    metadata = prompt_image_pairs["prompt_metadata"]
	    reward, _ = self.reward_fn(images, prompts, metadata)
	    return reward

	def step(self, epoch: int, global_step: int):
	    """
	    Perform a single step of training.

	    Args:
	        epoch (int): The current epoch.
	        global_step (int): The current global step.

	    Side Effects:
	        - Model weights are updated
	        - Logs the statistics to the accelerator trackers.
	        - If `self.image_samples_callback` is not None, it will be called with the prompt_image_pairs, global_step,
	          and the accelerator tracker.
	        - The accelerator state is saved when `epoch` is a non-zero multiple of `config.save_freq`
	          (main process only).

	    Returns:
	        global_step (int): The updated global step.

	    Raises:
	        ValueError: If the accelerator did not synchronise gradients by the end of the accumulation loop.
	    """
	    # Per-step statistics; one entry is appended per accumulation iteration.
	    info = defaultdict(list)

	    self.sd_pipeline.unet.train()

	    for _ in range(self.config.train_gradient_accumulation_steps):
	        with self.accelerator.accumulate(self.sd_pipeline.unet), self.autocast(), torch.enable_grad():
	            prompt_image_pairs = self._generate_samples(
	                batch_size=self.config.train_batch_size,
	            )

	            rewards = self.compute_rewards(prompt_image_pairs)

	            prompt_image_pairs["rewards"] = rewards

	            # Detached copy gathered across processes, used only for logging statistics.
	            rewards_vis = self.accelerator.gather(rewards).detach().cpu().numpy()

	            loss = self.calculate_loss(rewards)

	            self.accelerator.backward(loss)

	            # Clip only on iterations where the accelerator synchronises gradients.
	            if self.accelerator.sync_gradients:
	                self.accelerator.clip_grad_norm_(
	                    self.trainable_layers.parameters()
	                    if not isinstance(self.trainable_layers, list)
	                    else self.trainable_layers,
	                    self.config.train_max_grad_norm,
	                )

	            self.optimizer.step()
	            self.optimizer.zero_grad()

	        info["reward_mean"].append(rewards_vis.mean())
	        info["reward_std"].append(rewards_vis.std())
	        info["loss"].append(loss.item())

	    # Checks if the accelerator has performed an optimization step behind the scenes
	    if self.accelerator.sync_gradients:
	        # log training-related stuff
	        info = {k: torch.mean(torch.tensor(v)) for k, v in info.items()}
	        info = self.accelerator.reduce(info, reduction="mean")
	        info.update({"epoch": epoch})
	        self.accelerator.log(info, step=global_step)
	        global_step += 1
	        info = defaultdict(list)
	    else:
	        raise ValueError(
	            "Optimization step should have been performed by this point. Please check calculated gradient accumulation settings."
	        )
	    # Logs generated images
	    # (only the samples from the last accumulation iteration are passed to the callback)
	    if self.image_samples_callback is not None and global_step % self.config.log_image_freq == 0:
	        self.image_samples_callback(prompt_image_pairs, global_step, self.accelerator.trackers[0])

	    if epoch != 0 and epoch % self.config.save_freq == 0 and self.accelerator.is_main_process:
	        self.accelerator.save_state()

	    return global_step

	def calculate_loss(self, rewards):
	    """
	    Turn a batch of rewards into the scalar training loss.

	    Args:
	        rewards (torch.Tensor):
	            Differentiable reward scalars for each generated image, shape: [batch_size]

	    Returns:
	        loss (torch.Tensor): `10.0` minus the mean reward.
	    """
	    # The constant is specific to the Aesthetic Reward function used in
	    # AlignProp (https://huggingface.co/papers/2310.03739)
	    mean_reward = rewards.mean()
	    return 10.0 - mean_reward

	def loss(
	    self,
	    advantages: torch.Tensor,
	    clip_range: float,
	    ratio: torch.Tensor,
	):
	    """Return the mean of the element-wise maximum of the clipped and unclipped losses.

	    The unclipped term is ``-advantages * ratio``; the clipped term uses
	    `ratio` clamped to ``[1 - clip_range, 1 + clip_range]``.
	    """
	    negated_advantages = -advantages
	    bounded_ratio = torch.clamp(ratio, 1.0 - clip_range, 1.0 + clip_range)
	    per_sample = torch.maximum(
	        negated_advantages * ratio,
	        negated_advantages * bounded_ratio,
	    )
	    return torch.mean(per_sample)

	def _setup_optimizer(self, trainable_layers_parameters):
	if self.config.train_use_8bit_adam:
	import bitsandbytes

	optimizer_cls = bitsandbytes.optim.AdamW8bit
	else:
	optimizer_cls = torch.optim.AdamW

	return optimizer_cls(
	trainable_layers_parameters,
	lr=self.config.train_learning_rate,
	betas=(self.config.train_adam_beta1, self.config.train_adam_beta2),
	weight_decay=self.config.train_adam_weight_decay,
	eps=self.config.train_adam_epsilon,
	)

	def _save_model_hook(self, models, weights, output_dir):
	self.sd_pipeline.save_checkpoint(models, weights, output_dir)
	weights.pop() # ensures that accelerate doesn't try to handle saving of the model

	def _load_model_hook(self, models, input_dir):
	self.sd_pipeline.load_checkpoint(models, input_dir)
	models.pop() # ensures that accelerate doesn't try to handle loading of the model

	def _generate_samples(self, batch_size, with_grad=True, prompts=None):
	"""
	Generate samples from the model

	Args:
	batch_size (int): Batch size to use for sampling
	with_grad (bool): Whether the generated RGBs should have gradients attached to it.

	Returns:
	prompt_image_pairs (dict[Any])
	"""
	prompt_image_pairs = {}

	sample_neg_prompt_embeds = self.neg_prompt_embed.repeat(batch_size, 1, 1)

	if prompts is None:
	prompts, prompt_metadata = zip(*[self.prompt_fn() for _ in range(batch_size)])
	else:
	prompt_metadata = [{} for _ in range(batch_size)]

	prompt_ids = self.sd_pipeline.tokenizer(
	prompts,
	return_tensors="pt",
	padding="max_length",
	truncation=True,
	max_length=self.sd_pipeline.tokenizer.model_max_length,
	).input_ids.to(self.accelerator.device)

	prompt_embeds = self.sd_pipeline.text_encoder(prompt_ids)[0]

	if with_grad:
	sd_output = self.sd_pipeline.rgb_with_grad(
	prompt_embeds=prompt_embeds,
	negative_prompt_embeds=sample_neg_prompt_embeds,
	num_inference_steps=self.config.sample_num_steps,
	guidance_scale=self.config.sample_guidance_scale,
	eta=self.config.sample_eta,
	truncated_backprop_rand=self.config.truncated_backprop_rand,
	truncated_backprop_timestep=self.config.truncated_backprop_timestep,
	truncated_rand_backprop_minmax=self.config.truncated_rand_backprop_minmax,
	output_type="pt",
	)
	else:
	sd_output = self.sd_pipeline(
	prompt_embeds=prompt_embeds,
	negative_prompt_embeds=sample_neg_prompt_embeds,
	num_inference_steps=self.config.sample_num_steps,
	guidance_scale=self.config.sample_guidance_scale,
	eta=self.config.sample_eta,
	output_type="pt",
	)

	images = sd_output.images

	prompt_image_pairs["images"] = images
	prompt_image_pairs["prompts"] = prompts
	prompt_image_pairs["prompt_metadata"] = prompt_metadata

	return prompt_image_pairs

	def train(self, epochs: Optional[int] = None):
	    """
	    Train the model for a given number of epochs

	    Args:
	        epochs (`int` or `None`, optional, defaults to `None`):
	            Exclusive upper bound of the epoch range, which starts at `self.first_epoch`. Falls back to
	            `self.config.num_epochs` when `None`.
	    """
	    last_epoch = self.config.num_epochs if epochs is None else epochs

	    # `step` returns the updated global step, which is fed into the next epoch.
	    step_counter = 0
	    for epoch_index in range(self.first_epoch, last_epoch):
	        step_counter = self.step(epoch_index, step_counter)

	def _save_pretrained(self, save_directory):
	self.sd_pipeline.save_pretrained(save_directory)
	self.create_model_card()

	# Ensure the model card is saved along with the checkpoint
	def _save_checkpoint(self, model, trial):
	    """
	    Create the model card, then delegate the actual checkpointing to the parent class.

	    The model name is the last path component of `args.output_dir` when no `args.hub_model_id` is set,
	    otherwise the part of the hub id after the final "/".
	    """
	    hub_id = self.args.hub_model_id
	    model_name = Path(self.args.output_dir).name if hub_id is None else hub_id.split("/")[-1]
	    self.create_model_card(model_name=model_name)
	    super()._save_checkpoint(model, trial)

	def create_model_card(
	    self,
	    model_name: Optional[str] = None,
	    dataset_name: Optional[str] = None,
	    tags: Union[str, list[str], None] = None,
	):
	    """
	    Creates a draft of a model card using the information available to the `Trainer` and writes it to
	    `README.md` inside `args.output_dir`. Does nothing on processes other than world process zero.

	    Args:
	        model_name (`str` or `None`, optional, defaults to `None`):
	            Name of the model.
	        dataset_name (`str` or `None`, optional, defaults to `None`):
	            Name of the dataset used for training.
	        tags (`str`, `list[str]` or `None`, optional, defaults to `None`):
	            Tags to be associated with the model card.
	    """
	    if not self.is_world_process_zero():
	        return

	    model_config = self.model.config

	    # A `_name_or_path` that is a local directory is not reported as the base model.
	    base_model = None
	    if hasattr(model_config, "_name_or_path") and not os.path.isdir(model_config._name_or_path):
	        base_model = model_config._name_or_path

	    # normalize `tags` to a mutable set
	    if isinstance(tags, str):
	        tag_set = {tags}
	    elif tags is None:
	        tag_set = set()
	    else:
	        tag_set = set(tags)

	    if hasattr(model_config, "unsloth_version"):
	        tag_set.add("unsloth")
	    if "JOB_ID" in os.environ:
	        tag_set.add("hf_jobs")
	    tag_set.update(self._tag_names)

	    # docstyle-ignore
	    citation = textwrap.dedent("""\
	@article{prabhudesai2024aligning,
	title = {{Aligning Text-to-Image Diffusion Models with Reward Backpropagation}},
	author = {Mihir Prabhudesai and Anirudh Goyal and Deepak Pathak and Katerina Fragkiadaki},
	year = 2024,
	eprint = {arXiv:2310.03739}
	}""")

	    # Only link a wandb run when wandb is installed and a run is active.
	    wandb_url = None
	    if is_wandb_available() and wandb.run is not None:
	        wandb_url = wandb.run.url

	    model_card = generate_model_card(
	        base_model=base_model,
	        model_name=model_name,
	        hub_model_id=self.hub_model_id,
	        dataset_name=dataset_name,
	        tags=tag_set,
	        wandb_url=wandb_url,
	        comet_url=get_comet_experiment_url(),
	        trainer_name="AlignProp",
	        trainer_citation=citation,
	        paper_title="Aligning Text-to-Image Diffusion Models with Reward Backpropagation",
	        paper_id="2310.03739",
	    )

	    readme_path = os.path.join(self.args.output_dir, "README.md")
	    model_card.save(readme_path)
	class UnslothAlignPropTrainer(_UnslothAlignPropTrainer):
	    """

	    The AlignPropTrainer optimises diffusion models with reward backpropagation. Note, this trainer is
	    heavily inspired by the work here: https://github.com/mihirp1998/AlignProp/ As of now only Stable Diffusion based
	    pipelines are supported

	    Attributes:
	        config (`AlignPropConfig`):
	            Configuration object for AlignPropTrainer. Check the documentation of `AlignPropConfig` for more
	            details. When `None`, a default `UnslothAlignPropConfig()` is used.
	        reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`):
	            Reward function to be used
	        prompt_function (`Callable[[], tuple[str, Any]]`):
	            Function to generate prompts to guide model
	        sd_pipeline (`DDPOStableDiffusionPipeline`):
	            Stable Diffusion pipeline to be used for training.
	        image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`):
	            Hook to be called to log images

	    """
	    def __init__(
	        self,
	        config,
	        reward_function,
	        prompt_function,
	        sd_pipeline,
	        image_samples_hook = None,
	        **kwargs
	    ):
	        # This trainer receives its arguments as `config`; bind `args` to it so the
	        # argument-based checks below operate on a defined object.
	        if config is None: config = UnslothAlignPropConfig()
	        args = config
	        other_metrics = []

	        from unsloth_zoo.logging_utils import PatchRLStatistics
	        PatchRLStatistics('alignprop_trainer', other_metrics)

	        # [TODO] Fix up DataParallel multiplying batch sizes
	        # [TODO] DDP works, but DP seems to not work? [TODO]
	        # In a non-distributed run that reports more than one GPU, force the GPU count to 1.
	        if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1:
	            if getattr(args, "_n_gpu", 1) != 1:
	                args._n_gpu = 1

	        super().__init__(
	            config = config,
	            reward_function = reward_function,
	            prompt_function = prompt_function,
	            sd_pipeline = sd_pipeline,
	            image_samples_hook = image_samples_hook,
	            **kwargs
	        )


	if hasattr(logger, "addFilter"):
	    import logging

	    class HideLoggingMessage(logging.Filter):
	        """Logging filter that rejects every record whose message contains `text`."""

	        def __init__(self, text):
	            self.text = text

	        def filter(self, x):
	            # Returning False drops the record; keep it only when the text is absent.
	            return self.text not in x.getMessage()

	    # Silence messages that mention `use_cache=True`.
	    logger.addFilter(HideLoggingMessage("`use_cache=True`"))