|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from verl.utils.megatron_utils import unwrap_model
|
|
|
|
|
|
from .util import postprocess_packed_seqs, preprocess_packed_seqs, recover_left_padding, remove_left_padding
|
|
|
|
|
|
|
|
|
def gptmodel_forward(model, input_ids, attention_mask, position_ids, sequence_parallel, value_model=False, pack_seqs=True):
    """Default forward pass for GPT models, with optional sequence packing.

    Args:
        model: (Possibly wrapped) Megatron GPT model; it is unwrapped to read
            its ``pre_process`` / ``post_process`` pipeline-stage flags.
        input_ids: Token id tensor; first two dims are (batch, seq) — same
            leading layout as ``attention_mask``.
        attention_mask: Padding mask with at least (batch, seq) dims; drives
            both sequence packing and left-padding removal/recovery.
        position_ids: Position ids forwarded to the model unchanged.
        sequence_parallel: Whether sequence parallelism is enabled; only
            consulted on the non-packed path by ``remove_left_padding``.
        value_model: If True (and this rank is the last pipeline stage),
            select index 0 of the output's trailing dimension (value head).
        pack_seqs: If True, strip padding and run with packed sequences;
            otherwise remove/recover left padding around a regular forward.

    Returns:
        Model output with the original padded layout restored; for a value
        model on the last pipeline stage, the trailing dimension is indexed
        away via ``output[..., 0]``.
    """
    # Unwrap once and reuse: the original called unwrap_model() twice for
    # two attribute reads on the same object.
    unwrapped = unwrap_model(model)
    pre_process = unwrapped.pre_process
    post_process = unwrapped.post_process

    if pack_seqs:
        batch_size, seq_len = attention_mask.shape[:2]
        input_ids_rmpad, packed_seq_params = preprocess_packed_seqs(input_ids, attention_mask, pre_process=pre_process)
        # Megatron expects contiguous input for the packed-sequence path.
        input_ids_rmpad = input_ids_rmpad.contiguous()
        output_orig = model(
            input_ids=input_ids_rmpad,
            attention_mask=None,  # packing metadata travels in packed_seq_params
            position_ids=position_ids,
            packed_seq_params=packed_seq_params,
        )
        # Scatter packed output back to the original (batch, seq, ...) layout.
        output = postprocess_packed_seqs(output_orig, packed_seq_params, attention_mask, batch_size, seq_len, post_process=post_process)
    else:
        batch_size, sequence_length = attention_mask.shape
        new_input_ids, new_attention_mask, new_position_ids = remove_left_padding(input_ids, attention_mask, position_ids, sequence_parallel, pre_process=pre_process)
        output = model(input_ids=new_input_ids, attention_mask=new_attention_mask, position_ids=new_position_ids)
        output = recover_left_padding(output, new_attention_mask, attention_mask, sequence_length, post_process=post_process)

    # Value models produce a single scalar per token in the last dim; drop it
    # only on the last pipeline stage (earlier stages pass hidden states).
    if value_model and post_process:
        output = output[..., 0]
    return output
|
|
|
|
|
|
|
|
|
def gptmodel_forward_qwen2_5_vl(*args, **kwargs):
    """Placeholder forward pass for Qwen2.5 VL.

    Accepts any arguments for signature compatibility, but vision-language
    models are not supported yet, so this always raises.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("VLM is not supported yet")
|
|
|
|