|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Transformer language model generate mode."""
|
|
|
|
|
|
from typing import Any, Tuple
|
|
|
import beam_search
|
|
|
import decoder_stack
|
|
|
import gin
|
|
|
import jax
|
|
|
import jax.numpy as jnp
|
|
|
from transformer import models
|
|
|
|
|
|
|
|
|
@gin.configurable
|
|
|
class DecoderOnlyLanguageModelGenerate(models.DecoderOnlyLanguageModel):
|
|
|
"""Decoder only language modeling in inference mode."""
|
|
|
|
|
|
decoder_factory = decoder_stack.DecoderStackGenerate
|
|
|
|
|
|
num_heads: int = gin.REQUIRED
|
|
|
head_size: int = gin.REQUIRED
|
|
|
|
|
|
def get_fake_input(self) -> dict[str, Any]:
|
|
|
fake_input_dict = super().get_fake_input()
|
|
|
b = self.task_config.batch_size
|
|
|
n = self.num_heads
|
|
|
h = self.head_size
|
|
|
fake_input_dict.update({
|
|
|
'dstate': tuple(
|
|
|
[{
|
|
|
'current_index': jnp.array([0] * b, dtype=jnp.int32),
|
|
|
'keys': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
|
|
|
'values': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
|
|
|
'recurrent_kvq': None,
|
|
|
'relative_position_bias': jnp.zeros(
|
|
|
(b, n, 1, 1024), dtype=jnp.bfloat16
|
|
|
),
|
|
|
}]
|
|
|
* 12
|
|
|
),
|
|
|
'eos': jnp.zeros([1024], dtype=jnp.bfloat16),
|
|
|
'mask': jnp.ones([1024], dtype=jnp.bfloat16),
|
|
|
'length': 1,
|
|
|
'temperature': 1.0,
|
|
|
})
|
|
|
return fake_input_dict
|
|
|
|
|
|
def __call__(self, inputs: ...) -> tuple[Any, dict[str, Any]]:
|
|
|
|
|
|
if self.mode not in ['init', 'beam_search']:
|
|
|
raise ValueError(f'{type(self)} cannot do mode {self.mode}')
|
|
|
if self.decoder.supports_generate():
|
|
|
raise ValueError(f'{type(self)}.decoder cannot supports_generate()')
|
|
|
|
|
|
self.decoder(
|
|
|
input_tokens=inputs['targets'][:, 0:1],
|
|
|
target_tokens=None,
|
|
|
start_of_sequence=inputs['start_of_sequence'],
|
|
|
)
|
|
|
|
|
|
b = inputs['targets'].shape[0]
|
|
|
no_start_of_seq = jnp.array([False] * b, dtype=jnp.bool_)
|
|
|
|
|
|
|
|
|
def tokens_to_logits_fn(
|
|
|
input_token: jnp.ndarray, dstate: tuple[dict[str, jnp.ndarray], ...]
|
|
|
) -> tuple[jnp.ndarray, tuple[dict[str, jnp.ndarray], ...]]:
|
|
|
(logits, dstate, _) = self.decoder(
|
|
|
input_tokens=input_token,
|
|
|
target_tokens=None,
|
|
|
start_of_sequence=no_start_of_seq,
|
|
|
decoder_state=dstate,
|
|
|
)
|
|
|
return logits[:, -1, :], dstate
|
|
|
|
|
|
last_token = jax.lax.dynamic_slice_in_dim(
|
|
|
inputs['targets'], inputs['length'] - 1, 1, axis=1
|
|
|
)
|
|
|
|
|
|
|
|
|
inputs['targets'] = inputs['targets'][:, 0:-1]
|
|
|
dstate = jax.lax.cond(
|
|
|
inputs['start_of_sequence'][0],
|
|
|
lambda: self.generate(inputs)[0],
|
|
|
lambda: inputs['dstate'],
|
|
|
)
|
|
|
|
|
|
|
|
|
finished_seqs, finished_scores, dstate = beam_search.beam_search_flat(
|
|
|
last_token,
|
|
|
dstate,
|
|
|
tokens_to_logits_fn,
|
|
|
max_decode_len=512,
|
|
|
eos=inputs['eos'].reshape((1, 1, -1)),
|
|
|
mask=inputs['mask'].reshape((1, 1, -1)),
|
|
|
)
|
|
|
|
|
|
return 0.0, {
|
|
|
'finished_seqs': finished_seqs,
|
|
|
'finished_scores': finished_scores,
|
|
|
'dstate': dstate,
|
|
|
}
|
|
|
|
|
|
def generate(
|
|
|
self, inputs: ...
|
|
|
) -> tuple[tuple[dict[str, jnp.ndarray, ...], ...], jnp.ndarray]:
|
|
|
"""Generate an output sequence.
|
|
|
|
|
|
Args:
|
|
|
inputs: the same as argument to _call_.
|
|
|
|
|
|
Returns:
|
|
|
An array of generated tokens of shape (batch_size, sequence_length).
|
|
|
"""
|
|
|
input_tokens = inputs['targets']
|
|
|
start_of_sequence = inputs['start_of_sequence']
|
|
|
target_tokens = jnp.pad(input_tokens[:, 1:], [(0, 0), (0, 1)])
|
|
|
batch_size = target_tokens.shape[0]
|
|
|
|
|
|
|
|
|
start0 = inputs['start_of_sequence'][0]
|
|
|
dstate = jax.lax.cond(
|
|
|
start0,
|
|
|
lambda: self.decoder.init_decoder_state_vanilla(
|
|
|
1024, start_of_sequence
|
|
|
),
|
|
|
lambda: inputs['dstate'],
|
|
|
)
|
|
|
|
|
|
first_token = input_tokens[:, 0:1]
|
|
|
no_start_of_seq = jnp.array([False] * batch_size, dtype=jnp.bool_)
|
|
|
temperature = 1
|
|
|
if 'temperature' in inputs:
|
|
|
temperature = inputs['temperature']
|
|
|
|
|
|
num_steps = inputs['length']
|
|
|
if self.mode == 'beam_search':
|
|
|
num_steps -= 1
|
|
|
|
|
|
def cond_fn(scan_state) -> jnp.bool_:
|
|
|
_, _, i, _ = scan_state
|
|
|
return i < num_steps
|
|
|
|
|
|
def loop_fn(scan_state: Any) -> Tuple[Any, Any, Any, Any]:
|
|
|
(dstate, input_token, i, _) = scan_state
|
|
|
|
|
|
(logits, dstate, _) = self.decoder(
|
|
|
input_tokens=input_token,
|
|
|
target_tokens=None,
|
|
|
start_of_sequence=no_start_of_seq,
|
|
|
decoder_state=dstate,
|
|
|
)
|
|
|
|
|
|
logits = logits / temperature
|
|
|
output_token = jax.lax.dynamic_slice_in_dim(target_tokens, i, 1, axis=1)
|
|
|
|
|
|
return (dstate, output_token, i + 1, logits)
|
|
|
|
|
|
|
|
|
dummy_logits = jnp.zeros((batch_size, 1, 1024))
|
|
|
initial_scan_state = (dstate, first_token, 0, dummy_logits)
|
|
|
dstate, _, _, logits = jax.lax.while_loop(
|
|
|
cond_fn, loop_fn, initial_scan_state
|
|
|
)
|
|
|
return dstate, logits
|
|
|
|