Spaces:
Runtime error
Runtime error
| # Copyright 2023 DeepMind Technologies Limited | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ============================================================================== | |
| """Transformer language model generate mode.""" | |
| from typing import Any, Tuple | |
| import beam_search | |
| import decoder_stack | |
| import gin | |
| import jax | |
| import jax.numpy as jnp | |
| from aglib.meliad.transformer import models | |
class DecoderOnlyLanguageModelGenerate(models.DecoderOnlyLanguageModel):
  """Decoder-only language model in inference (generate) mode.

  Extends `models.DecoderOnlyLanguageModel` with:
    * `get_fake_input`: adds the extra inference-time entries (`dstate`,
      `eos`, `mask`, `length`, `temperature`) to the parent's fake input.
    * `__call__`: runs `beam_search.beam_search_flat`, seeded with the last
      prompt token and a decoder state.
    * `generate`: feeds the prompt tokens through the decoder one at a time
      and returns the resulting decoder state and last-step logits.
  """

  decoder_factory = decoder_stack.DecoderStackGenerate

  # Only read in this class to size the fake key/value caches and the fake
  # relative position bias in `get_fake_input`.
  num_heads: int = gin.REQUIRED
  head_size: int = gin.REQUIRED

  def get_fake_input(self) -> dict[str, Any]:
    """Returns the parent's fake input extended with inference-only fields.

    Returns:
      The dict from `super().get_fake_input()` updated with:
        'dstate': a tuple of 12 per-layer state dicts (zero-filled caches),
        'eos' and 'mask': bfloat16 vectors of length 1024,
        'length': 1,
        'temperature': 1.0.
    """
    fake_input_dict = super().get_fake_input()
    b = self.task_config.batch_size
    n = self.num_heads
    h = self.head_size

    # NOTE(review): 2048 / 1024 / 12 are hard-coded; presumably the cache
    # length, the vocab / position-bias width and the number of decoder
    # layers of the deployed model -- confirm against the gin config.
    layer_state = {
        'current_index': jnp.zeros((b,), dtype=jnp.int32),
        'keys': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
        'values': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
        'recurrent_kvq': None,
        'relative_position_bias': jnp.zeros(
            (b, n, 1, 1024), dtype=jnp.bfloat16
        ),
    }

    fake_input_dict.update({
        # One dict per layer (not the same dict object repeated); the
        # immutable zero arrays are still shared between the layers.
        'dstate': tuple(dict(layer_state) for _ in range(12)),
        'eos': jnp.zeros([1024], dtype=jnp.bfloat16),
        'mask': jnp.ones([1024], dtype=jnp.bfloat16),
        'length': 1,
        'temperature': 1.0,
    })
    return fake_input_dict

  def __call__(self, inputs: dict[str, Any]) -> tuple[Any, dict[str, Any]]:
    """Runs beam search seeded with the last prompt token.

    Args:
      inputs: dict read for the keys 'targets', 'start_of_sequence',
        'length', 'dstate', 'eos' and 'mask' (plus whatever `generate`
        reads). The dict is not modified.

    Returns:
      A pair `(0.0, outputs)`; the first element is a constant. `outputs`
      holds 'finished_seqs', 'finished_scores' and 'dstate' as returned by
      `beam_search.beam_search_flat`.

    Raises:
      ValueError: if `self.mode` is neither 'init' nor 'beam_search', or if
        `self.decoder.supports_generate()` is true.
    """
    # Make sure this code is not used on untested cases.
    if self.mode not in ['init', 'beam_search']:
      raise ValueError(f'{type(self)} cannot do mode {self.mode}')
    # NOTE(review): this rejects decoders that report supports_generate();
    # the message matches the condition, so it looks intentional (this class
    # uses init_decoder_state_vanilla instead) -- confirm.
    if self.decoder.supports_generate():
      raise ValueError(f'{type(self)}.decoder cannot supports_generate()')

    # Result is discarded; presumably this call exists so the decoder's
    # variables are created in 'init' mode -- TODO confirm.
    self.decoder(
        input_tokens=inputs['targets'][:, 0:1],
        target_tokens=None,
        start_of_sequence=inputs['start_of_sequence'],
    )

    b = inputs['targets'].shape[0]
    no_start_of_seq = jnp.zeros((b,), dtype=jnp.bool_)

    # This fn is used in both beam_search or topk_sampling.
    def tokens_to_logits_fn(
        input_token: jnp.ndarray, dstate: tuple[dict[str, jnp.ndarray], ...]
    ) -> tuple[jnp.ndarray, tuple[dict[str, jnp.ndarray], ...]]:
      """Advances the decoder by one token; returns last logits and state."""
      (logits, dstate, _) = self.decoder(
          input_tokens=input_token,
          target_tokens=None,
          start_of_sequence=no_start_of_seq,
          decoder_state=dstate,
      )
      return logits[:, -1, :], dstate

    # The token at position length - 1 seeds beam search.
    last_token = jax.lax.dynamic_slice_in_dim(
        inputs['targets'], inputs['length'] - 1, 1, axis=1
    )

    # Drop the final column before handing the prompt to `generate`. Work on
    # a shallow copy so the caller's dict is left untouched.
    prefix_inputs = dict(inputs)
    prefix_inputs['targets'] = inputs['targets'][:, 0:-1]

    # At the start of a sequence, build the decoder state by running the
    # prompt through `generate`; otherwise reuse the state passed in.
    dstate = jax.lax.cond(
        inputs['start_of_sequence'][0],
        lambda: self.generate(prefix_inputs)[0],
        lambda: inputs['dstate'],
    )

    # Then we run beam search, init with last_token & dstate.
    finished_seqs, finished_scores, dstate = beam_search.beam_search_flat(
        last_token,
        dstate,
        tokens_to_logits_fn,
        max_decode_len=512,
        eos=inputs['eos'].reshape((1, 1, -1)),
        mask=inputs['mask'].reshape((1, 1, -1)),
    )

    return 0.0, {
        'finished_seqs': finished_seqs,
        'finished_scores': finished_scores,
        'dstate': dstate,
    }

  def generate(
      self, inputs: dict[str, Any]
  ) -> tuple[tuple[dict[str, jnp.ndarray], ...], jnp.ndarray]:
    """Feeds the prompt through the decoder token by token.

    Starting from the first token of `inputs['targets']`, each step runs the
    decoder on the current token and then takes the next token from the
    given targets (not from the model's output). The loop runs
    `inputs['length']` steps, or one fewer in 'beam_search' mode.

    Args:
      inputs: the same as argument to __call__. Reads 'targets',
        'start_of_sequence', 'dstate', 'length' and, optionally,
        'temperature' (defaults to 1).

    Returns:
      A pair `(dstate, logits)`: the decoder state after the last step and
      the temperature-scaled logits of the last step. If no step is run, the
      logits are zeros of shape (batch_size, 1, 1024).
    """
    input_tokens = inputs['targets']  # [b,seq_len]
    start_of_sequence = inputs['start_of_sequence']  # [b]
    # Targets are the inputs shifted left by one, zero-padded at the end.
    target_tokens = jnp.pad(input_tokens[:, 1:], [(0, 0), (0, 1)])
    batch_size = target_tokens.shape[0]

    # Assuming all sequences start at the same time.
    start0 = start_of_sequence[0]
    dstate = jax.lax.cond(
        start0,
        lambda: self.decoder.init_decoder_state_vanilla(  # pylint: disable=g-long-lambda
            1024, start_of_sequence
        ),
        lambda: inputs['dstate'],
    )

    first_token = input_tokens[:, 0:1]
    no_start_of_seq = jnp.zeros((batch_size,), dtype=jnp.bool_)
    temperature = inputs.get('temperature', 1)

    # Plain subtraction rather than `-=`, so a mutable array stored in
    # `inputs['length']` is never modified in place.
    num_steps = inputs['length']
    if self.mode == 'beam_search':
      num_steps = num_steps - 1

    def cond_fn(scan_state) -> jnp.bool_:
      """Continues while the step counter is below `num_steps`."""
      _, _, i, _ = scan_state
      return i < num_steps

    def loop_fn(scan_state: Any) -> tuple[Any, Any, Any, Any]:
      """Runs one decoder step and selects the next prompt token."""
      (dstate, input_token, i, _) = scan_state

      (logits, dstate, _) = self.decoder(
          input_tokens=input_token,
          target_tokens=None,
          start_of_sequence=no_start_of_seq,
          decoder_state=dstate,
      )

      logits = logits / temperature
      # The next input is the i-th target, i.e. prompt token i + 1.
      output_token = jax.lax.dynamic_slice_in_dim(target_tokens, i, 1, axis=1)

      return (dstate, output_token, i + 1, logits)

    # Scan over the sequence length.
    # NOTE(review): 1024 is hard-coded here; presumably the vocab size, it
    # must match the decoder's logits shape for while_loop -- confirm.
    dummy_logits = jnp.zeros((batch_size, 1, 1024))
    initial_scan_state = (dstate, first_token, 0, dummy_logits)
    dstate, _, _, logits = jax.lax.while_loop(
        cond_fn, loop_fn, initial_scan_state
    )
    return dstate, logits