import warnings
import copy
from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple, Union

import torch
import torch.distributions as dists
from torch.nn import functional as F
from transformers import __version__
from transformers.generation.configuration_utils import (
    GenerationConfig,
)
from transformers.utils import (
    ModelOutput,
    is_torchdynamo_compiling,
    logging,
)
from transformers.cache_utils import (
    Cache,
    DynamicCache,
)
from transformers.generation.utils import GenerationMixin
from transformers import TextIteratorStreamer

logger = logging.get_logger("DreamVL." + __name__)


def top_p_logits(logits, top_p=None):
    """Mask out logits outside the top-p (nucleus) set."""
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
    sorted_indices_to_remove = cumulative_probs > top_p
    # Shift the mask right so the first token that crosses the threshold is kept.
    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
    sorted_indices_to_remove[..., 0] = 0

    mask = torch.zeros_like(logits, dtype=torch.bool, device=logits.device)
    mask = mask.scatter_(-1, sorted_indices, sorted_indices_to_remove)
    logits = logits.masked_fill(mask, torch.finfo(logits.dtype).min)
    return logits
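

def _example_top_p_filtering():
    # Illustrative sketch (not part of the original module): nucleus (top-p)
    # filtering on a [batch, vocab] logits tensor. Tokens outside the smallest
    # set whose cumulative probability exceeds `top_p` are floored to dtype-min,
    # so a subsequent softmax assigns them (near) zero probability.
    logits = torch.tensor([[2.0, 1.0, 0.0, -1.0]])
    filtered = top_p_logits(logits.clone(), top_p=0.9)
    return torch.softmax(filtered, dim=-1)  # mass redistributed over kept tokens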


def top_k_logits(logits, top_k=None):
    """Mask out logits below the k-th largest value."""
    top_k = min(top_k, logits.size(-1))  # safety: k cannot exceed the vocab size
    indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
    logits = logits.masked_fill(indices_to_remove, torch.finfo(logits.dtype).min)
    return logits
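

def _example_top_k_filtering():
    # Illustrative sketch (not part of the original module): keep only the two
    # largest logits per row; everything else is floored to dtype-min.
    logits = torch.tensor([[2.0, 1.0, 0.0, -1.0]])
    filtered = top_k_logits(logits.clone(), top_k=2)
    return filtered  # [[2.0, 1.0, min, min]]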


def sample_tokens(logits, temperature=0.0, top_p=None, top_k=None, margin_confidence=False, neg_entropy=False):
    if temperature > 0:
        logits = logits / temperature
    if top_p is not None and top_p < 1:
        logits = top_p_logits(logits, top_p)
    if top_k is not None:
        logits = top_k_logits(logits, top_k)
    probs = torch.softmax(logits, dim=-1)

    if temperature > 0:
        try:
            x0 = dists.Categorical(probs=probs).sample()
            confidence = torch.gather(probs, -1, x0.unsqueeze(-1)).squeeze(-1)
        except Exception:
            # Sampling can fail on degenerate distributions (e.g. NaN/inf
            # probabilities); fall back to greedy decoding.
            confidence, x0 = probs.max(dim=-1)
    else:
        confidence, x0 = probs.max(dim=-1)

    if margin_confidence:
        # Confidence as the gap between the top-1 and top-2 probabilities
        # (assumes `probs` is 2-D: [num_tokens, vocab]).
        sorted_probs, _ = torch.sort(probs, dim=-1, descending=True)
        top1_probs = sorted_probs[:, 0]
        top2_probs = sorted_probs[:, 1]
        confidence = top1_probs - top2_probs

    if neg_entropy:
        # Confidence as negative entropy: higher (closer to 0) means a more
        # peaked, hence more confident, distribution.
        epsilon = 1e-10
        log_probs = torch.log(probs + epsilon)
        confidence = torch.sum(probs * log_probs, dim=-1)

    return confidence, x0
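

def _example_sample_tokens():
    # Illustrative sketch (not part of the original module): the three
    # confidence definitions used by the remasking algorithms below.
    logits = torch.randn(5, 100)  # 5 masked positions, vocab of 100
    # Greedy (temperature=0): confidence is the max probability itself.
    conf_greedy, x0 = sample_tokens(logits, temperature=0.0)
    # Margin: confidence is the top-1 minus top-2 probability.
    conf_margin, _ = sample_tokens(logits, temperature=0.0, margin_confidence=True)
    # Negative entropy: higher (closer to 0) means a more peaked distribution.
    conf_negent, _ = sample_tokens(logits, temperature=0.0, neg_entropy=True)
    return conf_greedy, conf_margin, conf_negent, x0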


@dataclass
class DreamVLModelOutput(ModelOutput):
    sequences: torch.LongTensor = None
    history: Optional[Tuple[torch.FloatTensor]] = None


class DreamVLGenerationConfig(GenerationConfig):
    def __init__(self, **kwargs):
        self.use_cache: bool = kwargs.pop("use_cache", False)

        # Sampling parameters
        self.temperature: float = kwargs.pop("temperature", 0.0)
        self.top_p: Optional[float] = kwargs.pop("top_p", None)
        self.top_k: Optional[int] = kwargs.pop("top_k", None)
        self.max_length = kwargs.pop("max_length", 20)
        self.max_new_tokens = kwargs.pop("max_new_tokens", None)

        # Diffusion-specific parameters
        self.eps: float = kwargs.pop("eps", 1e-3)
        self.steps: int = kwargs.pop("steps", 512)
        self.alg: str = kwargs.pop("alg", 'origin')
        self.alg_temp: Optional[float] = kwargs.pop("alg_temp", None)
        self.eos_penalty: Optional[float] = kwargs.pop("eos_penalty", 0)

        # Output options
        self.num_return_sequences: int = kwargs.pop("num_return_sequences", 1)
        self.return_dict_in_generate: bool = kwargs.pop("return_dict_in_generate", False)
        self.output_history: bool = kwargs.pop("output_history", False)

        # Special tokens
        self.mask_token_id = kwargs.pop("mask_token_id", None)
        self.pad_token_id = kwargs.pop("pad_token_id", None)
        self.bos_token_id = kwargs.pop("bos_token_id", None)
        self.eos_token_id = kwargs.pop("eos_token_id", None)

        # Parameters that may be forwarded to the model at generation time
        self.generation_kwargs = kwargs.pop("generation_kwargs", {})

        # Metadata fields used by `from_pretrained`/`save_pretrained`
        self._from_model_config = kwargs.pop("_from_model_config", False)
        self._commit_hash = kwargs.pop("_commit_hash", None)
        self.transformers_version = kwargs.pop("transformers_version", __version__)

        # Any remaining kwargs are stored as attributes (unless the config was
        # built from a model config, in which case they are silently ignored).
        if not self._from_model_config:
            for key, value in kwargs.items():
                try:
                    setattr(self, key, value)
                except AttributeError as err:
                    logger.error(f"Can't set {key} with value {value} for {self}")
                    raise err

        self.validate(is_init=True)

    def validate(self, is_init=False):
        # Skip the parent class's sanity checks: several of its defaults
        # (e.g. `do_sample`) do not apply to diffusion decoding.
        pass
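

def _example_generation_config():
    # Illustrative sketch (not part of the original module): a typical config
    # for confidence-based diffusion decoding. The token id is a placeholder;
    # real values come from the model's tokenizer/config.
    return DreamVLGenerationConfig(
        max_new_tokens=256,
        steps=256,             # one diffusion step per new token here
        temperature=0.2,
        top_p=0.95,
        alg="entropy",         # unmask lowest-entropy positions first
        alg_temp=0.0,
        mask_token_id=151666,  # placeholder id, model-specific
    )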


class DreamVLGenerationMixin:
    @staticmethod
    def _expand_inputs_for_generation(
        expand_size: int = 1,
        input_ids: Optional[torch.LongTensor] = None,
        **model_kwargs,
    ) -> Tuple[torch.LongTensor, Dict[str, Any]]:
        """Expands tensors from [batch_size, ...] to [batch_size * expand_size, ...]"""
        pixel_values = model_kwargs.get("pixel_values", None)
        image_grid_thw = model_kwargs.get("image_grid_thw", None)
        if expand_size == 1:
            return GenerationMixin._expand_inputs_for_generation(
                expand_size=expand_size,
                input_ids=input_ids,
                **model_kwargs,
            )
        elif pixel_values is None and image_grid_thw is None:
            return GenerationMixin._expand_inputs_for_generation(
                expand_size=expand_size,
                input_ids=input_ids,
                **model_kwargs,
            )
        else:
            raise ValueError("Expansion is not supported for image inputs.")

    def _validate_generated_length(self, generation_config, input_ids_length, has_default_max_length):
        """Performs validation related to the resulting generated length."""
        # Can't throw warnings/exceptions during compilation
        if is_torchdynamo_compiling():
            return

        # Max length warnings related to poor parameterization
        if has_default_max_length and generation_config.max_new_tokens is None and generation_config.max_length == 20:
            # 20 is the default max_length of the class
            logger.warning_once(
                f"Using the model-agnostic default `max_length` (={generation_config.max_length}) to control the "
                "generation length. We recommend setting `max_new_tokens` to control the maximum length of the "
                "generation."
            )
        if input_ids_length >= generation_config.max_length:
            input_ids_string = "input_ids"
            raise ValueError(
                f"Input length of {input_ids_string} is {input_ids_length}, but `max_length` is set to"
                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                " increasing `max_length` or, better yet, setting `max_new_tokens`."
            )

    def _prepare_generated_length(
        self,
        generation_config,
        has_default_max_length,
        input_ids_length,
    ):
        """Prepares max and min length in generation configs to avoid clashes between similar attributes."""
        if generation_config.max_new_tokens is not None:
            if not has_default_max_length and generation_config.max_length is not None:
                logger.warning_once(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
                    "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
                )
            generation_config.max_length = generation_config.max_new_tokens + input_ids_length

        # If the user passed neither `max_new_tokens` nor `max_length`, treat the
        # default `max_length` as a new-token budget on top of the prompt.
        elif has_default_max_length:
            if generation_config.max_length == DreamVLGenerationConfig().max_length:
                generation_config.max_length = generation_config.max_length + input_ids_length
                max_position_embeddings = getattr(self.config, "max_position_embeddings", None)
                if max_position_embeddings is not None:
                    generation_config.max_length = min(generation_config.max_length, max_position_embeddings)

        return generation_config
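
    # Worked example (illustrative): with a 32-token prompt, `max_new_tokens=256`
    # resolves `max_length` to 256 + 32 = 288. If neither is set, the default
    # `max_length` of 20 is treated as a new-token budget (20 + 32 = 52), capped
    # by `max_position_embeddings` when the model config defines one.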

    def _prepare_generation_config(
        self, generation_config: Optional[DreamVLGenerationConfig], **kwargs: Dict
    ) -> Tuple[DreamVLGenerationConfig, Dict[str, Any]]:
        """
        Prepares the base generation config, then applies any generation configuration options from kwargs. This
        function handles retrocompatibility with respect to configuration files.
        """
        using_model_generation_config = False
        if generation_config is None:
            generation_config = DreamVLGenerationConfig.from_model_config(self.config)
            using_model_generation_config = True

        # `torch.compile` can't compile `copy.deepcopy`, so the config is only
        # copied (and updated from kwargs) outside of compilation.
        if not is_torchdynamo_compiling():
            generation_config = copy.deepcopy(generation_config)
            model_kwargs = generation_config.update(**kwargs)
            # If a `generation_config` was provided, fall back to the model's
            # generation config for any special token it does not define.
            if not using_model_generation_config:
                if generation_config.bos_token_id is None:
                    generation_config.bos_token_id = self.generation_config.bos_token_id
                if generation_config.eos_token_id is None:
                    generation_config.eos_token_id = self.generation_config.eos_token_id
                if generation_config.pad_token_id is None:
                    generation_config.pad_token_id = self.generation_config.pad_token_id
                if generation_config.mask_token_id is None:
                    generation_config.mask_token_id = self.generation_config.mask_token_id
        else:
            # Under compilation, skip the deepcopy/update; otherwise
            # `model_kwargs` would be unbound below.
            model_kwargs = kwargs

        return generation_config, model_kwargs

    def _prepare_special_tokens(
        self,
        generation_config: DreamVLGenerationConfig,
        device: Optional[Union[torch.device, str]] = None,
    ):
        """
        Prepares the special tokens for generation, overwriting the generation config with their processed versions
        converted to tensor.

        Note that `generation_config` is changed in place and stops being serializable after this method is called.
        That is no problem if called within `generate` (`generation_config` is a local copy that doesn't leave the
        function). However, if called outside `generate`, consider creating a copy of `generation_config` first.
        """

        # Convert special tokens to tensors
        def _tensor_or_none(token, device=None):
            if token is None:
                return token

            device = device if device is not None else self.device
            if isinstance(token, torch.Tensor):
                return token.to(device)
            return torch.tensor(token, device=device, dtype=torch.long)

        bos_token_tensor = _tensor_or_none(generation_config.bos_token_id, device=device)
        eos_token_tensor = _tensor_or_none(generation_config.eos_token_id, device=device)
        pad_token_tensor = _tensor_or_none(generation_config.pad_token_id, device=device)
        mask_token_tensor = _tensor_or_none(generation_config.mask_token_id, device=device)

        # There can be more than one eos token; always treat it as a 1-D tensor.
        if eos_token_tensor is not None and eos_token_tensor.ndim == 0:
            eos_token_tensor = eos_token_tensor.unsqueeze(0)

        # Set the pad token to the (first) eos token if unset
        if pad_token_tensor is None and eos_token_tensor is not None:
            pad_token_tensor = eos_token_tensor[0]
            logger.warning_once(f"Setting `pad_token_id` to `eos_token_id`:{pad_token_tensor} for open-end generation.")

        # Update generation config with the processed special token tensors
        generation_config._bos_token_tensor = bos_token_tensor
        generation_config._eos_token_tensor = eos_token_tensor
        generation_config._pad_token_tensor = pad_token_tensor
        generation_config._mask_token_tensor = mask_token_tensor

    def _mask_pad_inputs_for_generation(
        self,
        input_ids: torch.LongTensor,
        generation_config: DreamVLGenerationConfig,
        **model_kwargs,
    ) -> Tuple[torch.LongTensor, Dict[str, Any]]:
        """
        Pads `input_ids` (and the attention mask) up to `max_length`. The padding positions are filled with mask
        tokens, which the diffusion process progressively replaces with sampled tokens.
        """
        max_length = generation_config.max_length
        mask_token_id = generation_config.mask_token_id
        attention_mask = model_kwargs.get("attention_mask", None)

        # Append mask tokens up to max_length; they are attended to (mask=1).
        input_ids = F.pad(input_ids, (0, max_length - input_ids.shape[1]), value=mask_token_id)
        if attention_mask is not None:
            attention_mask = F.pad(attention_mask, (0, max_length - attention_mask.shape[1]), value=1.0)
            model_kwargs["attention_mask"] = attention_mask
        else:
            raise ValueError("attention_mask should be provided.")

        return input_ids, model_kwargs
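
    # Illustration: for a prompt [p0, p1, p2] and max_length=6, the padded
    # sequence is [p0, p1, p2, M, M, M] with attention mask [1, 1, 1, 1, 1, 1],
    # where M is `mask_token_id`. The mask positions are real model inputs
    # (attended to), not padding in the usual causal-LM sense.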

    def _update_model_kwargs_for_generation(
        self,
        outputs: ModelOutput,
        model_kwargs: Dict[str, Any],
    ) -> Dict[str, Any]:
        if model_kwargs["use_cache"]:
            assert outputs.past_key_values is not None, "Cache should not be None if use_cache is True"
            assert outputs.past_key_values.get_seq_length() == model_kwargs["total_sequence_length"], (
                f"Cache length {outputs.past_key_values.get_seq_length()} should be equal to the total "
                f"sequence length {model_kwargs['total_sequence_length']}"
            )
            # Only the prompt's KV entries are kept between steps; the response
            # part is recomputed every step because its tokens keep changing.
            outputs.past_key_values.crop(max_length=model_kwargs["prompt_length"])
        else:
            assert outputs.past_key_values is None, "Cache should be None if use_cache is False"
        model_kwargs["past_key_values"] = outputs.past_key_values

        # Keep cache positions only for the (recomputed) response tokens.
        if model_kwargs["use_cache"]:
            model_kwargs["cache_position"] = model_kwargs["cache_position"][-(model_kwargs["total_sequence_length"] - model_kwargs["prompt_length"]):]
        else:
            assert model_kwargs["cache_position"] is None, "Cache position should be None if use_cache is False"

        if model_kwargs.get("rope_deltas", None) is not None:
            assert torch.equal(model_kwargs["rope_deltas"], outputs.rope_deltas), (
                f"Rope deltas {model_kwargs['rope_deltas']} should be equal to the new rope deltas {outputs.rope_deltas}"
            )
        model_kwargs["rope_deltas"] = outputs.rope_deltas
        return model_kwargs
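
    # Illustration: with prompt_length=31 and total_sequence_length=512, each
    # step attends over the 31 cached prompt entries plus 481 freshly computed
    # response positions (cache_position = [31, ..., 511]); the crop above then
    # discards the response entries so the next step starts again from the
    # prompt-only cache.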

    @torch.no_grad()
    def diffusion_generate(
        self,
        inputs: Optional[torch.Tensor] = None,
        generation_config: Optional[DreamVLGenerationConfig] = None,
        **kwargs,
    ) -> Union[DreamVLModelOutput, torch.LongTensor]:
        # 1. Handle `generation_config` and kwargs that might update it
        generation_config, model_kwargs = self._prepare_generation_config(generation_config, **kwargs)
        generation_tokens_hook_func = model_kwargs.pop("generation_tokens_hook_func", lambda step, x, logits: x)
        generation_logits_hook_func = model_kwargs.pop("generation_logits_hook_func", lambda step, x, logits: logits)
        attention_mask = kwargs.pop("attention_mask", None)

        # 2. Define model inputs
        assert inputs is not None
        input_ids = inputs
        device = input_ids.device
        self._prepare_special_tokens(generation_config, device=device)

        # 3. Prepare `max_length`
        input_ids_length = input_ids.shape[-1]
        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
        generation_config = self._prepare_generated_length(
            generation_config=generation_config,
            has_default_max_length=has_default_max_length,
            input_ids_length=input_ids_length,
        )

        self._validate_generated_length(generation_config, input_ids_length, has_default_max_length)

        # 4. Check inputs
        if not is_torchdynamo_compiling() and self.device.type != input_ids.device.type:
            logger.warning_once(
                "You are calling .generate() with the `input_ids` being on a device type different"
                f" than your model's device. `input_ids` is on {input_ids.device.type}, whereas the model"
                f" is on {self.device.type}. You may experience unexpected behaviors or slower generation."
                " Please make sure that you have put `input_ids` to the"
                f" correct device by calling for example input_ids = input_ids.to('{self.device.type}') before"
                " running `.generate()`."
            )
        if (
            generation_config.pad_token_id is not None
            and torch.any(input_ids == generation_config.pad_token_id)
            and attention_mask is None
        ):
            logger.warning_once(
                "Padding was detected but no attention mask is passed here. For correct "
                "generation results, please set `attention_mask` when batch-padding inputs."
            )

        # 5. Prepare the cache. Only the prompt (minus its last token, whose
        # logits predict the first response position and must be recomputed
        # every step) is cacheable.
        model_kwargs["use_cache"] = generation_config.use_cache
        model_kwargs["past_key_values"] = DynamicCache() if model_kwargs["use_cache"] else None
        model_kwargs["prompt_length"] = input_ids.shape[1] - 1

        # 6. Expand inputs for multiple return sequences
        input_ids, model_kwargs = self._expand_inputs_for_generation(
            input_ids=input_ids,
            expand_size=generation_config.num_return_sequences,
            **model_kwargs,
        )

        # 7. Pad the input ids with mask tokens up to `max_length`
        input_ids, model_kwargs = self._mask_pad_inputs_for_generation(
            input_ids=input_ids,
            generation_config=generation_config,
            **model_kwargs,
        )
        model_kwargs["total_sequence_length"] = input_ids.shape[1]

        # 8. Initialize cache positions (0 .. total_sequence_length - 1)
        if model_kwargs["use_cache"]:
            model_kwargs["cache_position"] = torch.ones_like(input_ids[0, :], dtype=torch.int64).cumsum(0) - 1
        else:
            model_kwargs["cache_position"] = None

        result = self._sample(
            input_ids,
            generation_config=generation_config,
            generation_tokens_hook_func=generation_tokens_hook_func,
            generation_logits_hook_func=generation_logits_hook_func,
            **model_kwargs,
        )
        return result
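
    # Usage sketch (illustrative; `model` and `processor` are assumed to be a
    # DreamVL model exposing this mixin and its matching processor):
    #
    #     inputs = processor(text=prompt, images=image, return_tensors="pt")
    #     out = model.diffusion_generate(
    #         inputs.input_ids,
    #         attention_mask=inputs.attention_mask,
    #         max_new_tokens=256,
    #         steps=256,
    #         alg="entropy",
    #     )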

    def _sample(
        self,
        input_ids: torch.LongTensor,
        generation_config: DreamVLGenerationConfig,
        generation_tokens_hook_func,
        generation_logits_hook_func,
        **model_kwargs,
    ) -> Union[DreamVLModelOutput, torch.LongTensor]:
        # Init values
        output_history = generation_config.output_history
        return_dict_in_generate = generation_config.return_dict_in_generate
        max_length = generation_config.max_length
        mask_token_id = generation_config.mask_token_id
        pad_token_id = generation_config.pad_token_id
        steps = generation_config.steps
        eps = generation_config.eps
        alg = generation_config.alg
        alg_temp = generation_config.alg_temp
        temperature = generation_config.temperature
        eos_penalty = generation_config.eos_penalty
        top_p = generation_config.top_p
        top_k = generation_config.top_k

        histories = [] if (return_dict_in_generate and output_history) else None

        # Diffusion timesteps go from t=1 (fully masked) down to t=eps.
        timesteps = torch.linspace(1, eps, steps + 1, device=input_ids.device)

        x = generation_tokens_hook_func(None, input_ids, None)

        for i in range(steps):
            model_inputs = self.prepare_inputs_for_generation(x, **model_kwargs)
            x = model_inputs.pop("input_ids").clone()
            mask_index = (x == mask_token_id)
            outputs = self(x, **model_inputs)

            # Cache the multimodal input embeddings from the first forward pass;
            # later steps only re-embed the tokens that changed.
            if 'inputs_embeds' not in model_kwargs:
                model_kwargs['inputs_embeds'] = outputs.inputs_embeds

            model_kwargs = self._update_model_kwargs_for_generation(outputs, model_kwargs)

            logits = outputs.logits
            assert torch.all(x[:, 0] != mask_token_id), "The first token should not be a mask token"
            # Shift logits right by one so logits[:, j] predicts the token at
            # position j (position 0 is never masked, see the assertion above).
            logits = torch.cat([logits[:, :1], logits[:, :-1]], dim=1)

            # Allow a custom hook to process the logits
            logits = generation_logits_hook_func(i, x, logits)

            mask_logits = logits[mask_index]
            t = timesteps[i]
            s = timesteps[i + 1]
            # For eos_penalty > 0, discourage the pad/eos token early in decoding
            # (the log term is strongly negative at t ~ 1 and decays to 0).
            mask_logits[:, pad_token_id] += eos_penalty * torch.log(1 - t + eps)

            if alg == 'origin':
                # Each masked position is independently unmasked with
                # probability 1 - s/t (all remaining ones on the last step).
                p_transfer = 1 - s / t if i < steps - 1 else 1
                x0 = torch.zeros_like(x[mask_index], device=self.device, dtype=torch.long) + mask_token_id
                transfer_index_t_s = torch.rand(*x0.shape, device=self.device) < p_transfer
                _, x0[transfer_index_t_s] = sample_tokens(mask_logits[transfer_index_t_s], temperature=temperature, top_p=top_p, top_k=top_k)
                x[mask_index] = x0.clone()
            else:
                # Confidence-based unmasking: sample every masked position, but
                # only commit the most confident fraction this step.
                if alg == 'maskgit_plus':
                    confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
                elif alg == 'topk_margin':
                    confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k, margin_confidence=True)
                elif alg == 'entropy':
                    confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k, neg_entropy=True)
                else:
                    raise RuntimeError(f"Unknown alg: {alg}")
                num_mask_token = mask_index.sum()
                number_transfer_tokens = int(num_mask_token * (1 - s / t)) if i < steps - 1 else int(num_mask_token)
                if number_transfer_tokens > 0:
                    if alg_temp is None or alg_temp == 0:
                        # Deterministic: commit the most confident tokens.
                        _, transfer_index = torch.topk(confidence, number_transfer_tokens)
                    else:
                        # Stochastic: sample positions from a temperature-scaled
                        # softmax over the confidences.
                        confidence = confidence / alg_temp
                        confidence = F.softmax(confidence, dim=-1)
                        transfer_index = torch.multinomial(confidence, num_samples=number_transfer_tokens)
                    x0_ = torch.zeros_like(x0, device=self.device, dtype=torch.long) + mask_token_id
                    x0_[transfer_index] = x0[transfer_index].clone()
                    x[mask_index] = x0_

            # Allow a custom hook to process the newly decoded tokens
            x = generation_tokens_hook_func(i, x, logits)

            if histories is not None:
                histories.append(x.clone())

            # Re-embed only the positions that were masked this step.
            model_kwargs['inputs_embeds'][mask_index] = self.get_input_embeddings()(x[mask_index])

        if return_dict_in_generate:
            return DreamVLModelOutput(
                sequences=x,
                history=histories,
            )
        else:
            return x
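

def _example_transfer_schedule(steps=8, eps=1e-3, num_masked=64):
    # Illustrative sketch (not part of the original module): how many masked
    # tokens the confidence-based algorithms commit per step. With timesteps
    # linearly spaced from 1 down to `eps`, 1 - s/t is roughly 1/(steps - i),
    # so the budget is spread almost evenly across steps and the final step
    # commits whatever remains.
    timesteps = torch.linspace(1, eps, steps + 1)
    remaining = num_masked
    plan = []
    for i in range(steps):
        t, s = timesteps[i], timesteps[i + 1]
        n = int(remaining * (1 - s / t)) if i < steps - 1 else remaining
        plan.append(n)
        remaining -= n
    return plan  # entries sum to `num_masked`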