Spaces:
Running
Running
/**
 * @module generation/configuration_utils
 */
| import { pick } from "../utils/core.js"; | |
| /** | |
| * Class that holds a configuration for a generation task. | |
| */ | |
| export class GenerationConfig { | |
| // Parameters that control the length of the output | |
| /** | |
| * The maximum length the generated tokens can have. | |
| * Corresponds to the length of the input prompt + `max_new_tokens`. | |
| * Its effect is overridden by `max_new_tokens`, if also set. | |
| * @type {number} | |
| * @default 20 | |
| */ | |
| max_length = 20; | |
| /** | |
| * The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt. | |
| * @type {number} | |
| * @default null | |
| */ | |
| max_new_tokens = null; | |
| /** | |
| * The minimum length of the sequence to be generated. | |
| * Corresponds to the length of the input prompt + `min_new_tokens`. | |
| * Its effect is overridden by `min_new_tokens`, if also set. | |
| * @type {number} | |
| * @default 0 | |
| */ | |
| min_length = 0; | |
| /** | |
| * The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt. | |
| * @type {number} | |
| * @default null | |
| */ | |
| min_new_tokens = null; | |
| /** | |
| * Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values: | |
| * - `true`, where the generation stops as soon as there are `num_beams` complete candidates; | |
| * - `false`, where an heuristic is applied and the generation stops when is it very unlikely to find better candidates; | |
| * - `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm). | |
| * @type {boolean|"never"} | |
| * @default false | |
| */ | |
| early_stopping = false; | |
| /** | |
| * The maximum amount of time you allow the computation to run for in seconds. | |
| * Generation will still finish the current pass after allocated time has been passed. | |
| * @type {number} | |
| * @default null | |
| */ | |
| max_time = null; | |
| // Parameters that control the generation strategy used | |
| /** | |
| * Whether or not to use sampling; use greedy decoding otherwise. | |
| * @type {boolean} | |
| * @default false | |
| */ | |
| do_sample = false; | |
| /** | |
| * Number of beams for beam search. 1 means no beam search. | |
| * @type {number} | |
| * @default 1 | |
| */ | |
| num_beams = 1; | |
| /** | |
| * Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. | |
| * See [this paper](https://huggingface.co/papers/1610.02424) for more details. | |
| * @type {number} | |
| * @default 1 | |
| */ | |
| num_beam_groups = 1; | |
| /** | |
| * The values balance the model confidence and the degeneration penalty in contrastive search decoding. | |
| * @type {number} | |
| * @default null | |
| */ | |
| penalty_alpha = null; | |
| /** | |
| * Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. | |
| * @type {boolean} | |
| * @default true | |
| */ | |
| use_cache = true; | |
| // Parameters for manipulation of the model output logits | |
| /** | |
| * The value used to modulate the next token probabilities. | |
| * @type {number} | |
| * @default 1.0 | |
| */ | |
| temperature = 1.0; | |
| /** | |
| * The number of highest probability vocabulary tokens to keep for top-k-filtering. | |
| * @type {number} | |
| * @default 50 | |
| */ | |
| top_k = 50; | |
| /** | |
| * If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation. | |
| * @type {number} | |
| * @default 1.0 | |
| */ | |
| top_p = 1.0; | |
| /** | |
| * Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. | |
| * If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to `typical_p` or higher are kept for generation. | |
| * See [this paper](https://huggingface.co/papers/2202.00666) for more details. | |
| * @type {number} | |
| * @default 1.0 | |
| */ | |
| typical_p = 1.0; | |
| /** | |
| * If set to float strictly between 0 and 1, only tokens with a conditional probability greater than `epsilon_cutoff` will be sampled. | |
| * In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. | |
| * See [Truncation Sampling as Language Model Desmoothing](https://huggingface.co/papers/2210.15191) for more details. | |
| * @type {number} | |
| * @default 0.0 | |
| */ | |
| epsilon_cutoff = 0.0; | |
| /** | |
| * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. | |
| * If set to float strictly between 0 and 1, a token is only considered if it is greater than either `eta_cutoff` or `sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits)))`. | |
| * The latter term is intuitively the expected next token probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. | |
| * See [Truncation Sampling as Language Model Desmoothing](https://huggingface.co/papers/2210.15191) for more details. | |
| * @type {number} | |
| * @default 0.0 | |
| */ | |
| eta_cutoff = 0.0; | |
| /** | |
| * This value is subtracted from a beam's score if it generates a token same as any beam from other group at a particular time. | |
| * Note that `diversity_penalty` is only effective if `group beam search` is enabled. | |
| * @type {number} | |
| * @default 0.0 | |
| */ | |
| diversity_penalty = 0.0; | |
| /** | |
| * The parameter for repetition penalty. 1.0 means no penalty. | |
| * See [this paper](https://huggingface.co/papers/1909.05858) for more details. | |
| * @type {number} | |
| * @default 1.0 | |
| */ | |
| repetition_penalty = 1.0; | |
| /** | |
| * The paramater for encoder_repetition_penalty. | |
| * An exponential penalty on sequences that are not in the original input. | |
| * 1.0 means no penalty. | |
| * @type {number} | |
| * @default 1.0 | |
| */ | |
| encoder_repetition_penalty = 1.0; | |
| /** | |
| * Exponential penalty to the length that is used with beam-based generation. | |
| * It is applied as an exponent to the sequence length, which in turn is used to divide the score of the sequence. | |
| * Since the score is the log likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while `length_penalty` < 0.0 encourages shorter sequences. | |
| * @type {number} | |
| * @default 1.0 | |
| */ | |
| length_penalty = 1.0; | |
| /** | |
| * If set to int > 0, all ngrams of that size can only occur once. | |
| * @type {number} | |
| * @default 0 | |
| */ | |
| no_repeat_ngram_size = 0; | |
| /** | |
| * List of token ids that are not allowed to be generated. | |
| * In order to get the token ids of the words that should not appear in the generated text, use | |
| * `tokenizer(bad_words, { add_prefix_space: true, add_special_tokens: false }).input_ids`. | |
| * @type {number[][]} | |
| * @default null | |
| */ | |
| bad_words_ids = null; | |
| /** | |
| * List of token ids that must be generated. | |
| * If given a `number[][]`, this is treated as a simple list of words that must be included, the opposite to `bad_words_ids`. | |
| * If given `number[][][]`, this triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one can allow different forms of each word. | |
| * @type {number[][]|number[][][]} | |
| * @default null | |
| */ | |
| force_words_ids = null; | |
| /** | |
| * Whether to renormalize the logits after applying all the logits processors or warpers (including the custom ones). | |
| * It's highly recommended to set this flag to `true` as the search algorithms suppose the score logits are normalized but some logit processors or warpers break the normalization. | |
| * @type {boolean} | |
| * @default false | |
| */ | |
| renormalize_logits = false; | |
| /** | |
| * Custom constraints that can be added to the generation to ensure that the output will contain the use of certain tokens as defined by `Constraint` objects, in the most sensible way possible. | |
| * @type {Object[]} | |
| * @default null | |
| */ | |
| constraints = null; | |
| /** | |
| * The id of the token to force as the first generated token after the `decoder_start_token_id`. | |
| * Useful for multilingual models like mBART where the first generated token needs to be the target language token. | |
| * @type {number} | |
| * @default null | |
| */ | |
| forced_bos_token_id = null; | |
| /** | |
| * The id of the token to force as the last generated token when `max_length` is reached. | |
| * Optionally, use a list to set multiple *end-of-sequence* tokens. | |
| * @type {number|number[]} | |
| * @default null | |
| */ | |
| forced_eos_token_id = null; | |
| /** | |
| * Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method to crash. Note that using `remove_invalid_values` can slow down generation. | |
| * @type {boolean} | |
| */ | |
| remove_invalid_values = false; | |
| /** | |
| * This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been generated. | |
| * The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates where penalty starts and `decay_factor` represents the factor of exponential decay. | |
| * @type {[number, number]} | |
| * @default null | |
| */ | |
| exponential_decay_length_penalty = null; | |
| /** | |
| * A list of tokens that will be suppressed at generation. | |
| * The `SuppressTokens` logit processor will set their log probs to `-inf` so that they are not sampled. | |
| * @type {number[]} | |
| * @default null | |
| */ | |
| suppress_tokens = null; | |
| /** | |
| * A streamer that will be used to stream the generation. | |
| * @type {import('./streamers.js').TextStreamer} | |
| * @default null | |
| */ | |
| streamer = null; | |
| /** | |
| * A list of tokens that will be suppressed at the beginning of the generation. | |
| * The `SuppressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled. | |
| * @type {number[]} | |
| * @default null | |
| */ | |
| begin_suppress_tokens = null; | |
| /** | |
| * A list of pairs of integers which indicates a mapping from generation indices to token indices that will be forced before sampling. | |
| * For example, `[[1, 123]]` means the second generated token will always be a token of index 123. | |
| * @type {[number, number][]} | |
| * @default null | |
| */ | |
| forced_decoder_ids = null; | |
| /** | |
| * The guidance scale for classifier free guidance (CFG). CFG is enabled by setting `guidance_scale > 1`. | |
| * Higher guidance scale encourages the model to generate samples that are more closely linked to the input | |
| * prompt, usually at the expense of poorer quality. | |
| * @type {number} | |
| * @default null | |
| */ | |
| guidance_scale = null; | |
| // Parameters that define the output variables of `generate` | |
| /** | |
| * The number of independently computed returned sequences for each element in the batch. | |
| * @type {number} | |
| * @default 1 | |
| */ | |
| num_return_sequences = 1; | |
| /** | |
| * Whether or not to return the attentions tensors of all attention layers. | |
| * See `attentions` under returned tensors for more details. | |
| * @type {boolean} | |
| * @default false | |
| */ | |
| output_attentions = false; | |
| /** | |
| * Whether or not to return the hidden states of all layers. | |
| * See `hidden_states` under returned tensors for more details. | |
| * @type {boolean} | |
| * @default false | |
| */ | |
| output_hidden_states = false; | |
| /** | |
| * Whether or not to return the prediction scores. | |
| * See `scores` under returned tensors for more details. | |
| * @type {boolean} | |
| * @default false | |
| */ | |
| output_scores = false; | |
| /** | |
| * Whether or not to return a `ModelOutput` instead of a plain tuple. | |
| * @type {boolean} | |
| * @default false | |
| */ | |
| return_dict_in_generate = false; | |
| // Special tokens that can be used at generation time | |
| /** | |
| * The id of the *padding* token. | |
| * @type {number} | |
| * @default null | |
| */ | |
| pad_token_id = null; | |
| /** | |
| * The id of the *beginning-of-sequence* token. | |
| * @type {number} | |
| * @default null | |
| */ | |
| bos_token_id = null; | |
| /** | |
| * The id of the *end-of-sequence* token. | |
| * Optionally, use a list to set multiple *end-of-sequence* tokens. | |
| * @type {number|number[]} | |
| * @default null | |
| */ | |
| eos_token_id = null; | |
| // Generation parameters exclusive to encoder-decoder models | |
| /** | |
| * If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`. | |
| * @type {number} | |
| * @default 0 | |
| */ | |
| encoder_no_repeat_ngram_size = 0; | |
| /** | |
| * If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token. | |
| * @type {number} | |
| * @default null | |
| */ | |
| decoder_start_token_id = null; | |
| // Wild card | |
| /** | |
| * Additional generation kwargs will be forwarded to the `generate` function of the model. | |
| * Kwargs that are not present in `generate`'s signature will be used in the model forward pass. | |
| * @type {Object} | |
| * @default {} | |
| */ | |
| generation_kwargs = {}; | |
| /** | |
| * | |
| * @param {GenerationConfig|import('../configs.js').PretrainedConfig} config | |
| */ | |
| constructor(config) { | |
| Object.assign(this, pick(config, Object.getOwnPropertyNames(this))); | |
| } | |
| } | |