Spaces:

rnnandi
/

convert-gemma3-to-onnx

Running

App Files Files Community

convert-gemma3-to-onnx / transformers.js /src /generation /configuration_utils.js

rnnandi's picture

Add all files to convert gemma3 model to onnx

ca97aa9 2 months ago

history blame contribute delete

13.6 kB


	/**
	* @module generation/configuration_utils
	*/

	import { pick } from "../utils/core.js";

	/**
	* Class that holds a configuration for a generation task.
	*/
	export class GenerationConfig {
	    // Parameters that control the length of the output
	    /**
	     * The maximum length the generated tokens can have.
	     * Corresponds to the length of the input prompt + `max_new_tokens`.
	     * Its effect is overridden by `max_new_tokens`, if also set.
	     * @type {number}
	     * @default 20
	     */
	    max_length = 20;

	    /**
	     * The maximum number of tokens to generate, ignoring the number of tokens in the prompt.
	     * @type {number}
	     * @default null
	     */
	    max_new_tokens = null;

	    /**
	     * The minimum length of the sequence to be generated.
	     * Corresponds to the length of the input prompt + `min_new_tokens`.
	     * Its effect is overridden by `min_new_tokens`, if also set.
	     * @type {number}
	     * @default 0
	     */
	    min_length = 0;

	    /**
	     * The minimum number of tokens to generate, ignoring the number of tokens in the prompt.
	     * @type {number}
	     * @default null
	     */
	    min_new_tokens = null;

	    /**
	     * Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
	     * - `true`, where the generation stops as soon as there are `num_beams` complete candidates;
	     * - `false`, where a heuristic is applied and the generation stops when it is very unlikely to find better candidates;
	     * - `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm).
	     * @type {boolean|"never"}
	     * @default false
	     */
	    early_stopping = false;

	    /**
	     * The maximum amount of time you allow the computation to run for in seconds.
	     * Generation will still finish the current pass after allocated time has been passed.
	     * @type {number}
	     * @default null
	     */
	    max_time = null;

	    // Parameters that control the generation strategy used
	    /**
	     * Whether or not to use sampling; use greedy decoding otherwise.
	     * @type {boolean}
	     * @default false
	     */
	    do_sample = false;

	    /**
	     * Number of beams for beam search. 1 means no beam search.
	     * @type {number}
	     * @default 1
	     */
	    num_beams = 1;

	    /**
	     * Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
	     * See [this paper](https://huggingface.co/papers/1610.02424) for more details.
	     * @type {number}
	     * @default 1
	     */
	    num_beam_groups = 1;

	    /**
	     * The values balance the model confidence and the degeneration penalty in contrastive search decoding.
	     * @type {number}
	     * @default null
	     */
	    penalty_alpha = null;

	    /**
	     * Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding.
	     * @type {boolean}
	     * @default true
	     */
	    use_cache = true;

	    // Parameters for manipulation of the model output logits
	    /**
	     * The value used to modulate the next token probabilities.
	     * @type {number}
	     * @default 1.0
	     */
	    temperature = 1.0;

	    /**
	     * The number of highest probability vocabulary tokens to keep for top-k-filtering.
	     * @type {number}
	     * @default 50
	     */
	    top_k = 50;

	    /**
	     * If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation.
	     * @type {number}
	     * @default 1.0
	     */
	    top_p = 1.0;

	    /**
	     * Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated.
	     * If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to `typical_p` or higher are kept for generation.
	     * See [this paper](https://huggingface.co/papers/2202.00666) for more details.
	     * @type {number}
	     * @default 1.0
	     */
	    typical_p = 1.0;

	    /**
	     * If set to float strictly between 0 and 1, only tokens with a conditional probability greater than `epsilon_cutoff` will be sampled.
	     * In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model.
	     * See [Truncation Sampling as Language Model Desmoothing](https://huggingface.co/papers/2210.15191) for more details.
	     * @type {number}
	     * @default 0.0
	     */
	    epsilon_cutoff = 0.0;

	    /**
	     * Eta sampling is a hybrid of locally typical sampling and epsilon sampling.
	     * If set to float strictly between 0 and 1, a token is only considered if it is greater than either `eta_cutoff` or `sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits)))`.
	     * The latter term is intuitively the expected next token probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
	     * See [Truncation Sampling as Language Model Desmoothing](https://huggingface.co/papers/2210.15191) for more details.
	     * @type {number}
	     * @default 0.0
	     */
	    eta_cutoff = 0.0;

	    /**
	     * This value is subtracted from a beam's score if it generates a token same as any beam from other group at a particular time.
	     * Note that `diversity_penalty` is only effective if `group beam search` is enabled.
	     * @type {number}
	     * @default 0.0
	     */
	    diversity_penalty = 0.0;

	    /**
	     * The parameter for repetition penalty. 1.0 means no penalty.
	     * See [this paper](https://huggingface.co/papers/1909.05858) for more details.
	     * @type {number}
	     * @default 1.0
	     */
	    repetition_penalty = 1.0;

	    /**
	     * The parameter for encoder_repetition_penalty.
	     * An exponential penalty on sequences that are not in the original input.
	     * 1.0 means no penalty.
	     * @type {number}
	     * @default 1.0
	     */
	    encoder_repetition_penalty = 1.0;

	    /**
	     * Exponential penalty to the length that is used with beam-based generation.
	     * It is applied as an exponent to the sequence length, which in turn is used to divide the score of the sequence.
	     * Since the score is the log likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while `length_penalty` < 0.0 encourages shorter sequences.
	     * @type {number}
	     * @default 1.0
	     */
	    length_penalty = 1.0;

	    /**
	     * If set to int > 0, all ngrams of that size can only occur once.
	     * @type {number}
	     * @default 0
	     */
	    no_repeat_ngram_size = 0;

	    /**
	     * List of token ids that are not allowed to be generated.
	     * In order to get the token ids of the words that should not appear in the generated text, use
	     * `tokenizer(bad_words, { add_prefix_space: true, add_special_tokens: false }).input_ids`.
	     * @type {number[][]}
	     * @default null
	     */
	    bad_words_ids = null;

	    /**
	     * List of token ids that must be generated.
	     * If given a `number[][]`, this is treated as a simple list of words that must be included, the opposite to `bad_words_ids`.
	     * If given `number[][][]`, this triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one can allow different forms of each word.
	     * @type {number[][]|number[][][]}
	     * @default null
	     */
	    force_words_ids = null;

	    /**
	     * Whether to renormalize the logits after applying all the logits processors or warpers (including the custom ones).
	     * It's highly recommended to set this flag to `true` as the search algorithms suppose the score logits are normalized but some logit processors or warpers break the normalization.
	     * @type {boolean}
	     * @default false
	     */
	    renormalize_logits = false;

	    /**
	     * Custom constraints that can be added to the generation to ensure that the output will contain the use of certain tokens as defined by `Constraint` objects, in the most sensible way possible.
	     * @type {Object[]}
	     * @default null
	     */
	    constraints = null;

	    /**
	     * The id of the token to force as the first generated token after the `decoder_start_token_id`.
	     * Useful for multilingual models like mBART where the first generated token needs to be the target language token.
	     * @type {number}
	     * @default null
	     */
	    forced_bos_token_id = null;

	    /**
	     * The id of the token to force as the last generated token when `max_length` is reached.
	     * Optionally, use a list to set multiple end-of-sequence tokens.
	     * @type {number|number[]}
	     * @default null
	     */
	    forced_eos_token_id = null;

	    /**
	     * Whether to remove possible nan and inf outputs of the model to prevent the generation method from crashing.
	     * Note that using `remove_invalid_values` can slow down generation.
	     * @type {boolean}
	     * @default false
	     */
	    remove_invalid_values = false;

	    /**
	     * This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been generated.
	     * The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates where penalty starts and `decay_factor` represents the factor of exponential decay.
	     * @type {[number, number]}
	     * @default null
	     */
	    exponential_decay_length_penalty = null;

	    /**
	     * A list of tokens that will be suppressed at generation.
	     * The `SuppressTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
	     * @type {number[]}
	     * @default null
	     */
	    suppress_tokens = null;

	    /**
	     * A streamer that will be used to stream the generation.
	     * @type {import('./streamers.js').TextStreamer}
	     * @default null
	     */
	    streamer = null;

	    /**
	     * A list of tokens that will be suppressed at the beginning of the generation.
	     * The `SuppressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
	     * @type {number[]}
	     * @default null
	     */
	    begin_suppress_tokens = null;

	    /**
	     * A list of pairs of integers which indicates a mapping from generation indices to token indices that will be forced before sampling.
	     * For example, `[[1, 123]]` means the second generated token will always be a token of index 123.
	     * @type {[number, number][]}
	     * @default null
	     */
	    forced_decoder_ids = null;

	    /**
	     * The guidance scale for classifier free guidance (CFG). CFG is enabled by setting `guidance_scale > 1`.
	     * Higher guidance scale encourages the model to generate samples that are more closely linked to the input
	     * prompt, usually at the expense of poorer quality.
	     * @type {number}
	     * @default null
	     */
	    guidance_scale = null;

	    // Parameters that define the output variables of `generate`
	    /**
	     * The number of independently computed returned sequences for each element in the batch.
	     * @type {number}
	     * @default 1
	     */
	    num_return_sequences = 1;

	    /**
	     * Whether or not to return the attentions tensors of all attention layers.
	     * See `attentions` under returned tensors for more details.
	     * @type {boolean}
	     * @default false
	     */
	    output_attentions = false;

	    /**
	     * Whether or not to return the hidden states of all layers.
	     * See `hidden_states` under returned tensors for more details.
	     * @type {boolean}
	     * @default false
	     */
	    output_hidden_states = false;

	    /**
	     * Whether or not to return the prediction scores.
	     * See `scores` under returned tensors for more details.
	     * @type {boolean}
	     * @default false
	     */
	    output_scores = false;

	    /**
	     * Whether or not to return a `ModelOutput` instead of a plain tuple.
	     * @type {boolean}
	     * @default false
	     */
	    return_dict_in_generate = false;

	    // Special tokens that can be used at generation time
	    /**
	     * The id of the padding token.
	     * @type {number}
	     * @default null
	     */
	    pad_token_id = null;

	    /**
	     * The id of the beginning-of-sequence token.
	     * @type {number}
	     * @default null
	     */
	    bos_token_id = null;

	    /**
	     * The id of the end-of-sequence token.
	     * Optionally, use a list to set multiple end-of-sequence tokens.
	     * @type {number|number[]}
	     * @default null
	     */
	    eos_token_id = null;

	    // Generation parameters exclusive to encoder-decoder models
	    /**
	     * If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`.
	     * @type {number}
	     * @default 0
	     */
	    encoder_no_repeat_ngram_size = 0;

	    /**
	     * If an encoder-decoder model starts decoding with a different token than bos, the id of that token.
	     * @type {number}
	     * @default null
	     */
	    decoder_start_token_id = null;

	    // Wild card
	    /**
	     * Additional generation kwargs will be forwarded to the `generate` function of the model.
	     * Kwargs that are not present in `generate`'s signature will be used in the model forward pass.
	     * @type {Object}
	     * @default {}
	     */
	    generation_kwargs = {};

	    /**
	     * Create a generation configuration.
	     *
	     * Every option starts at the default declared on this class and is then
	     * overwritten by whatever `pick` returns for `config`.
	     *
	     * @param {GenerationConfig|import('../configs.js').PretrainedConfig} [config]
	     * Source of override values. When `null` or `undefined`, an empty object
	     * is used instead, so the instance keeps all defaults.
	     */
	    constructor(config) {
	        // Class fields are initialised before the constructor body runs, so the
	        // own property names of `this` are the option names declared above.
	        const optionNames = Object.getOwnPropertyNames(this);

	        // Normalise a missing config so `pick` never receives null/undefined.
	        // NOTE(review): `pick` is defined in ../utils/core.js; presumably it returns
	        // only the listed keys that are present on the source — confirm there.
	        Object.assign(this, pick(config ?? {}, optionNames));
	    }
	}