localtest-002 / config.json
tensorlink-dev's picture
Save model using custom save_hf
514c2f4 verified
{
"use_cache": true,
"aux_loss_weight": 0.01,
"return_dict": true,
"torchscript": false,
"torch_dtype": null,
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": true,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": true,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": null,
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": null,
"pad_token_id": null,
"eos_token_id": null,
"sep_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "",
"transformers_version": "4.53.0",
"static_dim": 0,
"dynamic_dim": 0,
"past_dynamic_dim": 0,
"static_cardinalities": null,
"dynamic_cardinalities": null,
"past_dynamic_cardinalities": null,
"static_embedding_dim": null,
"dynamic_embedding_dim": null,
"past_dynamic_embedding_dim": null,
"time_features": null,
"scaling": true,
"decoder_start_token_value": 0.0,
"feature_size": 1,
"context_length": 1024,
"prediction_length": 256,
"quantiles": [
0.005,
0.015,
0.025,
0.034999999999999996,
0.045,
0.055,
0.065,
0.07500000000000001,
0.085,
0.095,
0.10500000000000001,
0.115,
0.125,
0.135,
0.14500000000000002,
0.155,
0.165,
0.17500000000000002,
0.185,
0.195,
0.20500000000000002,
0.215,
0.225,
0.23500000000000001,
0.245,
0.255,
0.265,
0.275,
0.28500000000000003,
0.295,
0.305,
0.315,
0.325,
0.335,
0.34500000000000003,
0.35500000000000004,
0.365,
0.375,
0.385,
0.395,
0.405,
0.41500000000000004,
0.425,
0.435,
0.445,
0.455,
0.465,
0.47500000000000003,
0.485,
0.495,
0.505,
0.515,
0.525,
0.535,
0.545,
0.555,
0.5650000000000001,
0.5750000000000001,
0.585,
0.595,
0.605,
0.615,
0.625,
0.635,
0.645,
0.655,
0.665,
0.675,
0.685,
0.6950000000000001,
0.7050000000000001,
0.715,
0.725,
0.735,
0.745,
0.755,
0.765,
0.775,
0.785,
0.795,
0.805,
0.8150000000000001,
0.8250000000000001,
0.8350000000000001,
0.845,
0.855,
0.865,
0.875,
0.885,
0.895,
0.905,
0.915,
0.925,
0.935,
0.9450000000000001,
0.9550000000000001,
0.965,
0.975,
0.985,
0.995
],
"output_token_lengths": 1,
"loss_type": "quantile",
"use_dynamic_features": false,
"use_static_features": false,
"autoregressive": true,
"gradient_checkpointing": true,
"model_type": "transformer",
"d_model": 32,
"hidden_dropout_prob": 0.1,
"max_position_embeddings": 4096,
"architecture": {
"layout": "decoder",
"num_encoder_layers": 0,
"num_decoder_layers": 1,
"share_weights": false
},
"value_embedding_config": {
"type": "value",
"dropout": 0.1,
"embedding_dim": null,
"kwargs": {
"feature_size": 1,
"d_model": 32,
"use_layer_norm": true
}
},
"positional_embedding_config": {
"type": "stacked_embedding",
"dropout": 0.1,
"embedding_dim": null,
"kwargs": {
"embedding_configs": [],
"max_seq_len": 4096
}
},
"encoder_blocks": null,
"decoder_blocks": [
{
"block_type": "default_decoder",
"attention_config": {
"attention_type": "full",
"num_heads": 2,
"dropout": 0.1,
"bias": true,
"use_rope": true,
"use_alibi": false,
"rope_base": 10000,
"kwargs": {}
},
"cross_attention_config": null,
"ffn_config": {
"type": "standard",
"intermediate_size": 128,
"activation": "gelu",
"dropout": 0.1,
"bias": true,
"num_experts": null,
"top_k": null,
"expert_intermediate_size": null,
"load_balancing_coef": 0.01,
"kwargs": {}
},
"norm_config": {
"norm_type": "layer",
"eps": 1e-05,
"kwargs": {}
},
"kwargs": {}
}
],
"output_head_config": {
"type": "distpred",
"output_size": 100,
"kwargs": {
"num_outputs": 100,
"feature_size": 1
}
},
"norm_config": {
"norm_type": "layer",
"eps": 1e-05,
"kwargs": {}
},
"head_agg_config": {
"type": "mean",
"kwargs": {}
},
"loss_config": {
"type": "crps",
"kwargs": {
"reduction": "mean",
"estimator": "pwm",
"spread_lambda": 0,
"spread_penalty_type": "symmetric_log",
"spread_penalty_epsilon": 0,
"scaling_type": "none",
"spread_target_spread": 0
}
},
"output_attentions": false,
"output_hidden_states": false,
"use_teacher_forcing": true,
"quantizer_config": null,
"vocab_size": null,
"decoder_start_token_id": null,
"num_quantiles": 100
}