tensorlink-dev
/

test-003

Safetensors

transformer

Model card Files Files and versions

xet

Community

tensorlink-dev committed on May 24, 2025

Commit

ab5fe31

verified ·

1 Parent(s): d84db49

Upload initial model version

Browse files

Files changed (2) hide show

config.json +767 -0
model.safetensors +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,767 @@

+{
+  "return_dict": true,
+  "torchscript": false,
+  "torch_dtype": null,
+  "use_bfloat16": false,
+  "tf_legacy_loss": false,
+  "pruned_heads": {},
+  "tie_word_embeddings": true,
+  "chunk_size_feed_forward": 0,
+  "is_encoder_decoder": false,
+  "is_decoder": true,
+  "cross_attention_hidden_size": null,
+  "add_cross_attention": false,
+  "tie_encoder_decoder": false,
+  "max_length": 20,
+  "min_length": 0,
+  "do_sample": false,
+  "early_stopping": false,
+  "num_beams": 1,
+  "num_beam_groups": 1,
+  "diversity_penalty": 0.0,
+  "temperature": 1.0,
+  "top_k": 50,
+  "top_p": 1.0,
+  "typical_p": 1.0,
+  "repetition_penalty": 1.0,
+  "length_penalty": 1.0,
+  "no_repeat_ngram_size": 0,
+  "encoder_no_repeat_ngram_size": 0,
+  "bad_words_ids": null,
+  "num_return_sequences": 1,
+  "output_scores": false,
+  "return_dict_in_generate": false,
+  "forced_bos_token_id": null,
+  "forced_eos_token_id": null,
+  "remove_invalid_values": false,
+  "exponential_decay_length_penalty": null,
+  "suppress_tokens": null,
+  "begin_suppress_tokens": null,
+  "architectures": null,
+  "finetuning_task": null,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1
+  },
+  "tokenizer_class": null,
+  "prefix": null,
+  "bos_token_id": null,
+  "pad_token_id": null,
+  "eos_token_id": null,
+  "sep_token_id": null,
+  "task_specific_params": null,
+  "problem_type": null,
+  "_name_or_path": "",
+  "_attn_implementation_autoset": false,
+  "transformers_version": "4.51.3",
+  "target_dim": 1,
+  "static_dim": 0,
+  "dynamic_dim": 0,
+  "past_dynamic_dim": 0,
+  "static_cardinalities": null,
+  "dynamic_cardinalities": null,
+  "past_dynamic_cardinalities": null,
+  "static_embedding_dim": null,
+  "dynamic_embedding_dim": null,
+  "past_dynamic_embedding_dim": null,
+  "time_features": null,
+  "scaling": true,
+  "decoder_start_token_value": 0.0,
+  "feature_size": 1,
+  "context_length": 1024,
+  "prediction_length": 256,
+  "quantiles": [
+    0.005,
+    0.015,
+    0.025,
+    0.034999999999999996,
+    0.045,
+    0.055,
+    0.065,
+    0.07500000000000001,
+    0.085,
+    0.095,
+    0.10500000000000001,
+    0.115,
+    0.125,
+    0.135,
+    0.14500000000000002,
+    0.155,
+    0.165,
+    0.17500000000000002,
+    0.185,
+    0.195,
+    0.20500000000000002,
+    0.215,
+    0.225,
+    0.23500000000000001,
+    0.245,
+    0.255,
+    0.265,
+    0.275,
+    0.28500000000000003,
+    0.295,
+    0.305,
+    0.315,
+    0.325,
+    0.335,
+    0.34500000000000003,
+    0.35500000000000004,
+    0.365,
+    0.375,
+    0.385,
+    0.395,
+    0.405,
+    0.41500000000000004,
+    0.425,
+    0.435,
+    0.445,
+    0.455,
+    0.465,
+    0.47500000000000003,
+    0.485,
+    0.495,
+    0.505,
+    0.515,
+    0.525,
+    0.535,
+    0.545,
+    0.555,
+    0.5650000000000001,
+    0.5750000000000001,
+    0.585,
+    0.595,
+    0.605,
+    0.615,
+    0.625,
+    0.635,
+    0.645,
+    0.655,
+    0.665,
+    0.675,
+    0.685,
+    0.6950000000000001,
+    0.7050000000000001,
+    0.715,
+    0.725,
+    0.735,
+    0.745,
+    0.755,
+    0.765,
+    0.775,
+    0.785,
+    0.795,
+    0.805,
+    0.8150000000000001,
+    0.8250000000000001,
+    0.8350000000000001,
+    0.845,
+    0.855,
+    0.865,
+    0.875,
+    0.885,
+    0.895,
+    0.905,
+    0.915,
+    0.925,
+    0.935,
+    0.9450000000000001,
+    0.9550000000000001,
+    0.965,
+    0.975,
+    0.985,
+    0.995
+  ],
+  "output_token_lengths": 1,
+  "loss_type": "quantile",
+  "use_dynamic_features": false,
+  "use_static_features": false,
+  "autoregressive": true,
+  "gradient_checkpointing": true,
+  "model_type": "transformer",
+  "d_model": 768,
+  "hidden_dropout_prob": 0.1,
+  "max_position_embeddings": 4096,
+  "architecture": {
+    "layout": "decoder",
+    "num_encoder_layers": 0,
+    "num_decoder_layers": 16,
+    "share_weights": false
+  },
+  "value_embedding_config": {
+    "type": "value",
+    "dropout": 0.1,
+    "embedding_dim": null,
+    "kwargs": {
+      "feature_size": 1,
+      "d_model": 768
+    }
+  },
+  "positional_embedding_config": {
+    "type": "stacked_embedding",
+    "dropout": 0.1,
+    "embedding_dim": null,
+    "kwargs": {
+      "embedding_configs": [
+        {
+          "type": "fourier",
+          "args": {
+            "feature_size": 64
+          }
+        }
+      ],
+      "max_seq_len": 4096
+    }
+  },
+  "encoder_blocks": null,
+  "decoder_blocks": [
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    },
+    {
+      "block_type": "default_decoder",
+      "attention_config": {
+        "attention_type": "full",
+        "num_heads": 12,
+        "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
+        "kwargs": {}
+      },
+      "cross_attention_config": null,
+      "ffn_config": {
+        "type": "standard",
+        "intermediate_size": 3072,
+        "activation": "gelu",
+        "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
+        "kwargs": {}
+      },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
+        "kwargs": {}
+      },
+      "kwargs": {}
+    }
+  ],
+  "output_head_config": {
+    "type": "distpred",
+    "output_size": 100,
+    "kwargs": {
+      "num_outputs": 100,
+      "feature_size": 1
+    }
+  },
+  "norm_config": {
+    "norm_type": "layer",
+    "eps": 1e-05,
+    "kwargs": {}
+  },
+  "head_agg_config": {
+    "type": "mean",
+    "kwargs": {}
+  },
+  "loss_config": {
+    "type": "crps",
+    "kwargs": {
+      "scaling_type": "minmax",
+      "scaling_dim": 1,
+      "scaling_eps": 1e-08
+    }
+  },
+  "output_attentions": false,
+  "output_hidden_states": false,
+  "use_teacher_forcing": true,
+  "quantizer_config": null,
+  "vocab_size": null,
+  "decoder_start_token_id": null,
+  "num_quantiles": 100
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:384311e5de374e80bd3fc7e573d70842b23a77ca4da4620d7dfa2730f64200df
+size 454072768