tensorlink-dev
/

test-001

Safetensors

transformer

Model card Files and versions

xet

Community

tensorlink-dev committed on May 30, 2025

Commit

71dbd15

verified ·

1 Parent(s): f021c6e

Upload initial model version

Browse files

Files changed (2) hide show

config.json +251 -164
model.safetensors +2 -2

config.json CHANGED Viewed

@@ -1,35 +1,205 @@
 {
   "architecture": {
-    "layout": "encoder-decoder",
-    "num_encoder_layers": 2,
     "num_decoder_layers": 2,
     "share_weights": false
   },
-  "attention_blocks": {
-    "encoder_attention": {
-      "attention_type": "full",
-      "num_heads": 4,
-      "dropout": 0.1,
-      "kwargs": {}
-    },
-    "decoder_attention": {
-      "attention_type": "full",
-      "num_heads": 4,
-      "dropout": 0.1,
-      "kwargs": {}
-    },
-    "decoder_cross_attention": {
-      "attention_type": "full",
-      "num_heads": 4,
-      "dropout": 0.1,
-      "kwargs": {}
-    }
-  },
   "value_embedding_config": {
     "type": "value",
     "dropout": 0.1,
     "embedding_dim": null,
-    "kwargs": {}
   },
   "positional_embedding_config": {
     "type": "stacked_embedding",
@@ -38,111 +208,90 @@
     "kwargs": {
       "embedding_configs": [
         {
-          "type": "sinusoidal",
-          "args": {
-            "max_seq_len": 128
-          }
-        },
-        {
-          "type": "timedelta",
           "args": {
-            "hidden_dim": 16
-          }
-        },
-        {
-          "type": "learned_abs",
-          "args": {
-            "max_seq_len": 128
           }
         }
       ],
-      "max_seq_len": 4096
     }
   },
-  "feedforward_config": {
-    "type": "standard",
-    "intermediate_size": 256,
-    "activation": "gelu",
-    "dropout": 0.1,
-    "kwargs": {}
-  },
-  "output_head_config": {
-    "type": "linear",
-    "output_size": 1,
-    "kwargs": {}
-  },
-  "encoder_blocks": [
     {
-      "block_type": "default_encoder",
       "attention_config": {
         "attention_type": "full",
         "num_heads": 2,
         "dropout": 0.1,
         "kwargs": {}
       },
       "ffn_config": {
         "type": "standard",
-        "intermediate_size": 64,
         "activation": "gelu",
         "dropout": 0.1,
         "kwargs": {}
       },
-      "kwargs": {}
-    },
-    {
-      "block_type": "default_encoder",
-      "attention_config": {
-        "attention_type": "full",
-        "num_heads": 2,
-        "dropout": 0.1,
-        "kwargs": {}
-      },
-      "ffn_config": {
-        "type": "standard",
-        "intermediate_size": 64,
-        "activation": "gelu",
-        "dropout": 0.1,
         "kwargs": {}
       },
       "kwargs": {}
-    }
-  ],
-  "decoder_blocks": [
     {
       "block_type": "default_decoder",
       "attention_config": {
         "attention_type": "full",
         "num_heads": 2,
         "dropout": 0.1,
         "kwargs": {}
       },
       "ffn_config": {
         "type": "standard",
-        "intermediate_size": 64,
         "activation": "gelu",
         "dropout": 0.1,
         "kwargs": {}
       },
-      "kwargs": {}
-    },
-    {
-      "block_type": "default_decoder",
-      "attention_config": {
-        "attention_type": "full",
-        "num_heads": 2,
-        "dropout": 0.1,
-        "kwargs": {}
-      },
-      "ffn_config": {
-        "type": "standard",
-        "intermediate_size": 64,
-        "activation": "gelu",
-        "dropout": 0.1,
         "kwargs": {}
       },
       "kwargs": {}
     }
   ],
   "norm_config": {
     "norm_type": "layer",
     "eps": 1e-05,
@@ -152,84 +301,22 @@
     "type": "mean",
     "kwargs": {}
   },
-  "hidden_size": 32,
-  "num_quantiles": 3,
   "output_attentions": false,
   "output_hidden_states": false,
   "use_teacher_forcing": true,
-  "hidden_dropout_prob": 0.1,
-  "return_dict": true,
-  "torchscript": false,
-  "torch_dtype": null,
-  "use_bfloat16": false,
-  "tf_legacy_loss": false,
-  "pruned_heads": {},
-  "tie_word_embeddings": true,
-  "chunk_size_feed_forward": 0,
-  "is_encoder_decoder": false,
-  "is_decoder": false,
-  "cross_attention_hidden_size": null,
-  "add_cross_attention": false,
-  "tie_encoder_decoder": false,
-  "max_length": 20,
-  "min_length": 0,
-  "do_sample": false,
-  "early_stopping": false,
-  "num_beams": 1,
-  "num_beam_groups": 1,
-  "diversity_penalty": 0.0,
-  "temperature": 1.0,
-  "top_k": 50,
-  "top_p": 1.0,
-  "typical_p": 1.0,
-  "repetition_penalty": 1.0,
-  "length_penalty": 1.0,
-  "no_repeat_ngram_size": 0,
-  "encoder_no_repeat_ngram_size": 0,
-  "bad_words_ids": null,
-  "num_return_sequences": 1,
-  "output_scores": false,
-  "return_dict_in_generate": false,
-  "forced_bos_token_id": null,
-  "forced_eos_token_id": null,
-  "remove_invalid_values": false,
-  "exponential_decay_length_penalty": null,
-  "suppress_tokens": null,
-  "begin_suppress_tokens": null,
-  "architectures": null,
-  "finetuning_task": null,
-  "id2label": {
-    "0": "LABEL_0",
-    "1": "LABEL_1"
-  },
-  "label2id": {
-    "LABEL_0": 0,
-    "LABEL_1": 1
-  },
-  "tokenizer_class": null,
-  "prefix": null,
-  "bos_token_id": null,
-  "pad_token_id": null,
-  "eos_token_id": null,
-  "sep_token_id": null,
-  "decoder_start_token_id": 3.0,
-  "task_specific_params": null,
-  "problem_type": null,
-  "_name_or_path": "",
-  "_attn_implementation_autoset": false,
-  "transformers_version": "4.51.3",
-  "model_type": "transformer",
-  "feature_size": 1,
-  "context_length": 16,
-  "prediction_length": 4,
-  "quantiles": [
-    0.1,
-    0.5,
-    0.9
-  ],
-  "output_token_lengths": 1,
-  "loss_type": "quantile",
-  "use_dynamic_features": false,
-  "use_static_features": false,
-  "autoregressive": false
 }

 {
+  "return_dict": true,
+  "torchscript": false,
+  "torch_dtype": null,
+  "use_bfloat16": false,
+  "tf_legacy_loss": false,
+  "pruned_heads": {},
+  "tie_word_embeddings": true,
+  "chunk_size_feed_forward": 0,
+  "is_encoder_decoder": false,
+  "is_decoder": true,
+  "cross_attention_hidden_size": null,
+  "add_cross_attention": false,
+  "tie_encoder_decoder": false,
+  "max_length": 20,
+  "min_length": 0,
+  "do_sample": false,
+  "early_stopping": false,
+  "num_beams": 1,
+  "num_beam_groups": 1,
+  "diversity_penalty": 0.0,
+  "temperature": 1.0,
+  "top_k": 50,
+  "top_p": 1.0,
+  "typical_p": 1.0,
+  "repetition_penalty": 1.0,
+  "length_penalty": 1.0,
+  "no_repeat_ngram_size": 0,
+  "encoder_no_repeat_ngram_size": 0,
+  "bad_words_ids": null,
+  "num_return_sequences": 1,
+  "output_scores": false,
+  "return_dict_in_generate": false,
+  "forced_bos_token_id": null,
+  "forced_eos_token_id": null,
+  "remove_invalid_values": false,
+  "exponential_decay_length_penalty": null,
+  "suppress_tokens": null,
+  "begin_suppress_tokens": null,
+  "architectures": null,
+  "finetuning_task": null,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1
+  },
+  "tokenizer_class": null,
+  "prefix": null,
+  "bos_token_id": null,
+  "pad_token_id": null,
+  "eos_token_id": null,
+  "sep_token_id": null,
+  "task_specific_params": null,
+  "problem_type": null,
+  "_name_or_path": "",
+  "transformers_version": "4.52.2",
+  "target_dim": 1,
+  "static_dim": 0,
+  "dynamic_dim": 0,
+  "past_dynamic_dim": 0,
+  "static_cardinalities": null,
+  "dynamic_cardinalities": null,
+  "past_dynamic_cardinalities": null,
+  "static_embedding_dim": null,
+  "dynamic_embedding_dim": null,
+  "past_dynamic_embedding_dim": null,
+  "time_features": null,
+  "scaling": true,
+  "decoder_start_token_value": 0.0,
+  "feature_size": 1,
+  "context_length": 1024,
+  "prediction_length": 256,
+  "quantiles": [
+    0.005,
+    0.015,
+    0.025,
+    0.034999999999999996,
+    0.045,
+    0.055,
+    0.065,
+    0.07500000000000001,
+    0.085,
+    0.095,
+    0.10500000000000001,
+    0.115,
+    0.125,
+    0.135,
+    0.14500000000000002,
+    0.155,
+    0.165,
+    0.17500000000000002,
+    0.185,
+    0.195,
+    0.20500000000000002,
+    0.215,
+    0.225,
+    0.23500000000000001,
+    0.245,
+    0.255,
+    0.265,
+    0.275,
+    0.28500000000000003,
+    0.295,
+    0.305,
+    0.315,
+    0.325,
+    0.335,
+    0.34500000000000003,
+    0.35500000000000004,
+    0.365,
+    0.375,
+    0.385,
+    0.395,
+    0.405,
+    0.41500000000000004,
+    0.425,
+    0.435,
+    0.445,
+    0.455,
+    0.465,
+    0.47500000000000003,
+    0.485,
+    0.495,
+    0.505,
+    0.515,
+    0.525,
+    0.535,
+    0.545,
+    0.555,
+    0.5650000000000001,
+    0.5750000000000001,
+    0.585,
+    0.595,
+    0.605,
+    0.615,
+    0.625,
+    0.635,
+    0.645,
+    0.655,
+    0.665,
+    0.675,
+    0.685,
+    0.6950000000000001,
+    0.7050000000000001,
+    0.715,
+    0.725,
+    0.735,
+    0.745,
+    0.755,
+    0.765,
+    0.775,
+    0.785,
+    0.795,
+    0.805,
+    0.8150000000000001,
+    0.8250000000000001,
+    0.8350000000000001,
+    0.845,
+    0.855,
+    0.865,
+    0.875,
+    0.885,
+    0.895,
+    0.905,
+    0.915,
+    0.925,
+    0.935,
+    0.9450000000000001,
+    0.9550000000000001,
+    0.965,
+    0.975,
+    0.985,
+    0.995
+  ],
+  "output_token_lengths": 1,
+  "loss_type": "quantile",
+  "use_dynamic_features": false,
+  "use_static_features": false,
+  "autoregressive": true,
+  "gradient_checkpointing": true,
+  "model_type": "transformer",
+  "d_model": 32,
+  "hidden_dropout_prob": 0.1,
+  "max_position_embeddings": 4096,
   "architecture": {
+    "layout": "decoder",
+    "num_encoder_layers": 0,
     "num_decoder_layers": 2,
     "share_weights": false
   },
   "value_embedding_config": {
     "type": "value",
     "dropout": 0.1,
     "embedding_dim": null,
+    "kwargs": {
+      "feature_size": 1,
+      "d_model": 32,
+      "use_layer_norm": true
+    }
   },
   "positional_embedding_config": {
     "type": "stacked_embedding",
     "kwargs": {
       "embedding_configs": [
         {
+          "type": "fourier",
           "args": {
+            "feature_size": 64
           }
         }
       ],
+      "max_seq_len": 1000
     }
   },
+  "encoder_blocks": null,
+  "decoder_blocks": [
     {
+      "block_type": "default_decoder",
       "attention_config": {
         "attention_type": "full",
         "num_heads": 2,
         "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
         "kwargs": {}
       },
+      "cross_attention_config": null,
       "ffn_config": {
         "type": "standard",
+        "intermediate_size": 128,
         "activation": "gelu",
         "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
         "kwargs": {}
       },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
         "kwargs": {}
       },
       "kwargs": {}
+    },
     {
       "block_type": "default_decoder",
       "attention_config": {
         "attention_type": "full",
         "num_heads": 2,
         "dropout": 0.1,
+        "bias": true,
+        "use_rope": true,
+        "use_alibi": true,
+        "rope_base": 10000,
         "kwargs": {}
       },
+      "cross_attention_config": null,
       "ffn_config": {
         "type": "standard",
+        "intermediate_size": 128,
         "activation": "gelu",
         "dropout": 0.1,
+        "bias": true,
+        "num_experts": null,
+        "top_k": null,
+        "expert_intermediate_size": null,
+        "load_balancing_coef": 0.01,
         "kwargs": {}
       },
+      "norm_config": {
+        "norm_type": "layer",
+        "eps": 1e-05,
         "kwargs": {}
       },
       "kwargs": {}
     }
   ],
+  "output_head_config": {
+    "type": "distpred",
+    "output_size": 100,
+    "kwargs": {
+      "num_outputs": 100,
+      "feature_size": 1
+    }
+  },
   "norm_config": {
     "norm_type": "layer",
     "eps": 1e-05,
     "type": "mean",
     "kwargs": {}
   },
+  "loss_config": {
+    "type": "crps",
+    "kwargs": {
+      "reduction": "mean",
+      "estimator": "pwm",
+      "spread_lambda": 0.0,
+      "spread_penalty_type": "log",
+      "spread_penalty_epsilon": 0.001,
+      "scaling_type": "none"
+    }
+  },
   "output_attentions": false,
   "output_hidden_states": false,
   "use_teacher_forcing": true,
+  "quantizer_config": null,
+  "vocab_size": null,
+  "decoder_start_token_id": null,
+  "num_quantiles": 100
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6e48b3d3c88b34881896a31683954e5f39b707926a3c8d9aee0b74657e18a8b
-size 252476

 version https://git-lfs.github.com/spec/v1
+oid sha256:71fdb87ec51270c8919baa9db70d81b1cf6d5dd63eb5a87af162f6ba3cfd0efd
+size 123656