updated config and weights

#3
by kashif HF Staff - opened
Files changed (3) hide show
  1. README.md +3 -4
  2. config.json +3 -39
  3. model.safetensors +2 -2
README.md CHANGED
@@ -5,7 +5,7 @@ pipeline_tag: time-series-forecasting
5
  tags:
6
  - transformers
7
  - timesfm
8
- - timesfm_2p5
9
  - time-series-forecasting
10
  - arxiv:2310.10688
11
  ---
@@ -16,13 +16,12 @@ TimesFM (Time Series Foundation Model) is a pretrained decoder-only model for ti
16
 
17
  **Resources and Technical Documentation**:
18
  * Original model: [google/timesfm-2.5-200m-pytorch](https://huggingface.co/google/timesfm-2.5-200m-pytorch)
19
- * Transformers model: [google/timesfm-2.5-200m-transformers](https://huggingface.co/google/timesfm-2.5-200m-transformers)
20
  * Paper: [A decoder-only foundation model for time-series forecasting](https://huggingface.co/papers/2310.10688)
21
  * Transformers docs: [TimesFM 2.5](https://huggingface.co/docs/transformers/main/en/model_doc/timesfm_2p5)
22
 
23
  ## Model description
24
 
25
- This model is converted from the official TimesFM 2.5 PyTorch checkpoint and integrated into `transformers` as `Timesfm2P5ModelForPrediction`.
26
 
27
  The converted checkpoint preserves the original architecture and forecasting behavior, including:
28
  * patch-based inputs for time-series contexts
@@ -35,7 +34,7 @@ The converted checkpoint preserves the original architecture and forecasting beh
35
  import torch
36
  from transformers import Timesfm2P5ModelForPrediction
37
 
38
- model = Timesfm2P5ModelForPrediction.from_pretrained("google/timesfm-2.5-200m-transformers", attn_implementation="sdpa")
39
  model = model.to(torch.float32).eval()
40
 
41
  past_values = [
 
5
  tags:
6
  - transformers
7
  - timesfm
8
+ - timesfm2_5
9
  - time-series-forecasting
10
  - arxiv:2310.10688
11
  ---
 
16
 
17
  **Resources and Technical Documentation**:
18
  * Original model: [google/timesfm-2.5-200m-pytorch](https://huggingface.co/google/timesfm-2.5-200m-pytorch)
 
19
  * Paper: [A decoder-only foundation model for time-series forecasting](https://huggingface.co/papers/2310.10688)
20
  * Transformers docs: [TimesFM 2.5](https://huggingface.co/docs/transformers/main/en/model_doc/timesfm_2p5)
21
 
22
  ## Model description
23
 
24
+ This model is converted from the official TimesFM 2.5 PyTorch checkpoint and integrated into `transformers` as `TimesFm2_5ModelForPrediction`.
25
 
26
  The converted checkpoint preserves the original architecture and forecasting behavior, including:
27
  * patch-based inputs for time-series contexts
 
34
  import torch
35
  from transformers import TimesFm2_5ModelForPrediction
36
 
37
+ model = TimesFm2_5ModelForPrediction.from_pretrained("google/timesfm-2.5-200m-transformers")
38
  model = model.to(torch.float32).eval()
39
 
40
  past_values = [
config.json CHANGED
@@ -1,54 +1,26 @@
1
  {
2
  "activation": "swish",
3
  "architectures": [
4
- "Timesfm2P5ModelForPrediction"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
- "attn_logit_softcapping": null,
9
  "context_length": 16384,
10
  "decode_index": 5,
11
  "dtype": "float32",
12
  "force_flip_invariance": true,
13
- "freq_size": 10,
14
  "head_dim": 80,
15
  "hidden_size": 1280,
16
  "horizon_length": 128,
17
  "infer_is_positive": true,
18
  "initializer_range": 0.02,
19
  "intermediate_size": 1280,
20
- "layer_types": [
21
- "attention",
22
- "attention",
23
- "attention",
24
- "attention",
25
- "attention",
26
- "attention",
27
- "attention",
28
- "attention",
29
- "attention",
30
- "attention",
31
- "attention",
32
- "attention",
33
- "attention",
34
- "attention",
35
- "attention",
36
- "attention",
37
- "attention",
38
- "attention",
39
- "attention",
40
- "attention"
41
- ],
42
  "max_position_embeddings": 16384,
43
- "max_timescale": 10000.0,
44
- "min_timescale": 1.0,
45
- "model_type": "timesfm_2p5",
46
- "normalize_inputs": true,
47
  "num_attention_heads": 16,
48
  "num_hidden_layers": 20,
49
  "num_key_value_heads": 16,
50
  "output_quantile_len": 1024,
51
- "pad_val": -1000000000.0,
52
  "patch_length": 32,
53
  "quantiles": [
54
  0.1,
@@ -61,20 +33,12 @@
61
  0.8,
62
  0.9
63
  ],
64
- "query_pre_attn_scalar": 256.0,
65
  "rms_norm_eps": 1e-06,
66
  "rope_parameters": {
67
  "rope_theta": 10000.0,
68
  "rope_type": "default"
69
  },
70
- "rope_theta": 10000.0,
71
- "sliding_window": null,
72
- "tolerance": 1e-05,
73
  "transformers_version": "5.3.0.dev0",
74
  "use_bias": false,
75
- "use_continuous_quantile_head": true,
76
- "use_per_dim_scale": true,
77
- "use_positional_embedding": false,
78
- "use_qk_norm": true,
79
- "use_rotary_embeddings": true
80
  }
 
1
  {
2
  "activation": "swish",
3
  "architectures": [
4
+ "TimesFm2_5ModelForPrediction"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
 
8
  "context_length": 16384,
9
  "decode_index": 5,
10
  "dtype": "float32",
11
  "force_flip_invariance": true,
 
12
  "head_dim": 80,
13
  "hidden_size": 1280,
14
  "horizon_length": 128,
15
  "infer_is_positive": true,
16
  "initializer_range": 0.02,
17
  "intermediate_size": 1280,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  "max_position_embeddings": 16384,
19
+ "model_type": "timesfm2_5",
 
 
 
20
  "num_attention_heads": 16,
21
  "num_hidden_layers": 20,
22
  "num_key_value_heads": 16,
23
  "output_quantile_len": 1024,
 
24
  "patch_length": 32,
25
  "quantiles": [
26
  0.1,
 
33
  0.8,
34
  0.9
35
  ],
 
36
  "rms_norm_eps": 1e-06,
37
  "rope_parameters": {
38
  "rope_theta": 10000.0,
39
  "rope_type": "default"
40
  },
 
 
 
41
  "transformers_version": "5.3.0.dev0",
42
  "use_bias": false,
43
+ "use_continuous_quantile_head": true
 
 
 
 
44
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3ecaed5ac5d6a4ab3681fde3647fa6b5e47614f168b25ca2313d3bd7500c3fd
3
- size 944863880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b53f6d52114e2ad786890f3c4637ce05f580b7800d6e24401f88b398b76035ef
3
+ size 925187448