GritLs committed on
Commit
73174e4
·
1 Parent(s): 4cc03a4

init commit

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. README.md +54 -0
  3. config.json +84 -0
  4. model.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,57 @@
1
  ---
2
  license: apache-2.0
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ tags:
4
+ - time-series
5
+ - forecasting
6
+ - foundation-model
7
+ - zero-shot
8
  ---
9
+
10
+ # Kairos-10M: Adaptive Time Series Foundation Model
11
+
12
+ ## Model Description
13
+
14
+ **Kairos-10M** is a 10-million parameter time series foundation model designed for zero-shot forecasting across diverse domains. It features adaptive tokenization and instance-specific positional encodings to handle heterogeneous time series data with varying information density.
15
+
16
+ ## Key Features
17
+
18
+ - 🔀 **Mixture-of-Size Dynamic Patching (MoS-DP)**: Adaptively selects tokenization granularity based on local information density
19
+ - 🔄 **Instance-adaptive Rotary Position Embedding (IARoPE)**: Tailors positional encodings to unique temporal characteristics of each series
20
+ - 📊 **Zero-shot Forecasting**: Strong generalization across domains without fine-tuning
21
+ - ⚡ **Efficient**: Superior performance with fewer parameters
22
+
23
+ ## Model Specifications
24
+
25
+ - **Parameters**: ~10 million
26
+ - **Training Data**: PreSTS corpus (300+ billion time points)
27
+ - **Architecture**: Transformer-based with adaptive components
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from kairos import KairosModel
33
+
34
+ # Load model
35
+ model = KairosModel.from_pretrained("Kairos_10m")
36
+
37
+ # Zero-shot forecasting
38
+ forecasts = model.predict(historical_data, prediction_length=96)
39
+ ```
40
+
41
+ For detailed usage examples, please refer to the [main repository](https://github.com/foundation-model-research/Kairos).
42
+
43
+ ## Citation
44
+
45
+ If you use this model, please cite:
46
+
47
+ ```bibtex
48
+ @article{kairos2025,
49
+ title={Kairos: Towards Adaptive and Generalizable Time Series Foundation Models},
50
+ author={Kun Feng and Shaocheng Lan and Yuchen Fang and Wenchao He and Lintao Ma and Xingyu Lu and Kan Ren},
51
+ year={2025}
52
+ }
53
+ ```
54
+
55
+ ## License
56
+
57
+ Apache License 2.0
config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "KairosModel"
4
+ ],
5
+ "classifier_dropout": 0,
6
+ "context_length": 2048,
7
+ "cross_attention_pe_flip": false,
8
+ "d_ff": 1024,
9
+ "d_kv": 64,
10
+ "d_model": 256,
11
+ "data_driven": true,
12
+ "decoder_start_token_id": 0,
13
+ "dense_act_fn": "relu",
14
+ "diff_decoder_token_id": false,
15
+ "dropout_rate": 0.1,
16
+ "dtype": "float32",
17
+ "eos_token_id": 1,
18
+ "feed_forward_proj": "relu",
19
+ "finetune": false,
20
+ "initializer_factor": 0.05,
21
+ "input_patch_size": 128,
22
+ "input_patch_stride": 128,
23
+ "instance_rope_input_feature_dim": 128,
24
+ "is_cross_attention_pe": true,
25
+ "is_encoder_decoder": true,
26
+ "is_gated_act": false,
27
+ "layer_norm_epsilon": 1e-06,
28
+ "levels": 3,
29
+ "loss_weight_scheme": "log_decay",
30
+ "max_period": "original_rope_init",
31
+ "min_period": "original_rope_init",
32
+ "model_type": "kairos",
33
+ "moe_inter_dim": 1408,
34
+ "multi_pred_head": false,
35
+ "n_activated_experts": 3,
36
+ "n_expert_groups": 1,
37
+ "n_limited_groups": 1,
38
+ "n_null_experts": 2,
39
+ "n_positions": 512,
40
+ "num_decoder_layers": 4,
41
+ "num_decoder_segments": 2,
42
+ "num_heads": 4,
43
+ "num_layers": 4,
44
+ "pad_token_id": 0,
45
+ "position_embedding_type": "instance_wise_rope",
46
+ "prediction_length": 64,
47
+ "pretrained_model_path": "",
48
+ "quantiles": [
49
+ 0.1,
50
+ 0.2,
51
+ 0.3,
52
+ 0.4,
53
+ 0.5,
54
+ 0.6,
55
+ 0.7,
56
+ 0.8,
57
+ 0.9
58
+ ],
59
+ "reg_token_id": 1,
60
+ "relative_attention_max_distance": 128,
61
+ "relative_attention_num_buckets": 32,
62
+ "rope_init": "exp",
63
+ "router_gumbel_softmax": false,
64
+ "scale_method": "log",
65
+ "score_func": "softmax",
66
+ "seq_balance_factor": 0.0001,
67
+ "target_dist": [
68
+ 0.05,
69
+ 0.1,
70
+ 0.55,
71
+ 0.15,
72
+ 0.15
73
+ ],
74
+ "threshold": 0.6,
75
+ "transformers_version": "4.56.1",
76
+ "update_bias_rate": 0.01,
77
+ "use_bias": true,
78
+ "use_cache": true,
79
+ "use_reg_token": true,
80
+ "use_top1_bias": true,
81
+ "use_topk": true,
82
+ "vocab_size": 2,
83
+ "weights_norm": true
84
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386636f9fcd8de9f09cacc0e55d922fc46dc496fd3fa7d3550169aa4d8517440
3
+ size 39806164