{
  "architectures": [
    "TimeSeriesTransformerForPrediction"
  ],
  "context_length": 168,
  "d_model": 64,
  "decoder_attention_heads": 4,
  "decoder_ffn_dim": 256,
  "decoder_layers": 3,
  "distribution_output": "student_t",
  "dropout": 0.1,
  "encoder_attention_heads": 4,
  "encoder_ffn_dim": 256,
  "encoder_layers": 3,
  "feature_size": 14,
  "input_size": 1,
  "lags_sequence": [1, 2, 3, 4, 5, 6, 7, 24, 168],
  "loss": "negative_log_likelihood",
  "model_type": "time_series_transformer",
  "num_parallel_samples": 100,
  "prediction_length": 24,
  "scaling": "mean",
  "torch_dtype": "float32",
  "transformers_version": "4.36.0"
}