MarketGPT Return-token Model

This is a GPT-style causal language model trained from scratch on volatility-normalized return tokens.

Instead of directly regressing future continuous returns, the pipeline converts each asset return into a discrete bucket token and treats forecasting as next-token prediction:

past return tokens -> future return tokens

The model can sample multiple future return paths with constrained generation.

Data

  • Data source: siddharthmb/stocks-ohlcv when use_synthetic=false; a synthetic heavy-tailed market panel when use_synthetic=true. The metrics reported below were produced with siddharthmb/stocks-ohlcv.
  • Assets: AAPL, MSFT, AMZN, GOOGL, NVDA, TSLA, AMD, INTC, JPM, BAC, V, MA
  • Return: log return, r_t = log(price_t / price_{t-1})
  • Normalization: each return is divided by an exponentially weighted moving (EWM) volatility estimate with span 60
  • Buckets: 101 quantile buckets over clipped normalized returns
  • Split: chronological train / validation / test
  • Leakage control: quantile bucket edges are fitted only on the train period when bucket_method=quantile

Vocabulary

PAD = 0
BOS = 1
EOS = 2
STEP = 3
asset_bucket_token = 4 + asset_id * num_buckets + bucket_id
  • vocab size: 1216 (4 special tokens + 12 assets × 101 buckets)
  • sequence length: 2082

Model

GPT2Config(
    vocab_size=1216,
    n_positions=2082,
    n_embd=384,
    n_layer=6,
    n_head=6,
)

Metrics

{
  "train": {
    "train_runtime": 55.4346,
    "train_samples_per_second": 260.487,
    "train_steps_per_second": 8.208,
    "total_flos": 1920653130792960.0,
    "train_loss": 3.8263048025277944,
    "epoch": 5.0
  },
  "validation": {
    "eval_loss": 5.103570461273193,
    "eval_runtime": 0.5501,
    "eval_samples_per_second": 656.204,
    "eval_steps_per_second": 165.414,
    "epoch": 5.0
  },
  "test": {
    "test_loss": 5.131337642669678,
    "test_runtime": 0.59,
    "test_samples_per_second": 613.581,
    "test_steps_per_second": 154.243,
    "epoch": 5.0
  },
  "generation": {
    "num_contexts": 12,
    "num_sample_paths": 24,
    "directional_accuracy_median_path": 0.5219184027777778,
    "generated_degeneration_ratio_abs_lt_1e-5": 0.0,
    "true_degeneration_ratio_abs_lt_1e-5": 0.0,
    "path_diversity": 0.03765995055437088,
    "distribution_generated": {
      "mean": 0.004333046730607748,
      "std": 0.08561652898788452,
      "skew": -0.5086434483528137,
      "kurtosis": 56.622493743896484,
      "q01": -0.2753317952156067,
      "q05": -0.037811942398548126,
      "q95": 0.05145534127950668,
      "q99": 0.33806970715522766
    },
    "distribution_true": {
      "mean": -0.002802362898364663,
      "std": 0.030968399718403816,
      "skew": -2.6303722858428955,
      "kurtosis": 23.258527755737305,
      "q01": -0.09194755554199219,
      "q05": -0.05170159786939621,
      "q95": 0.04157276824116707,
      "q99": 0.06143633648753166
    },
    "cross_sectional_corr_mae": 0.24682527780532837,
    "cross_sectional_corr_real_matrix": [
      [
        1.0,
        0.6331244707107544,
        0.51563560962677,
        0.7108922600746155,
        0.5545589923858643,
        0.5535476207733154,
        0.5244247317314148,
        0.21973548829555511,
        0.3254808187484741,
        0.296897292137146,
        0.6087028384208679,
        0.5639088153839111
      ],
      [
        0.6331244707107544,
        1.0,
        0.6599540114402771,
        0.7749960422515869,
        0.5495731830596924,
        0.6230116486549377,
        0.46573472023010254,
        0.48720842599868774,
        0.45291945338249207,
        0.311764121055603,
        0.6260378360748291,
        0.47891733050346375
      ],
      [
        0.51563560962677,
        0.6599540114402771,
        1.0,
        0.6038300395011902,
        0.5262182950973511,
        0.5929338335990906,
        0.42031070590019226,
        0.8166924715042114,
        0.5714275240898132,
        0.5298638343811035,
        0.4197486639022827,
        0.4404708445072174
      ],
      [
        0.7108922600746155,
        0.7749960422515869,
        0.6038300395011902,
        1.0,
        0.5228630900382996,
        0.6676064133644104,
        0.5059135556221008,
        0.45712900161743164,
        0.39244410395622253,
        0.2839842736721039,
        0.5785690546035767,
        0.5194220542907715
      ],
      [
        0.5545589923858643,
        0.5495731830596924,
        0.5262182950973511,
        0.5228630900382996,
        1.0,
        0.7413638830184937,
        0.6974919438362122,
        0.4451492726802826,
        0.18301637470722198,
        0.11316652595996857,
        0.3873818814754486,
        0.4405401945114136
      ],
      [
        0.5535476207733154,
        0.6230116486549377,
        0.5929338335990906,
        0.6676064133644104,
        0.7413638830184937,
        1.0,
        0.626915693283081,
        0.5260854959487915,
        0.2714369297027588,
        0.18484577536582947,
        0.5772184729576111,
        0.4420454204082489
      ],
      [
        0.5244247317314148,
        0.46573472023010254,
        0.42031070590019226,
        0.5059135556221008,
        0.6974919438362122,
        0.626915693283081,
        1.0,
        0.3260938227176666,
        0.11072525382041931,
        0.11624810099601746,
        0.23918883502483368,
        0.18416988849639893
      ],
      [
        0.21973548829555511,
        0.48720842599868774,
        0.8166924715042114,
        0.45712900161743164,
        0.4451492726802826,
        0.5260854959487915,
        0.3260938227176666,
        1.0,
        0.587582528591156,
        0.5751133561134338,
        0.24537551403045654,
        0.28593382239341736
      ],
      [
        0.3254808187484741,
        0.45291945338249207,
        0.5714275240898132,
        0.39244410395622253,
        0.18301637470722198,
        0.2714369297027588,
        0.11072525382041931,
        0.587582528591156,
        1.0,
        0.7873843312263489,
        0.39742162823677063,
        0.378302663564682
      ],
      [
        0.296897292137146,
        0.311764121055603,
        0.5298638343811035,
        0.2839842736721039,
        0.11316652595996857,
        0.18484577536582947,
        0.11624810099601746,
        0.5751133561134338,
        0.7873843312263489,
        1.0,
        0.16922225058078766,
        0.11870864033699036
      ],
      [
        0.6087028384208679,
        0.6260378360748291,
        0.4197486639022827,
        0.5785690546035767,
        0.3873818814754486,
        0.5772184729576111,
        0.23918883502483368,
        0.24537551403045654,
        0.39742162823677063,
        0.16922225058078766,
        1.0,
        0.8636203408241272
      ],
      [
        0.5639088153839111,
        0.47891733050346375,
        0.4404708445072174,
        0.5194220542907715,
        0.4405401945114136,
        0.4420454204082489,
        0.18416988849639893,
        0.28593382239341736,
        0.378302663564682,
        0.11870864033699036,
        0.8636203408241272,
        1.0
      ]
    ],
    "cross_sectional_corr_generated_matrix": [
      [
        1.0,
        0.6723060011863708,
        0.5211983919143677,
        0.42777976393699646,
        0.3105679154396057,
        0.15275193750858307,
        0.09020064026117325,
        0.05684993788599968,
        0.027994271367788315,
        0.021539758890867233,
        0.0018611304694786668,
        0.00014955938968341798
      ],
      [
        0.6723060011863708,
        1.0,
        0.740753710269928,
        0.5988911390304565,
        0.4051375389099121,
        0.19946546852588654,
        0.12246865034103394,
        0.07496435195207596,
        0.041361626237630844,
        0.03579115495085716,
        0.0038796490989625454,
        0.006104168947786093
      ],
      [
        0.5211983919143677,
        0.740753710269928,
        1.0,
        0.7721667885780334,
        0.5105940699577332,
        0.25858888030052185,
        0.16144169867038727,
        0.09339627623558044,
        0.051841165870428085,
        0.04847245290875435,
        0.015083085745573044,
        0.013859635218977928
      ],
      [
        0.42777976393699646,
        0.5988911390304565,
        0.7721667885780334,
        1.0,
        0.635029137134552,
        0.3101363778114319,
        0.20397040247917175,
        0.12052255123853683,
        0.07821977138519287,
        0.07283712923526764,
        0.04090026021003723,
        0.03736479952931404
      ],
      [
        0.3105679154396057,
        0.4051375389099121,
        0.5105940699577332,
        0.635029137134552,
        1.0,
        0.44570595026016235,
        0.278595894575119,
        0.16786691546440125,
        0.11481305956840515,
        0.10485804080963135,
        0.06077614799141884,
        0.057598281651735306
      ],
      [
        0.15275193750858307,
        0.19946546852588654,
        0.25858888030052185,
        0.3101363778114319,
        0.44570595026016235,
        1.0,
        0.4601401388645172,
        0.2706546485424042,
        0.18775667250156403,
        0.17438025772571564,
        0.11574845016002655,
        0.10984478890895844
      ],
      [
        0.09020064026117325,
        0.12246865034103394,
        0.16144169867038727,
        0.20397040247917175,
        0.278595894575119,
        0.4601401388645172,
        1.0,
        0.5125625729560852,
        0.34511199593544006,
        0.31734904646873474,
        0.19516591727733612,
        0.18664193153381348
      ],
      [
        0.05684993788599968,
        0.07496435195207596,
        0.09339627623558044,
        0.12052255123853683,
        0.16786691546440125,
        0.2706546485424042,
        0.5125625729560852,
        1.0,
        0.588446855545044,
        0.5465589761734009,
        0.312430739402771,
        0.29470688104629517
      ],
      [
        0.027994271367788315,
        0.041361626237630844,
        0.051841165870428085,
        0.07821977138519287,
        0.11481305956840515,
        0.18775667250156403,
        0.34511199593544006,
        0.588446855545044,
        1.0,
        0.9245684146881104,
        0.5329838991165161,
        0.49864715337753296
      ],
      [
        0.021539758890867233,
        0.03579115495085716,
        0.04847245290875435,
        0.07283712923526764,
        0.10485804080963135,
        0.17438025772571564,
        0.31734904646873474,
        0.5465589761734009,
        0.9245684146881104,
        1.0,
        0.5616907477378845,
        0.5243045091629028
      ],
      [
        0.0018611304694786668,
        0.0038796490989625454,
        0.015083085745573044,
        0.04090026021003723,
        0.06077614799141884,
        0.11574845016002655,
        0.19516591727733612,
        0.312430739402771,
        0.5329838991165161,
        0.5616907477378845,
        1.0,
        0.920161783695221
      ],
      [
        0.00014955938968341798,
        0.006104168947786093,
        0.013859635218977928,
        0.03736479952931404,
        0.057598281651735306,
        0.10984478890895844,
        0.18664193153381348,
        0.29470688104629517,
        0.49864715337753296,
        0.5243045091629028,
        0.920161783695221,
        1.0
      ]
    ],
    "generated_abs_return_autocorr_lag1": 0.019996171948218167,
    "true_abs_return_autocorr_lag1": 0.2633381616777056
  },
  "data": {
    "tickers": [
      "AAPL",
      "MSFT",
      "AMZN",
      "GOOGL",
      "NVDA",
      "TSLA",
      "AMD",
      "INTC",
      "JPM",
      "BAC",
      "V",
      "MA"
    ],
    "price_shape": [
      3790,
      12
    ],
    "return_shape": [
      3770,
      12
    ],
    "bucket_method": "quantile",
    "num_samples": 3611,
    "train_samples": 2888,
    "validation_samples": 361,
    "test_samples": 362,
    "sequence_length": 2082,
    "vocab_size": 1216,
    "num_parameters": 11913984,
    "data_source": "siddharthmb/stocks-ohlcv"
  }
}

Intended Use

Research and experimentation with market return sequence generation. This model is not a trading system and is not investment advice.

Downloads last month
283
Safetensors
Model size
11.9M params
Tensor type
F32
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Dataset used to train kyLELEng/market-gpt-return-token