MarketGPT Return-token Model
This is a GPT-style causal language model trained from scratch on volatility-normalized return tokens.
Instead of directly regressing future continuous returns, the pipeline converts each asset return into a discrete bucket token:
past return tokens -> future return tokens
The model can sample multiple future return paths with constrained generation.
Data
- Data source: `siddharthmb/stocks-ohlcv` when `use_synthetic=false`; synthetic heavy-tailed market panel when `use_synthetic=true`
- Assets: AAPL, MSFT, AMZN, GOOGL, NVDA, TSLA, AMD, INTC, JPM, BAC, V, MA
- Return: $r_t = \log(\mathrm{price}_t / \mathrm{price}_{t-1})$
- Normalization: return divided by EWM volatility, span 60
- Buckets: 101 quantile buckets over clipped normalized returns
- Split: chronological train / validation / test
- Leakage control: quantile bucket edges are fitted only on the train period when `bucket_method=quantile`
Vocabulary
PAD = 0
BOS = 1
EOS = 2
STEP = 3
asset_bucket_token = 4 + asset_id * num_buckets + bucket_id
- vocab size: 1216
- sequence length: 2082
Model
GPT2Config(
vocab_size=1216,
n_positions=2082,
n_embd=384,
n_layer=6,
n_head=6,
)
Metrics
{
"train": {
"train_runtime": 55.4346,
"train_samples_per_second": 260.487,
"train_steps_per_second": 8.208,
"total_flos": 1920653130792960.0,
"train_loss": 3.8263048025277944,
"epoch": 5.0
},
"validation": {
"eval_loss": 5.103570461273193,
"eval_runtime": 0.5501,
"eval_samples_per_second": 656.204,
"eval_steps_per_second": 165.414,
"epoch": 5.0
},
"test": {
"test_loss": 5.131337642669678,
"test_runtime": 0.59,
"test_samples_per_second": 613.581,
"test_steps_per_second": 154.243,
"epoch": 5.0
},
"generation": {
"num_contexts": 12,
"num_sample_paths": 24,
"directional_accuracy_median_path": 0.5219184027777778,
"generated_degeneration_ratio_abs_lt_1e-5": 0.0,
"true_degeneration_ratio_abs_lt_1e-5": 0.0,
"path_diversity": 0.03765995055437088,
"distribution_generated": {
"mean": 0.004333046730607748,
"std": 0.08561652898788452,
"skew": -0.5086434483528137,
"kurtosis": 56.622493743896484,
"q01": -0.2753317952156067,
"q05": -0.037811942398548126,
"q95": 0.05145534127950668,
"q99": 0.33806970715522766
},
"distribution_true": {
"mean": -0.002802362898364663,
"std": 0.030968399718403816,
"skew": -2.6303722858428955,
"kurtosis": 23.258527755737305,
"q01": -0.09194755554199219,
"q05": -0.05170159786939621,
"q95": 0.04157276824116707,
"q99": 0.06143633648753166
},
"cross_sectional_corr_mae": 0.24682527780532837,
"cross_sectional_corr_real_matrix": [
[
1.0,
0.6331244707107544,
0.51563560962677,
0.7108922600746155,
0.5545589923858643,
0.5535476207733154,
0.5244247317314148,
0.21973548829555511,
0.3254808187484741,
0.296897292137146,
0.6087028384208679,
0.5639088153839111
],
[
0.6331244707107544,
1.0,
0.6599540114402771,
0.7749960422515869,
0.5495731830596924,
0.6230116486549377,
0.46573472023010254,
0.48720842599868774,
0.45291945338249207,
0.311764121055603,
0.6260378360748291,
0.47891733050346375
],
[
0.51563560962677,
0.6599540114402771,
1.0,
0.6038300395011902,
0.5262182950973511,
0.5929338335990906,
0.42031070590019226,
0.8166924715042114,
0.5714275240898132,
0.5298638343811035,
0.4197486639022827,
0.4404708445072174
],
[
0.7108922600746155,
0.7749960422515869,
0.6038300395011902,
1.0,
0.5228630900382996,
0.6676064133644104,
0.5059135556221008,
0.45712900161743164,
0.39244410395622253,
0.2839842736721039,
0.5785690546035767,
0.5194220542907715
],
[
0.5545589923858643,
0.5495731830596924,
0.5262182950973511,
0.5228630900382996,
1.0,
0.7413638830184937,
0.6974919438362122,
0.4451492726802826,
0.18301637470722198,
0.11316652595996857,
0.3873818814754486,
0.4405401945114136
],
[
0.5535476207733154,
0.6230116486549377,
0.5929338335990906,
0.6676064133644104,
0.7413638830184937,
1.0,
0.626915693283081,
0.5260854959487915,
0.2714369297027588,
0.18484577536582947,
0.5772184729576111,
0.4420454204082489
],
[
0.5244247317314148,
0.46573472023010254,
0.42031070590019226,
0.5059135556221008,
0.6974919438362122,
0.626915693283081,
1.0,
0.3260938227176666,
0.11072525382041931,
0.11624810099601746,
0.23918883502483368,
0.18416988849639893
],
[
0.21973548829555511,
0.48720842599868774,
0.8166924715042114,
0.45712900161743164,
0.4451492726802826,
0.5260854959487915,
0.3260938227176666,
1.0,
0.587582528591156,
0.5751133561134338,
0.24537551403045654,
0.28593382239341736
],
[
0.3254808187484741,
0.45291945338249207,
0.5714275240898132,
0.39244410395622253,
0.18301637470722198,
0.2714369297027588,
0.11072525382041931,
0.587582528591156,
1.0,
0.7873843312263489,
0.39742162823677063,
0.378302663564682
],
[
0.296897292137146,
0.311764121055603,
0.5298638343811035,
0.2839842736721039,
0.11316652595996857,
0.18484577536582947,
0.11624810099601746,
0.5751133561134338,
0.7873843312263489,
1.0,
0.16922225058078766,
0.11870864033699036
],
[
0.6087028384208679,
0.6260378360748291,
0.4197486639022827,
0.5785690546035767,
0.3873818814754486,
0.5772184729576111,
0.23918883502483368,
0.24537551403045654,
0.39742162823677063,
0.16922225058078766,
1.0,
0.8636203408241272
],
[
0.5639088153839111,
0.47891733050346375,
0.4404708445072174,
0.5194220542907715,
0.4405401945114136,
0.4420454204082489,
0.18416988849639893,
0.28593382239341736,
0.378302663564682,
0.11870864033699036,
0.8636203408241272,
1.0
]
],
"cross_sectional_corr_generated_matrix": [
[
1.0,
0.6723060011863708,
0.5211983919143677,
0.42777976393699646,
0.3105679154396057,
0.15275193750858307,
0.09020064026117325,
0.05684993788599968,
0.027994271367788315,
0.021539758890867233,
0.0018611304694786668,
0.00014955938968341798
],
[
0.6723060011863708,
1.0,
0.740753710269928,
0.5988911390304565,
0.4051375389099121,
0.19946546852588654,
0.12246865034103394,
0.07496435195207596,
0.041361626237630844,
0.03579115495085716,
0.0038796490989625454,
0.006104168947786093
],
[
0.5211983919143677,
0.740753710269928,
1.0,
0.7721667885780334,
0.5105940699577332,
0.25858888030052185,
0.16144169867038727,
0.09339627623558044,
0.051841165870428085,
0.04847245290875435,
0.015083085745573044,
0.013859635218977928
],
[
0.42777976393699646,
0.5988911390304565,
0.7721667885780334,
1.0,
0.635029137134552,
0.3101363778114319,
0.20397040247917175,
0.12052255123853683,
0.07821977138519287,
0.07283712923526764,
0.04090026021003723,
0.03736479952931404
],
[
0.3105679154396057,
0.4051375389099121,
0.5105940699577332,
0.635029137134552,
1.0,
0.44570595026016235,
0.278595894575119,
0.16786691546440125,
0.11481305956840515,
0.10485804080963135,
0.06077614799141884,
0.057598281651735306
],
[
0.15275193750858307,
0.19946546852588654,
0.25858888030052185,
0.3101363778114319,
0.44570595026016235,
1.0,
0.4601401388645172,
0.2706546485424042,
0.18775667250156403,
0.17438025772571564,
0.11574845016002655,
0.10984478890895844
],
[
0.09020064026117325,
0.12246865034103394,
0.16144169867038727,
0.20397040247917175,
0.278595894575119,
0.4601401388645172,
1.0,
0.5125625729560852,
0.34511199593544006,
0.31734904646873474,
0.19516591727733612,
0.18664193153381348
],
[
0.05684993788599968,
0.07496435195207596,
0.09339627623558044,
0.12052255123853683,
0.16786691546440125,
0.2706546485424042,
0.5125625729560852,
1.0,
0.588446855545044,
0.5465589761734009,
0.312430739402771,
0.29470688104629517
],
[
0.027994271367788315,
0.041361626237630844,
0.051841165870428085,
0.07821977138519287,
0.11481305956840515,
0.18775667250156403,
0.34511199593544006,
0.588446855545044,
1.0,
0.9245684146881104,
0.5329838991165161,
0.49864715337753296
],
[
0.021539758890867233,
0.03579115495085716,
0.04847245290875435,
0.07283712923526764,
0.10485804080963135,
0.17438025772571564,
0.31734904646873474,
0.5465589761734009,
0.9245684146881104,
1.0,
0.5616907477378845,
0.5243045091629028
],
[
0.0018611304694786668,
0.0038796490989625454,
0.015083085745573044,
0.04090026021003723,
0.06077614799141884,
0.11574845016002655,
0.19516591727733612,
0.312430739402771,
0.5329838991165161,
0.5616907477378845,
1.0,
0.920161783695221
],
[
0.00014955938968341798,
0.006104168947786093,
0.013859635218977928,
0.03736479952931404,
0.057598281651735306,
0.10984478890895844,
0.18664193153381348,
0.29470688104629517,
0.49864715337753296,
0.5243045091629028,
0.920161783695221,
1.0
]
],
"generated_abs_return_autocorr_lag1": 0.019996171948218167,
"true_abs_return_autocorr_lag1": 0.2633381616777056
},
"data": {
"tickers": [
"AAPL",
"MSFT",
"AMZN",
"GOOGL",
"NVDA",
"TSLA",
"AMD",
"INTC",
"JPM",
"BAC",
"V",
"MA"
],
"price_shape": [
3790,
12
],
"return_shape": [
3770,
12
],
"bucket_method": "quantile",
"num_samples": 3611,
"train_samples": 2888,
"validation_samples": 361,
"test_samples": 362,
"sequence_length": 2082,
"vocab_size": 1216,
"num_parameters": 11913984,
"data_source": "siddharthmb/stocks-ohlcv"
}
}
Intended Use
Research and experimentation with market return sequence generation. This model is not a trading system and is not investment advice.
- Downloads last month: 283