Added Files in the repo
Browse files
- .gitignore +1 -0
- SPY500MW.csv +253 -0
- app.py +189 -0
- dbscan_stock_model.pkl +3 -0
- gmm_stock_model.pkl +3 -0
- hierarchical_stock_model.pkl +3 -0
- kmeans_stock_model.pkl +3 -0
- requirements.txt +7 -0
- stock_market_clustering.ipynb +0 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.venv/
|
SPY500MW.csv
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Date,Open,High,Low,Close
|
| 2 |
+
8/11/2023,4450.69,4476.23,4443.98,4464.05
|
| 3 |
+
8/10/2023,4487.16,4527.37,4457.92,4468.83
|
| 4 |
+
8/9/2023,4501.57,4502.44,4461.33,4467.71
|
| 5 |
+
8/8/2023,4498.03,4503.31,4464.39,4499.38
|
| 6 |
+
8/7/2023,4491.58,4519.84,4491.15,4518.44
|
| 7 |
+
8/4/2023,4513.96,4540.34,4474.55,4478.03
|
| 8 |
+
8/3/2023,4494.27,4519.49,4485.54,4501.89
|
| 9 |
+
8/2/2023,4550.93,4550.93,4505.75,4513.39
|
| 10 |
+
8/1/2023,4578.83,4584.62,4567.53,4576.73
|
| 11 |
+
7/31/2023,4584.82,4594.22,4573.14,4588.96
|
| 12 |
+
7/28/2023,4565.75,4590.16,4564.01,4582.23
|
| 13 |
+
7/27/2023,4598.26,4607.07,4528.56,4537.41
|
| 14 |
+
7/26/2023,4558.96,4582.47,4547.58,4566.75
|
| 15 |
+
7/25/2023,4555.19,4580.62,4552.42,4567.46
|
| 16 |
+
7/24/2023,4543.39,4563.41,4541.29,4554.64
|
| 17 |
+
7/21/2023,4550.16,4555.00,4535.79,4536.34
|
| 18 |
+
7/20/2023,4554.38,4564.74,4527.56,4534.87
|
| 19 |
+
7/19/2023,4563.87,4578.43,4557.48,4565.72
|
| 20 |
+
7/18/2023,4521.78,4562.30,4514.59,4554.98
|
| 21 |
+
7/17/2023,4508.86,4532.85,4504.90,4522.79
|
| 22 |
+
7/14/2023,4514.61,4527.76,4499.56,4505.42
|
| 23 |
+
7/13/2023,4491.50,4517.38,4489.36,4510.04
|
| 24 |
+
7/12/2023,4467.69,4488.34,4463.23,4472.16
|
| 25 |
+
7/11/2023,4415.55,4443.64,4408.46,4439.26
|
| 26 |
+
7/10/2023,4394.23,4412.60,4389.92,4409.53
|
| 27 |
+
7/7/2023,4404.54,4440.39,4397.40,4398.95
|
| 28 |
+
7/6/2023,4422.62,4422.62,4385.05,4411.59
|
| 29 |
+
7/5/2023,4442.04,4454.06,4436.61,4446.82
|
| 30 |
+
7/3/2023,4449.45,4456.46,4442.29,4455.59
|
| 31 |
+
6/30/2023,4422.44,4458.48,4422.44,4450.38
|
| 32 |
+
6/29/2023,4374.94,4398.39,4371.97,4396.44
|
| 33 |
+
6/28/2023,4367.48,4390.35,4360.22,4376.86
|
| 34 |
+
6/27/2023,4337.36,4384.42,4335.00,4378.41
|
| 35 |
+
6/26/2023,4344.84,4362.06,4328.08,4328.82
|
| 36 |
+
6/23/2023,4354.17,4366.55,4341.34,4348.33
|
| 37 |
+
6/22/2023,4355.40,4382.25,4351.82,4381.89
|
| 38 |
+
6/21/2023,4380.01,4386.22,4360.14,4365.69
|
| 39 |
+
6/20/2023,4396.11,4400.15,4367.19,4388.71
|
| 40 |
+
6/16/2023,4442.70,4448.47,4407.44,4409.59
|
| 41 |
+
6/15/2023,4365.33,4439.20,4362.60,4425.84
|
| 42 |
+
6/14/2023,4366.29,4391.82,4337.85,4372.59
|
| 43 |
+
6/13/2023,4352.61,4375.37,4349.31,4369.01
|
| 44 |
+
6/12/2023,4308.32,4340.13,4304.37,4338.93
|
| 45 |
+
6/9/2023,4304.88,4322.62,4291.70,4298.86
|
| 46 |
+
6/8/2023,4268.69,4298.01,4261.07,4293.93
|
| 47 |
+
6/7/2023,4285.47,4299.19,4263.96,4267.52
|
| 48 |
+
6/6/2023,4271.34,4288.33,4263.09,4283.85
|
| 49 |
+
6/5/2023,4282.99,4299.28,4266.82,4273.79
|
| 50 |
+
6/2/2023,4241.01,4290.67,4241.01,4282.37
|
| 51 |
+
6/1/2023,4183.03,4232.43,4171.64,4221.02
|
| 52 |
+
5/31/2023,4190.74,4195.44,4166.15,4179.83
|
| 53 |
+
5/30/2023,4226.71,4231.10,4192.18,4205.52
|
| 54 |
+
5/26/2023,4155.98,4212.87,4156.16,4205.45
|
| 55 |
+
5/25/2023,4155.71,4165.74,4129.73,4151.28
|
| 56 |
+
5/24/2023,4132.96,4132.96,4103.98,4115.24
|
| 57 |
+
5/23/2023,4176.80,4185.68,4142.54,4145.58
|
| 58 |
+
5/22/2023,4190.78,4209.22,4179.68,4192.63
|
| 59 |
+
5/19/2023,4204.15,4212.91,4180.20,4191.98
|
| 60 |
+
5/18/2023,4157.68,4202.20,4153.50,4198.05
|
| 61 |
+
5/17/2023,4122.85,4164.67,4113.62,4158.77
|
| 62 |
+
5/16/2023,4127.95,4135.54,4109.86,4109.90
|
| 63 |
+
5/15/2023,4126.65,4141.25,4110.27,4136.28
|
| 64 |
+
5/12/2023,4138.54,4143.74,4099.12,4124.08
|
| 65 |
+
5/11/2023,4132.24,4132.80,4109.29,4130.62
|
| 66 |
+
5/10/2023,4143.74,4154.28,4098.92,4137.64
|
| 67 |
+
5/9/2023,4124.25,4130.35,4116.65,4119.17
|
| 68 |
+
5/8/2023,4136.98,4142.30,4123.81,4138.12
|
| 69 |
+
5/5/2023,4084.73,4147.02,4084.73,4136.25
|
| 70 |
+
5/4/2023,4082.55,4082.61,4048.28,4061.22
|
| 71 |
+
5/3/2023,4122.25,4148.30,4088.86,4090.75
|
| 72 |
+
5/2/2023,4164.10,4164.10,4089.72,4119.58
|
| 73 |
+
5/1/2023,4166.79,4186.92,4164.12,4167.87
|
| 74 |
+
4/28/2023,4129.63,4170.06,4127.18,4169.48
|
| 75 |
+
4/27/2023,4075.29,4138.24,4075.29,4135.35
|
| 76 |
+
4/26/2023,4087.78,4089.67,4049.35,4055.99
|
| 77 |
+
4/25/2023,4126.43,4126.43,4071.38,4071.63
|
| 78 |
+
4/24/2023,4132.07,4142.41,4117.77,4137.04
|
| 79 |
+
4/21/2023,4132.14,4138.02,4113.86,4133.52
|
| 80 |
+
4/20/2023,4130.48,4148.57,4114.57,4129.79
|
| 81 |
+
4/19/2023,4139.33,4162.57,4134.49,4154.52
|
| 82 |
+
4/18/2023,4164.26,4169.48,4140.36,4154.87
|
| 83 |
+
4/17/2023,4137.17,4151.72,4123.18,4151.32
|
| 84 |
+
4/14/2023,4140.11,4163.19,4113.20,4137.64
|
| 85 |
+
4/13/2023,4100.04,4150.26,4099.40,4146.22
|
| 86 |
+
4/12/2023,4121.72,4134.37,4086.94,4091.95
|
| 87 |
+
4/11/2023,4110.29,4124.26,4102.61,4108.94
|
| 88 |
+
4/10/2023,4085.20,4109.50,4072.55,4109.11
|
| 89 |
+
4/6/2023,4081.58,4107.32,4069.84,4105.02
|
| 90 |
+
4/5/2023,4094.50,4099.69,4072.56,4090.38
|
| 91 |
+
4/4/2023,4128.03,4133.13,4086.87,4100.60
|
| 92 |
+
4/3/2023,4102.20,4127.66,4098.79,4124.51
|
| 93 |
+
3/31/2023,4056.18,4110.75,4056.18,4109.31
|
| 94 |
+
3/30/2023,4046.74,4057.85,4032.10,4050.83
|
| 95 |
+
3/29/2023,3999.53,4030.59,3999.53,4027.81
|
| 96 |
+
3/28/2023,3974.13,3979.20,3951.53,3971.27
|
| 97 |
+
3/27/2023,3982.93,4003.83,3970.49,3977.53
|
| 98 |
+
3/24/2023,3939.21,3972.74,3909.16,3970.99
|
| 99 |
+
3/23/2023,3959.21,4007.66,3919.05,3948.72
|
| 100 |
+
3/22/2023,4002.04,4039.49,3936.17,3936.97
|
| 101 |
+
3/21/2023,3975.89,4009.08,3971.19,4002.87
|
| 102 |
+
3/20/2023,3917.47,3956.62,3916.89,3951.57
|
| 103 |
+
3/17/2023,3958.69,3958.91,3901.27,3916.64
|
| 104 |
+
3/16/2023,3878.93,3964.46,3864.11,3960.28
|
| 105 |
+
3/15/2023,3876.74,3894.26,3838.24,3891.93
|
| 106 |
+
3/14/2023,3894.01,3937.29,3873.63,3919.29
|
| 107 |
+
3/13/2023,3835.12,3905.05,3808.86,3855.76
|
| 108 |
+
3/10/2023,3912.77,3934.05,3846.32,3861.59
|
| 109 |
+
3/9/2023,3998.66,4017.81,3908.70,3918.32
|
| 110 |
+
3/8/2023,3987.55,4000.41,3969.76,3992.01
|
| 111 |
+
3/7/2023,4048.26,4050.00,3980.31,3986.37
|
| 112 |
+
3/6/2023,4055.15,4078.49,4044.61,4048.42
|
| 113 |
+
3/3/2023,3998.02,4048.29,3995.17,4045.64
|
| 114 |
+
3/2/2023,3938.68,3990.84,3928.16,3981.35
|
| 115 |
+
3/1/2023,3963.34,3971.73,3939.05,3951.39
|
| 116 |
+
2/28/2023,3977.19,3997.50,3968.98,3970.15
|
| 117 |
+
2/27/2023,3992.36,4018.05,3973.55,3982.24
|
| 118 |
+
2/24/2023,3973.24,3978.25,3943.08,3970.04
|
| 119 |
+
2/23/2023,4018.60,4028.30,3969.19,4012.32
|
| 120 |
+
2/22/2023,4001.83,4017.37,3976.90,3991.05
|
| 121 |
+
2/21/2023,4052.35,4052.35,3995.19,3997.34
|
| 122 |
+
2/17/2023,4077.73,4081.51,4047.95,4079.09
|
| 123 |
+
2/16/2023,4114.75,4136.54,4089.49,4090.41
|
| 124 |
+
2/15/2023,4119.50,4148.11,4103.98,4147.60
|
| 125 |
+
2/14/2023,4126.70,4159.77,4095.01,4136.13
|
| 126 |
+
2/13/2023,4096.62,4138.90,4092.67,4137.29
|
| 127 |
+
2/10/2023,4068.92,4094.36,4060.79,4090.46
|
| 128 |
+
2/9/2023,4144.25,4156.23,4069.67,4081.50
|
| 129 |
+
2/8/2023,4153.47,4156.85,4111.67,4117.86
|
| 130 |
+
2/7/2023,4105.35,4176.54,4088.39,4164.00
|
| 131 |
+
2/6/2023,4119.57,4124.63,4093.38,4111.08
|
| 132 |
+
2/3/2023,4136.69,4182.36,4123.36,4136.48
|
| 133 |
+
2/2/2023,4158.68,4195.44,4141.88,4179.76
|
| 134 |
+
2/1/2023,4070.07,4148.95,4037.20,4119.21
|
| 135 |
+
1/31/2023,4020.85,4077.16,4020.44,4076.60
|
| 136 |
+
1/30/2023,4049.27,4063.85,4015.55,4017.77
|
| 137 |
+
1/27/2023,4053.72,4094.21,4048.70,4070.56
|
| 138 |
+
1/26/2023,4036.08,4061.57,4013.29,4060.43
|
| 139 |
+
1/25/2023,3982.71,4019.55,3949.06,4016.22
|
| 140 |
+
1/24/2023,4001.74,4023.92,3989.79,4016.95
|
| 141 |
+
1/23/2023,3978.14,4039.31,3971.64,4019.81
|
| 142 |
+
1/20/2023,3909.04,3972.96,3897.86,3972.61
|
| 143 |
+
1/19/2023,3911.84,3922.94,3885.54,3898.85
|
| 144 |
+
1/18/2023,4002.25,4014.16,3926.59,3928.86
|
| 145 |
+
1/17/2023,3999.28,4015.39,3984.57,3990.97
|
| 146 |
+
1/13/2023,3960.32,4003.95,3947.67,3999.09
|
| 147 |
+
1/12/2023,3977.57,3997.76,3937.56,3983.17
|
| 148 |
+
1/11/2023,3932.35,3970.07,3928.54,3969.61
|
| 149 |
+
1/10/2023,3888.57,3919.83,3877.29,3919.25
|
| 150 |
+
1/9/2023,3910.82,3950.57,3890.42,3892.09
|
| 151 |
+
1/6/2023,3823.37,3906.19,3809.56,3895.08
|
| 152 |
+
1/5/2023,3839.74,3839.74,3802.42,3808.10
|
| 153 |
+
1/4/2023,3840.36,3873.16,3815.77,3852.97
|
| 154 |
+
1/3/2023,3853.29,3878.46,3794.33,3824.14
|
| 155 |
+
12/30/2022,3828.85,3839.85,3800.34,3839.50
|
| 156 |
+
12/29/2022,3805.45,3858.19,3805.45,3849.28
|
| 157 |
+
12/28/2022,3829.56,3848.32,3780.78,3783.22
|
| 158 |
+
12/27/2022,3843.34,3846.65,3813.22,3829.25
|
| 159 |
+
12/23/2022,3813.11,3845.80,3797.01,3844.82
|
| 160 |
+
12/22/2022,3853.26,3853.26,3764.49,3822.39
|
| 161 |
+
12/21/2022,3839.49,3889.82,3839.49,3878.44
|
| 162 |
+
12/20/2022,3810.47,3838.24,3795.62,3821.62
|
| 163 |
+
12/19/2022,3853.79,3854.86,3800.04,3817.66
|
| 164 |
+
12/16/2022,3890.91,3890.91,3827.91,3852.36
|
| 165 |
+
12/15/2022,3958.37,3958.37,3879.45,3895.75
|
| 166 |
+
12/14/2022,4015.54,4053.76,3965.65,3995.32
|
| 167 |
+
12/13/2022,4069.38,4100.96,3993.03,4019.65
|
| 168 |
+
12/12/2022,3939.29,3990.71,3935.30,3990.56
|
| 169 |
+
12/9/2022,3954.17,3977.02,3933.04,3934.38
|
| 170 |
+
12/8/2022,3947.79,3974.19,3935.83,3963.51
|
| 171 |
+
12/7/2022,3933.28,3957.57,3922.68,3933.92
|
| 172 |
+
12/6/2022,3996.63,4001.51,3918.39,3941.26
|
| 173 |
+
12/5/2022,4052.02,4052.45,3984.49,3998.84
|
| 174 |
+
12/2/2022,4040.17,4080.48,4026.63,4071.70
|
| 175 |
+
12/1/2022,4087.14,4100.51,4050.87,4076.57
|
| 176 |
+
11/30/2022,3957.18,4080.11,3938.58,4080.11
|
| 177 |
+
11/29/2022,3964.19,3976.77,3937.65,3957.63
|
| 178 |
+
11/28/2022,4005.36,4012.27,3955.77,3963.94
|
| 179 |
+
11/25/2022,4023.34,4034.02,4020.76,4026.12
|
| 180 |
+
11/23/2022,4001.02,4033.78,3998.66,4027.26
|
| 181 |
+
11/22/2022,3965.51,4005.88,3956.88,4003.58
|
| 182 |
+
11/21/2022,3956.23,3962.00,3933.34,3949.94
|
| 183 |
+
11/18/2022,3966.39,3979.89,3935.98,3965.34
|
| 184 |
+
11/17/2022,3919.26,3954.33,3906.54,3946.56
|
| 185 |
+
11/16/2022,3976.82,3983.09,3954.34,3958.79
|
| 186 |
+
11/15/2022,4006.41,4028.84,3953.17,3991.73
|
| 187 |
+
11/14/2022,3977.97,4008.97,3956.40,3957.25
|
| 188 |
+
11/11/2022,3963.72,4001.48,3944.82,3992.93
|
| 189 |
+
11/10/2022,3859.89,3958.33,3859.89,3956.37
|
| 190 |
+
11/9/2022,3810.94,3818.20,3744.22,3748.57
|
| 191 |
+
11/8/2022,3817.02,3859.40,3786.28,3828.11
|
| 192 |
+
11/7/2022,3780.71,3813.95,3764.70,3806.80
|
| 193 |
+
11/4/2022,3766.98,3796.34,3708.84,3770.55
|
| 194 |
+
11/3/2022,3733.25,3750.59,3698.15,3719.89
|
| 195 |
+
11/2/2022,3852.90,3894.44,3758.68,3759.69
|
| 196 |
+
11/1/2022,3901.79,3911.79,3843.80,3856.10
|
| 197 |
+
10/31/2022,3881.85,3893.73,3863.18,3871.98
|
| 198 |
+
10/28/2022,3808.26,3905.42,3808.26,3901.06
|
| 199 |
+
10/27/2022,3834.69,3859.95,3803.79,3807.30
|
| 200 |
+
10/26/2022,3825.97,3886.15,3824.07,3830.60
|
| 201 |
+
10/25/2022,3799.44,3862.85,3799.44,3859.11
|
| 202 |
+
10/24/2022,3762.01,3810.74,3741.65,3797.34
|
| 203 |
+
10/21/2022,3657.10,3757.89,3647.42,3752.75
|
| 204 |
+
10/20/2022,3689.05,3736.00,3656.44,3665.78
|
| 205 |
+
10/19/2022,3703.11,3728.58,3666.51,3695.16
|
| 206 |
+
10/18/2022,3746.26,3762.79,3686.53,3719.98
|
| 207 |
+
10/17/2022,3638.65,3689.73,3638.65,3677.95
|
| 208 |
+
10/14/2022,3690.41,3712.00,3579.68,3583.07
|
| 209 |
+
10/13/2022,3520.37,3685.41,3491.58,3669.91
|
| 210 |
+
10/12/2022,3590.83,3608.34,3573.86,3577.03
|
| 211 |
+
10/11/2022,3595.86,3640.66,3568.45,3588.84
|
| 212 |
+
10/10/2022,3647.51,3652.17,3588.10,3612.39
|
| 213 |
+
10/7/2022,3706.74,3706.74,3620.73,3639.66
|
| 214 |
+
10/6/2022,3771.97,3797.93,3739.22,3744.52
|
| 215 |
+
10/5/2022,3753.25,3806.91,3722.66,3783.28
|
| 216 |
+
10/4/2022,3726.46,3791.92,3726.46,3790.93
|
| 217 |
+
10/3/2022,3609.78,3698.35,3604.93,3678.43
|
| 218 |
+
9/30/2022,3633.48,3671.44,3584.13,3585.62
|
| 219 |
+
9/29/2022,3687.01,3687.01,3610.40,3640.47
|
| 220 |
+
9/28/2022,3651.94,3736.74,3640.61,3719.04
|
| 221 |
+
9/27/2022,3686.44,3717.53,3623.29,3647.29
|
| 222 |
+
9/26/2022,3682.72,3715.67,3644.76,3655.04
|
| 223 |
+
9/23/2022,3727.14,3727.14,3647.47,3693.23
|
| 224 |
+
9/22/2022,3782.36,3790.90,3749.45,3757.99
|
| 225 |
+
9/21/2022,3871.40,3907.07,3789.49,3789.93
|
| 226 |
+
9/20/2022,3875.23,3876.01,3827.54,3855.93
|
| 227 |
+
9/19/2022,3849.91,3900.45,3838.50,3899.89
|
| 228 |
+
9/16/2022,3880.95,3880.95,3837.08,3873.33
|
| 229 |
+
9/15/2022,3932.41,3959.14,3888.28,3901.35
|
| 230 |
+
9/14/2022,3940.73,3961.94,3912.18,3946.01
|
| 231 |
+
9/13/2022,4037.12,4037.12,3921.28,3932.69
|
| 232 |
+
9/12/2022,4083.67,4119.28,4083.67,4110.41
|
| 233 |
+
9/9/2022,4022.94,4076.81,4022.94,4067.36
|
| 234 |
+
9/8/2022,3959.94,4010.50,3944.81,4006.18
|
| 235 |
+
9/7/2022,3909.43,3987.89,3906.03,3979.87
|
| 236 |
+
9/6/2022,3930.89,3942.55,3886.75,3908.19
|
| 237 |
+
9/2/2022,3996.39,4018.43,3906.21,3924.26
|
| 238 |
+
9/1/2022,3936.73,3970.23,3903.65,3966.85
|
| 239 |
+
8/31/2022,4000.67,4015.37,3954.53,3955.00
|
| 240 |
+
8/30/2022,4041.25,4044.98,3965.21,3986.16
|
| 241 |
+
8/29/2022,4034.58,4062.99,4017.42,4030.61
|
| 242 |
+
8/26/2022,4198.74,4203.04,4057.66,4057.66
|
| 243 |
+
8/25/2022,4153.26,4200.54,4147.59,4199.12
|
| 244 |
+
8/24/2022,4126.55,4156.56,4119.97,4140.77
|
| 245 |
+
8/23/2022,4133.09,4159.77,4124.03,4128.73
|
| 246 |
+
8/22/2022,4195.08,4195.08,4129.86,4137.99
|
| 247 |
+
8/19/2022,4266.31,4266.31,4218.70,4228.48
|
| 248 |
+
8/18/2022,4273.13,4292.53,4261.98,4283.74
|
| 249 |
+
8/17/2022,4280.40,4302.18,4253.08,4274.04
|
| 250 |
+
8/16/2022,4290.46,4325.28,4277.77,4305.20
|
| 251 |
+
8/15/2022,4269.37,4301.79,4256.90,4297.14
|
| 252 |
+
8/12/2022,4225.02,4280.47,4219.78,4280.15
|
| 253 |
+
8/11/2022,4227.40,4257.91,4201.41,4207.27
|
app.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import joblib
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import seaborn as sns
|
| 7 |
+
from sklearn.preprocessing import StandardScaler
|
| 8 |
+
from sklearn.cluster import KMeans, DBSCAN
|
| 9 |
+
from sklearn.metrics import silhouette_score
|
| 10 |
+
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
|
| 11 |
+
from sklearn.mixture import GaussianMixture
|
| 12 |
+
from datasets import load_dataset
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
# Load dataset
|
| 16 |
+
@st.cache_data
def load_data():
    """Return the ``train`` split of the SPY500MW dataset as a DataFrame.

    The split is read with ``datasets.load_dataset`` from the
    ``Ci-Dave/SPY500MW_StockMarket`` repository and wrapped in a
    ``pandas.DataFrame``. The function is decorated with ``st.cache_data``.
    """
    train_split = load_dataset("Ci-Dave/SPY500MW_StockMarket")["train"]
    return pd.DataFrame(train_split)
|
| 21 |
+
|
| 22 |
+
# Preprocessing function
|
| 23 |
+
def preprocess_data(df):
    """Select the numeric columns of *df*, drop incomplete rows and scale them.

    Args:
        df: DataFrame holding the raw dataset.

    Returns:
        A ``(df_numeric, df_scaled, scaler)`` tuple: the numeric-only frame
        with rows containing missing values removed, the result of
        ``StandardScaler.fit_transform`` on that frame, and the fitted scaler.
    """
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    df_numeric = df[numeric_columns].dropna()

    # The scaler is fitted on exactly the rows that survive dropna().
    scaler = StandardScaler()
    return df_numeric, scaler.fit_transform(df_numeric), scaler
|
| 29 |
+
|
| 30 |
+
# Fetch the raw table, then derive the numeric frame, its scaled matrix and
# the fitted scaler that the page code further down reads.
df = load_data()
df_numeric, df_scaled, scaler = preprocess_data(df)
|
| 33 |
+
|
| 34 |
+
# Load trained models with error handling
|
| 35 |
+
def load_model(filename):
    """Load a persisted model, reporting any failure on the page.

    Args:
        filename: Path of the joblib/pickle file to load.

    Returns:
        The object returned by ``joblib.load``, or ``None`` when the file is
        missing or cannot be deserialised. In both failure cases an error
        message is shown with ``st.error`` so the rest of the app keeps
        running and callers only need to check for ``None``.
    """
    if not os.path.exists(filename):
        st.error(f"Model file {filename} not found. Make sure you uploaded the trained model.")
        return None

    try:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files that come from a trusted source.
        return joblib.load(filename)
    except Exception as exc:
        # A file can exist and still be unloadable, e.g. an un-fetched Git LFS
        # pointer stub or a pickle written by an incompatible library version.
        # Surface that on the page instead of crashing the whole script.
        st.error(f"Model file {filename} could not be loaded: {exc}")
        return None
|
| 41 |
+
|
| 42 |
+
# Load the four persisted models in a fixed order; each name is bound to the
# loaded object, or to None when load_model could not provide it.
kmeans, hierarchical, dbscan, gmm = map(
    load_model,
    (
        "kmeans_stock_model.pkl",
        "hierarchical_stock_model.pkl",
        "dbscan_stock_model.pkl",
        "gmm_stock_model.pkl",
    ),
)
|
| 46 |
+
|
| 47 |
+
# Shared rendering helpers used by the pages below.


def _render_figure(fig):
    """Send *fig* to the page and then close it.

    pyplot keeps every figure it creates registered until it is closed.
    Streamlit re-runs this script on each interaction, so figures left open
    would accumulate in memory for the life of the server process.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_cluster_tables(column):
    """Show the first labels in ``df_numeric[column]`` and the rows per label."""
    st.write("Cluster Assignments:")
    st.dataframe(df_numeric[[column]].head())

    cluster_counts = df_numeric[column].value_counts().reset_index()
    cluster_counts.columns = ["Cluster", "Count"]
    st.write("Cluster Distribution:")
    st.dataframe(cluster_counts)


def _render_silhouette(features, labels):
    """Show the silhouette score of *labels*, or a notice when it is undefined.

    ``silhouette_score`` raises ``ValueError`` unless the number of distinct
    labels is at least 2 and smaller than the number of samples, so that case
    is reported on the page instead of aborting the script.
    """
    n_labels = np.unique(labels).size
    if 2 <= n_labels < len(labels):
        silhouette = silhouette_score(features, labels)
        st.write(f"Silhouette Score: {silhouette:.4f}")
    else:
        st.warning(
            f"Silhouette Score is undefined for {n_labels} distinct cluster label(s)."
        )


def _render_scatter(labels, palette="viridis"):
    """Scatter the first two columns of ``df_numeric``, coloured by *labels*."""
    fig, ax = plt.subplots()
    sns.scatterplot(
        x=df_numeric.iloc[:, 0],
        y=df_numeric.iloc[:, 1],
        hue=labels,
        palette=palette,
        ax=ax,
    )
    _render_figure(fig)


# Sidebar navigation
st.sidebar.title("Stock Market Clustering")
page = st.sidebar.radio(
    "Choose a model:",
    ["Home", "Dataset", "K-Means", "Hierarchical", "DBSCAN", "GMM"],
)

if page == "Home":
    st.title("Stock Market Clustering Analysis")
    st.write("""
    This application analyzes stock market data using clustering techniques.
    Explore different clustering models (K-Means, Hierarchical, DBSCAN, and GMM) to identify patterns in stock prices.

    **Dataset:**
    The dataset includes stock market price data with key attributes such as Open, High, Low, and Close values.

    **Clustering Models:**
    - **K-Means:** Groups stocks into clusters based on price similarities.
    - **Hierarchical Clustering:** Forms a tree-like structure to identify stock relationships.
    - **DBSCAN:** Detects core clusters and outliers in stock prices.
    - **Gaussian Mixture Model (GMM):** Uses probabilistic clustering for stock analysis.

    **Instructions:**
    - Select a clustering model from the sidebar.
    - View the clustering results, including cluster assignments, distributions, and visualizations.
    """)

elif page == "Dataset":
    st.title("Stock Market Dataset")

    st.write("### Dataset Preview:")
    # Preview shows only the four price columns.
    st.dataframe(df[["Open", "High", "Low", "Close"]].head(), use_container_width=True)

    st.write("### Full Dataset:")
    st.dataframe(df, use_container_width=True)

    st.write("### Dataset Summary:")
    st.write(df_numeric.describe())

    st.write("### Correlation Heatmap:")
    fig, ax = plt.subplots()
    sns.heatmap(df_numeric.corr(), annot=True, cmap="coolwarm", ax=ax)
    _render_figure(fig)

elif page == "K-Means":
    st.title("K-Means Clustering")
    # Explicit None check: load_model returns None when the model is unavailable.
    if kmeans is not None:
        clusters = kmeans.predict(df_scaled)
        df_numeric["KMeans Cluster"] = clusters

        _render_cluster_tables("KMeans Cluster")
        _render_silhouette(df_scaled, clusters)
        _render_scatter(clusters)

elif page == "Hierarchical":
    st.title("Hierarchical Clustering")

    # The linkage is recomputed here with Ward's method and cut into at most
    # four flat clusters.
    # NOTE(review): the loaded `hierarchical` model is not used on this page -
    # confirm whether that is intentional.
    linked = linkage(df_scaled, method='ward')
    clusters = fcluster(linked, 4, criterion='maxclust')
    df_numeric["Hierarchical Cluster"] = clusters

    _render_cluster_tables("Hierarchical Cluster")

    # Dendrogram, truncated with truncate_mode='level' and p=5.
    fig, ax = plt.subplots(figsize=(10, 5))
    dendrogram(linked, truncate_mode='level', p=5, ax=ax)
    _render_figure(fig)

    _render_scatter(clusters)

elif page == "DBSCAN":
    st.title("DBSCAN Clustering")
    if dbscan is not None:
        # fit_predict re-runs the clustering on the current scaled data.
        clusters = dbscan.fit_predict(df_scaled)
        df_numeric["DBSCAN Cluster"] = clusters

        _render_cluster_tables("DBSCAN Cluster")

        # Spread the real clusters evenly over the colormap, however many
        # there are; a fixed divisor would give every label past it the same
        # colour. Label -1 (outliers) is always drawn in red.
        cluster_ids = [int(label) for label in np.unique(clusters) if label != -1]
        colour_map = sns.color_palette("viridis", as_cmap=True)
        span = max(len(cluster_ids) - 1, 1)
        palette = {
            label: colour_map(rank / span)
            for rank, label in enumerate(cluster_ids)
        }
        palette[-1] = "red"
        _render_scatter(clusters, palette=palette)

elif page == "GMM":
    st.title("Gaussian Mixture Model (GMM)")
    if gmm is not None:
        # Feed the model only as many leading columns as its fitted means have.
        expected_features = gmm.means_.shape[1]
        df_gmm = df_scaled[:, :expected_features]
        clusters = gmm.predict(df_gmm)
        df_numeric["GMM Cluster"] = clusters

        _render_cluster_tables("GMM Cluster")
        _render_silhouette(df_gmm, clusters)
        _render_scatter(clusters)
|
dbscan_stock_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d79f445d36381918a010805245cac0d13111b8f04535dfebb853c5eab582a4e
|
| 3 |
+
size 12727
|
gmm_stock_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c59cef6dd1f677d5fdc08f6a8553b90b0f8cb367148b367b5ebc7ec246d0e91e
|
| 3 |
+
size 1742
|
hierarchical_stock_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c38beacb44e402b74041b7770da954aee34e6197f70e540e20741d4a29df967
|
| 3 |
+
size 8033
|
kmeans_stock_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00564ad761caee0d83ad3d3b8929964615122ae323e07b7c821cbbbea054c331
|
| 3 |
+
size 1831
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
joblib
|
| 3 |
+
pandas
|
| 4 |
+
matplotlib
|
| 5 |
+
seaborn
|
| 6 |
+
scikit-learn
|
| 7 |
+
datasets
|
stock_market_clustering.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|