Ci-Dave committed on
Commit
4619d1d
·
1 Parent(s): 8714d9d

Added Files in the repo

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/
SPY500MW.csv ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Date,Open,High,Low,Close
2
+ 8/11/2023,4450.69,4476.23,4443.98,4464.05
3
+ 8/10/2023,4487.16,4527.37,4457.92,4468.83
4
+ 8/9/2023,4501.57,4502.44,4461.33,4467.71
5
+ 8/8/2023,4498.03,4503.31,4464.39,4499.38
6
+ 8/7/2023,4491.58,4519.84,4491.15,4518.44
7
+ 8/4/2023,4513.96,4540.34,4474.55,4478.03
8
+ 8/3/2023,4494.27,4519.49,4485.54,4501.89
9
+ 8/2/2023,4550.93,4550.93,4505.75,4513.39
10
+ 8/1/2023,4578.83,4584.62,4567.53,4576.73
11
+ 7/31/2023,4584.82,4594.22,4573.14,4588.96
12
+ 7/28/2023,4565.75,4590.16,4564.01,4582.23
13
+ 7/27/2023,4598.26,4607.07,4528.56,4537.41
14
+ 7/26/2023,4558.96,4582.47,4547.58,4566.75
15
+ 7/25/2023,4555.19,4580.62,4552.42,4567.46
16
+ 7/24/2023,4543.39,4563.41,4541.29,4554.64
17
+ 7/21/2023,4550.16,4555.00,4535.79,4536.34
18
+ 7/20/2023,4554.38,4564.74,4527.56,4534.87
19
+ 7/19/2023,4563.87,4578.43,4557.48,4565.72
20
+ 7/18/2023,4521.78,4562.30,4514.59,4554.98
21
+ 7/17/2023,4508.86,4532.85,4504.90,4522.79
22
+ 7/14/2023,4514.61,4527.76,4499.56,4505.42
23
+ 7/13/2023,4491.50,4517.38,4489.36,4510.04
24
+ 7/12/2023,4467.69,4488.34,4463.23,4472.16
25
+ 7/11/2023,4415.55,4443.64,4408.46,4439.26
26
+ 7/10/2023,4394.23,4412.60,4389.92,4409.53
27
+ 7/7/2023,4404.54,4440.39,4397.40,4398.95
28
+ 7/6/2023,4422.62,4422.62,4385.05,4411.59
29
+ 7/5/2023,4442.04,4454.06,4436.61,4446.82
30
+ 7/3/2023,4449.45,4456.46,4442.29,4455.59
31
+ 6/30/2023,4422.44,4458.48,4422.44,4450.38
32
+ 6/29/2023,4374.94,4398.39,4371.97,4396.44
33
+ 6/28/2023,4367.48,4390.35,4360.22,4376.86
34
+ 6/27/2023,4337.36,4384.42,4335.00,4378.41
35
+ 6/26/2023,4344.84,4362.06,4328.08,4328.82
36
+ 6/23/2023,4354.17,4366.55,4341.34,4348.33
37
+ 6/22/2023,4355.40,4382.25,4351.82,4381.89
38
+ 6/21/2023,4380.01,4386.22,4360.14,4365.69
39
+ 6/20/2023,4396.11,4400.15,4367.19,4388.71
40
+ 6/16/2023,4442.70,4448.47,4407.44,4409.59
41
+ 6/15/2023,4365.33,4439.20,4362.60,4425.84
42
+ 6/14/2023,4366.29,4391.82,4337.85,4372.59
43
+ 6/13/2023,4352.61,4375.37,4349.31,4369.01
44
+ 6/12/2023,4308.32,4340.13,4304.37,4338.93
45
+ 6/9/2023,4304.88,4322.62,4291.70,4298.86
46
+ 6/8/2023,4268.69,4298.01,4261.07,4293.93
47
+ 6/7/2023,4285.47,4299.19,4263.96,4267.52
48
+ 6/6/2023,4271.34,4288.33,4263.09,4283.85
49
+ 6/5/2023,4282.99,4299.28,4266.82,4273.79
50
+ 6/2/2023,4241.01,4290.67,4241.01,4282.37
51
+ 6/1/2023,4183.03,4232.43,4171.64,4221.02
52
+ 5/31/2023,4190.74,4195.44,4166.15,4179.83
53
+ 5/30/2023,4226.71,4231.10,4192.18,4205.52
54
+ 5/26/2023,4155.98,4212.87,4156.16,4205.45
55
+ 5/25/2023,4155.71,4165.74,4129.73,4151.28
56
+ 5/24/2023,4132.96,4132.96,4103.98,4115.24
57
+ 5/23/2023,4176.80,4185.68,4142.54,4145.58
58
+ 5/22/2023,4190.78,4209.22,4179.68,4192.63
59
+ 5/19/2023,4204.15,4212.91,4180.20,4191.98
60
+ 5/18/2023,4157.68,4202.20,4153.50,4198.05
61
+ 5/17/2023,4122.85,4164.67,4113.62,4158.77
62
+ 5/16/2023,4127.95,4135.54,4109.86,4109.90
63
+ 5/15/2023,4126.65,4141.25,4110.27,4136.28
64
+ 5/12/2023,4138.54,4143.74,4099.12,4124.08
65
+ 5/11/2023,4132.24,4132.80,4109.29,4130.62
66
+ 5/10/2023,4143.74,4154.28,4098.92,4137.64
67
+ 5/9/2023,4124.25,4130.35,4116.65,4119.17
68
+ 5/8/2023,4136.98,4142.30,4123.81,4138.12
69
+ 5/5/2023,4084.73,4147.02,4084.73,4136.25
70
+ 5/4/2023,4082.55,4082.61,4048.28,4061.22
71
+ 5/3/2023,4122.25,4148.30,4088.86,4090.75
72
+ 5/2/2023,4164.10,4164.10,4089.72,4119.58
73
+ 5/1/2023,4166.79,4186.92,4164.12,4167.87
74
+ 4/28/2023,4129.63,4170.06,4127.18,4169.48
75
+ 4/27/2023,4075.29,4138.24,4075.29,4135.35
76
+ 4/26/2023,4087.78,4089.67,4049.35,4055.99
77
+ 4/25/2023,4126.43,4126.43,4071.38,4071.63
78
+ 4/24/2023,4132.07,4142.41,4117.77,4137.04
79
+ 4/21/2023,4132.14,4138.02,4113.86,4133.52
80
+ 4/20/2023,4130.48,4148.57,4114.57,4129.79
81
+ 4/19/2023,4139.33,4162.57,4134.49,4154.52
82
+ 4/18/2023,4164.26,4169.48,4140.36,4154.87
83
+ 4/17/2023,4137.17,4151.72,4123.18,4151.32
84
+ 4/14/2023,4140.11,4163.19,4113.20,4137.64
85
+ 4/13/2023,4100.04,4150.26,4099.40,4146.22
86
+ 4/12/2023,4121.72,4134.37,4086.94,4091.95
87
+ 4/11/2023,4110.29,4124.26,4102.61,4108.94
88
+ 4/10/2023,4085.20,4109.50,4072.55,4109.11
89
+ 4/6/2023,4081.58,4107.32,4069.84,4105.02
90
+ 4/5/2023,4094.50,4099.69,4072.56,4090.38
91
+ 4/4/2023,4128.03,4133.13,4086.87,4100.60
92
+ 4/3/2023,4102.20,4127.66,4098.79,4124.51
93
+ 3/31/2023,4056.18,4110.75,4056.18,4109.31
94
+ 3/30/2023,4046.74,4057.85,4032.10,4050.83
95
+ 3/29/2023,3999.53,4030.59,3999.53,4027.81
96
+ 3/28/2023,3974.13,3979.20,3951.53,3971.27
97
+ 3/27/2023,3982.93,4003.83,3970.49,3977.53
98
+ 3/24/2023,3939.21,3972.74,3909.16,3970.99
99
+ 3/23/2023,3959.21,4007.66,3919.05,3948.72
100
+ 3/22/2023,4002.04,4039.49,3936.17,3936.97
101
+ 3/21/2023,3975.89,4009.08,3971.19,4002.87
102
+ 3/20/2023,3917.47,3956.62,3916.89,3951.57
103
+ 3/17/2023,3958.69,3958.91,3901.27,3916.64
104
+ 3/16/2023,3878.93,3964.46,3864.11,3960.28
105
+ 3/15/2023,3876.74,3894.26,3838.24,3891.93
106
+ 3/14/2023,3894.01,3937.29,3873.63,3919.29
107
+ 3/13/2023,3835.12,3905.05,3808.86,3855.76
108
+ 3/10/2023,3912.77,3934.05,3846.32,3861.59
109
+ 3/9/2023,3998.66,4017.81,3908.70,3918.32
110
+ 3/8/2023,3987.55,4000.41,3969.76,3992.01
111
+ 3/7/2023,4048.26,4050.00,3980.31,3986.37
112
+ 3/6/2023,4055.15,4078.49,4044.61,4048.42
113
+ 3/3/2023,3998.02,4048.29,3995.17,4045.64
114
+ 3/2/2023,3938.68,3990.84,3928.16,3981.35
115
+ 3/1/2023,3963.34,3971.73,3939.05,3951.39
116
+ 2/28/2023,3977.19,3997.50,3968.98,3970.15
117
+ 2/27/2023,3992.36,4018.05,3973.55,3982.24
118
+ 2/24/2023,3973.24,3978.25,3943.08,3970.04
119
+ 2/23/2023,4018.60,4028.30,3969.19,4012.32
120
+ 2/22/2023,4001.83,4017.37,3976.90,3991.05
121
+ 2/21/2023,4052.35,4052.35,3995.19,3997.34
122
+ 2/17/2023,4077.73,4081.51,4047.95,4079.09
123
+ 2/16/2023,4114.75,4136.54,4089.49,4090.41
124
+ 2/15/2023,4119.50,4148.11,4103.98,4147.60
125
+ 2/14/2023,4126.70,4159.77,4095.01,4136.13
126
+ 2/13/2023,4096.62,4138.90,4092.67,4137.29
127
+ 2/10/2023,4068.92,4094.36,4060.79,4090.46
128
+ 2/9/2023,4144.25,4156.23,4069.67,4081.50
129
+ 2/8/2023,4153.47,4156.85,4111.67,4117.86
130
+ 2/7/2023,4105.35,4176.54,4088.39,4164.00
131
+ 2/6/2023,4119.57,4124.63,4093.38,4111.08
132
+ 2/3/2023,4136.69,4182.36,4123.36,4136.48
133
+ 2/2/2023,4158.68,4195.44,4141.88,4179.76
134
+ 2/1/2023,4070.07,4148.95,4037.20,4119.21
135
+ 1/31/2023,4020.85,4077.16,4020.44,4076.60
136
+ 1/30/2023,4049.27,4063.85,4015.55,4017.77
137
+ 1/27/2023,4053.72,4094.21,4048.70,4070.56
138
+ 1/26/2023,4036.08,4061.57,4013.29,4060.43
139
+ 1/25/2023,3982.71,4019.55,3949.06,4016.22
140
+ 1/24/2023,4001.74,4023.92,3989.79,4016.95
141
+ 1/23/2023,3978.14,4039.31,3971.64,4019.81
142
+ 1/20/2023,3909.04,3972.96,3897.86,3972.61
143
+ 1/19/2023,3911.84,3922.94,3885.54,3898.85
144
+ 1/18/2023,4002.25,4014.16,3926.59,3928.86
145
+ 1/17/2023,3999.28,4015.39,3984.57,3990.97
146
+ 1/13/2023,3960.32,4003.95,3947.67,3999.09
147
+ 1/12/2023,3977.57,3997.76,3937.56,3983.17
148
+ 1/11/2023,3932.35,3970.07,3928.54,3969.61
149
+ 1/10/2023,3888.57,3919.83,3877.29,3919.25
150
+ 1/9/2023,3910.82,3950.57,3890.42,3892.09
151
+ 1/6/2023,3823.37,3906.19,3809.56,3895.08
152
+ 1/5/2023,3839.74,3839.74,3802.42,3808.10
153
+ 1/4/2023,3840.36,3873.16,3815.77,3852.97
154
+ 1/3/2023,3853.29,3878.46,3794.33,3824.14
155
+ 12/30/2022,3828.85,3839.85,3800.34,3839.50
156
+ 12/29/2022,3805.45,3858.19,3805.45,3849.28
157
+ 12/28/2022,3829.56,3848.32,3780.78,3783.22
158
+ 12/27/2022,3843.34,3846.65,3813.22,3829.25
159
+ 12/23/2022,3813.11,3845.80,3797.01,3844.82
160
+ 12/22/2022,3853.26,3853.26,3764.49,3822.39
161
+ 12/21/2022,3839.49,3889.82,3839.49,3878.44
162
+ 12/20/2022,3810.47,3838.24,3795.62,3821.62
163
+ 12/19/2022,3853.79,3854.86,3800.04,3817.66
164
+ 12/16/2022,3890.91,3890.91,3827.91,3852.36
165
+ 12/15/2022,3958.37,3958.37,3879.45,3895.75
166
+ 12/14/2022,4015.54,4053.76,3965.65,3995.32
167
+ 12/13/2022,4069.38,4100.96,3993.03,4019.65
168
+ 12/12/2022,3939.29,3990.71,3935.30,3990.56
169
+ 12/9/2022,3954.17,3977.02,3933.04,3934.38
170
+ 12/8/2022,3947.79,3974.19,3935.83,3963.51
171
+ 12/7/2022,3933.28,3957.57,3922.68,3933.92
172
+ 12/6/2022,3996.63,4001.51,3918.39,3941.26
173
+ 12/5/2022,4052.02,4052.45,3984.49,3998.84
174
+ 12/2/2022,4040.17,4080.48,4026.63,4071.70
175
+ 12/1/2022,4087.14,4100.51,4050.87,4076.57
176
+ 11/30/2022,3957.18,4080.11,3938.58,4080.11
177
+ 11/29/2022,3964.19,3976.77,3937.65,3957.63
178
+ 11/28/2022,4005.36,4012.27,3955.77,3963.94
179
+ 11/25/2022,4023.34,4034.02,4020.76,4026.12
180
+ 11/23/2022,4001.02,4033.78,3998.66,4027.26
181
+ 11/22/2022,3965.51,4005.88,3956.88,4003.58
182
+ 11/21/2022,3956.23,3962.00,3933.34,3949.94
183
+ 11/18/2022,3966.39,3979.89,3935.98,3965.34
184
+ 11/17/2022,3919.26,3954.33,3906.54,3946.56
185
+ 11/16/2022,3976.82,3983.09,3954.34,3958.79
186
+ 11/15/2022,4006.41,4028.84,3953.17,3991.73
187
+ 11/14/2022,3977.97,4008.97,3956.40,3957.25
188
+ 11/11/2022,3963.72,4001.48,3944.82,3992.93
189
+ 11/10/2022,3859.89,3958.33,3859.89,3956.37
190
+ 11/9/2022,3810.94,3818.20,3744.22,3748.57
191
+ 11/8/2022,3817.02,3859.40,3786.28,3828.11
192
+ 11/7/2022,3780.71,3813.95,3764.70,3806.80
193
+ 11/4/2022,3766.98,3796.34,3708.84,3770.55
194
+ 11/3/2022,3733.25,3750.59,3698.15,3719.89
195
+ 11/2/2022,3852.90,3894.44,3758.68,3759.69
196
+ 11/1/2022,3901.79,3911.79,3843.80,3856.10
197
+ 10/31/2022,3881.85,3893.73,3863.18,3871.98
198
+ 10/28/2022,3808.26,3905.42,3808.26,3901.06
199
+ 10/27/2022,3834.69,3859.95,3803.79,3807.30
200
+ 10/26/2022,3825.97,3886.15,3824.07,3830.60
201
+ 10/25/2022,3799.44,3862.85,3799.44,3859.11
202
+ 10/24/2022,3762.01,3810.74,3741.65,3797.34
203
+ 10/21/2022,3657.10,3757.89,3647.42,3752.75
204
+ 10/20/2022,3689.05,3736.00,3656.44,3665.78
205
+ 10/19/2022,3703.11,3728.58,3666.51,3695.16
206
+ 10/18/2022,3746.26,3762.79,3686.53,3719.98
207
+ 10/17/2022,3638.65,3689.73,3638.65,3677.95
208
+ 10/14/2022,3690.41,3712.00,3579.68,3583.07
209
+ 10/13/2022,3520.37,3685.41,3491.58,3669.91
210
+ 10/12/2022,3590.83,3608.34,3573.86,3577.03
211
+ 10/11/2022,3595.86,3640.66,3568.45,3588.84
212
+ 10/10/2022,3647.51,3652.17,3588.10,3612.39
213
+ 10/7/2022,3706.74,3706.74,3620.73,3639.66
214
+ 10/6/2022,3771.97,3797.93,3739.22,3744.52
215
+ 10/5/2022,3753.25,3806.91,3722.66,3783.28
216
+ 10/4/2022,3726.46,3791.92,3726.46,3790.93
217
+ 10/3/2022,3609.78,3698.35,3604.93,3678.43
218
+ 9/30/2022,3633.48,3671.44,3584.13,3585.62
219
+ 9/29/2022,3687.01,3687.01,3610.40,3640.47
220
+ 9/28/2022,3651.94,3736.74,3640.61,3719.04
221
+ 9/27/2022,3686.44,3717.53,3623.29,3647.29
222
+ 9/26/2022,3682.72,3715.67,3644.76,3655.04
223
+ 9/23/2022,3727.14,3727.14,3647.47,3693.23
224
+ 9/22/2022,3782.36,3790.90,3749.45,3757.99
225
+ 9/21/2022,3871.40,3907.07,3789.49,3789.93
226
+ 9/20/2022,3875.23,3876.01,3827.54,3855.93
227
+ 9/19/2022,3849.91,3900.45,3838.50,3899.89
228
+ 9/16/2022,3880.95,3880.95,3837.08,3873.33
229
+ 9/15/2022,3932.41,3959.14,3888.28,3901.35
230
+ 9/14/2022,3940.73,3961.94,3912.18,3946.01
231
+ 9/13/2022,4037.12,4037.12,3921.28,3932.69
232
+ 9/12/2022,4083.67,4119.28,4083.67,4110.41
233
+ 9/9/2022,4022.94,4076.81,4022.94,4067.36
234
+ 9/8/2022,3959.94,4010.50,3944.81,4006.18
235
+ 9/7/2022,3909.43,3987.89,3906.03,3979.87
236
+ 9/6/2022,3930.89,3942.55,3886.75,3908.19
237
+ 9/2/2022,3996.39,4018.43,3906.21,3924.26
238
+ 9/1/2022,3936.73,3970.23,3903.65,3966.85
239
+ 8/31/2022,4000.67,4015.37,3954.53,3955.00
240
+ 8/30/2022,4041.25,4044.98,3965.21,3986.16
241
+ 8/29/2022,4034.58,4062.99,4017.42,4030.61
242
+ 8/26/2022,4198.74,4203.04,4057.66,4057.66
243
+ 8/25/2022,4153.26,4200.54,4147.59,4199.12
244
+ 8/24/2022,4126.55,4156.56,4119.97,4140.77
245
+ 8/23/2022,4133.09,4159.77,4124.03,4128.73
246
+ 8/22/2022,4195.08,4195.08,4129.86,4137.99
247
+ 8/19/2022,4266.31,4266.31,4218.70,4228.48
248
+ 8/18/2022,4273.13,4292.53,4261.98,4283.74
249
+ 8/17/2022,4280.40,4302.18,4253.08,4274.04
250
+ 8/16/2022,4290.46,4325.28,4277.77,4305.20
251
+ 8/15/2022,4269.37,4301.79,4256.90,4297.14
252
+ 8/12/2022,4225.02,4280.47,4219.78,4280.15
253
+ 8/11/2022,4227.40,4257.91,4201.41,4207.27
app.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ from sklearn.preprocessing import StandardScaler
8
+ from sklearn.cluster import KMeans, DBSCAN
9
+ from sklearn.metrics import silhouette_score
10
+ from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
11
+ from sklearn.mixture import GaussianMixture
12
+ from datasets import load_dataset
13
+ import os
14
+
15
+ # Load dataset
16
@st.cache_data
def load_data():
    """Return the "train" split of the stock dataset as a pandas DataFrame.

    The split is fetched with ``datasets.load_dataset`` from the
    ``Ci-Dave/SPY500MW_StockMarket`` repository. The function is wrapped in
    ``st.cache_data`` so the result is reused across Streamlit reruns.
    """
    return pd.DataFrame(load_dataset("Ci-Dave/SPY500MW_StockMarket")["train"])
21
+
22
+ # Preprocessing function
23
def preprocess_data(df):
    """Extract the numeric columns of ``df`` and standardize them.

    Args:
        df: DataFrame holding the raw dataset.

    Returns:
        A 3-tuple ``(numeric_frame, scaled_values, fitted_scaler)``:
        the numeric columns with incomplete rows dropped, the array returned by
        ``StandardScaler.fit_transform`` on that frame, and the scaler itself.
    """
    # Keep numeric columns only, then discard every row with a missing value.
    numeric_frame = df[df.select_dtypes(include=[np.number]).columns].dropna()
    standardizer = StandardScaler()
    scaled_values = standardizer.fit_transform(numeric_frame)
    return numeric_frame, scaled_values, standardizer
29
+
30
+ # Load the raw dataset, then derive its numeric frame, the standardized matrix and the fitted scaler
31
+ df = load_data()
32
+ df_numeric, df_scaled, scaler = preprocess_data(df)
33
+
34
+ # Load trained models with error handling
35
def load_model(filename):
    """Load a serialized model from disk, reporting a missing file in the UI.

    Args:
        filename: Path of the joblib file to load.

    Returns:
        The object deserialized by ``joblib.load``, or ``None`` (after showing
        a Streamlit error message) when ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        st.error(f"Model file {filename} not found. Make sure you uploaded the trained model.")
        return None
    # NOTE: joblib files are pickles; only load model files from a trusted source.
    return joblib.load(filename)
41
+
42
# Load the four serialized models in a fixed order; each name is None when its file is missing.
kmeans, hierarchical, dbscan, gmm = map(
    load_model,
    (
        "kmeans_stock_model.pkl",
        "hierarchical_stock_model.pkl",
        "dbscan_stock_model.pkl",
        "gmm_stock_model.pkl",
    ),
)
46
+
47
# Sidebar navigation
st.sidebar.title("Stock Market Clustering")
page = st.sidebar.radio("Choose a model:", ["Home", "Dataset", "K-Means", "Hierarchical", "DBSCAN", "GMM"])


def _render_figure(fig):
    """Display a Matplotlib figure in Streamlit, then close it to release its memory."""
    st.pyplot(fig)
    # Figures created with plt.subplots() stay registered in pyplot until closed;
    # without this they pile up on every rerun of the script.
    plt.close(fig)


def _show_cluster_tables(frame, column, labels):
    """Store ``labels`` in ``frame[column]`` and show the first rows and the per-cluster counts."""
    frame[column] = labels

    st.write("Cluster Assignments:")
    st.dataframe(frame[[column]].head())

    cluster_counts = frame[column].value_counts().reset_index()
    cluster_counts.columns = ["Cluster", "Count"]
    st.write("Cluster Distribution:")
    st.dataframe(cluster_counts)


def _show_silhouette(features, labels):
    """Show the silhouette score, or a warning when the labelling makes it undefined."""
    n_labels = np.unique(labels).size
    # silhouette_score raises ValueError unless 2 <= n_labels <= n_samples - 1.
    if 2 <= n_labels <= len(labels) - 1:
        silhouette = silhouette_score(features, labels)
        st.write(f"Silhouette Score: {silhouette:.4f}")
    else:
        st.warning(
            "Silhouette Score is undefined: it needs at least 2 clusters "
            "and fewer clusters than samples."
        )


def _show_scatter(frame, labels, palette="viridis"):
    """Scatter-plot the first two columns of ``frame``, coloured by cluster label."""
    fig, ax = plt.subplots()
    sns.scatterplot(x=frame.iloc[:, 0], y=frame.iloc[:, 1], hue=labels, palette=palette, ax=ax)
    _render_figure(fig)


if page == "Home":
    st.title("Stock Market Clustering Analysis")
    st.write("""
This application analyzes stock market data using clustering techniques.
Explore different clustering models (K-Means, Hierarchical, DBSCAN, and GMM) to identify patterns in stock prices.

**Dataset:**
The dataset includes stock market price data with key attributes such as Open, High, Low, and Close values.

**Clustering Models:**
- **K-Means:** Groups stocks into clusters based on price similarities.
- **Hierarchical Clustering:** Forms a tree-like structure to identify stock relationships.
- **DBSCAN:** Detects core clusters and outliers in stock prices.
- **Gaussian Mixture Model (GMM):** Uses probabilistic clustering for stock analysis.

**Instructions:**
- Select a clustering model from the sidebar.
- View the clustering results, including cluster assignments, distributions, and visualizations.
""")

elif page == "Dataset":
    st.title("Stock Market Dataset")

    st.write("### Dataset Preview:")
    st.dataframe(df[["Open", "High", "Low", "Close"]].head(), use_container_width=True)  # Show only key features

    st.write("### Full Dataset:")
    st.dataframe(df, use_container_width=True)

    st.write("### Dataset Summary:")
    st.write(df_numeric.describe())

    st.write("### Correlation Heatmap:")
    fig, ax = plt.subplots()
    sns.heatmap(df_numeric.corr(), annot=True, cmap="coolwarm", ax=ax)
    _render_figure(fig)

elif page == "K-Means":
    st.title("K-Means Clustering")
    # load_model() returns None for a missing file; compare explicitly instead of
    # relying on the truthiness of an estimator object.
    if kmeans is not None:
        clusters = kmeans.predict(df_scaled)
        _show_cluster_tables(df_numeric, "KMeans Cluster", clusters)
        _show_silhouette(df_scaled, clusters)
        _show_scatter(df_numeric, clusters)

elif page == "Hierarchical":
    st.title("Hierarchical Clustering")

    # The clustering is recomputed here with SciPy; the loaded `hierarchical`
    # model object is not used on this page.
    linked = linkage(df_scaled, method='ward')  # Create linkage matrix
    clusters = fcluster(linked, 4, criterion='maxclust')  # Cut the tree into at most 4 clusters
    _show_cluster_tables(df_numeric, "Hierarchical Cluster", clusters)

    # Dendrogram Visualization
    fig, ax = plt.subplots(figsize=(10, 5))
    dendrogram(linked, truncate_mode='level', p=5, ax=ax)
    _render_figure(fig)

    # Scatter Plot of First Two Features
    _show_scatter(df_numeric, clusters)

elif page == "DBSCAN":
    st.title("DBSCAN Clustering")
    if dbscan is not None:
        # DBSCAN has no predict(); fit_predict re-runs the clustering on the
        # current data with the loaded estimator's settings.
        clusters = dbscan.fit_predict(df_scaled)
        _show_cluster_tables(df_numeric, "DBSCAN Cluster", clusters)

        # Visualization: outliers (label -1) in red, clusters spread over viridis.
        cmap = sns.color_palette("viridis", as_cmap=True)
        cluster_ids = [label for label in np.unique(clusters) if label != -1]
        # Divide by at least 4 (the original scale) but grow with the largest
        # label so more than five clusters still receive distinct colours.
        scale = max(4, max(cluster_ids, default=0))
        palette = {label: cmap(label / scale) for label in cluster_ids}
        palette[-1] = "red"  # Mark outliers in red
        _show_scatter(df_numeric, clusters, palette=palette)

elif page == "GMM":
    st.title("Gaussian Mixture Model (GMM)")
    if gmm is not None:
        expected_features = gmm.means_.shape[1]  # Number of features used during training
        df_gmm = df_scaled[:, :expected_features]  # Keep only the needed columns
        clusters = gmm.predict(df_gmm)
        _show_cluster_tables(df_numeric, "GMM Cluster", clusters)
        _show_silhouette(df_gmm, clusters)
        _show_scatter(df_numeric, clusters)
dbscan_stock_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d79f445d36381918a010805245cac0d13111b8f04535dfebb853c5eab582a4e
3
+ size 12727
gmm_stock_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c59cef6dd1f677d5fdc08f6a8553b90b0f8cb367148b367b5ebc7ec246d0e91e
3
+ size 1742
hierarchical_stock_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c38beacb44e402b74041b7770da954aee34e6197f70e540e20741d4a29df967
3
+ size 8033
kmeans_stock_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00564ad761caee0d83ad3d3b8929964615122ae323e07b7c821cbbbea054c331
3
+ size 1831
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ joblib
3
+ pandas
4
+ matplotlib
5
+ seaborn
6
+ scikit-learn
7
+ datasets
stock_market_clustering.ipynb ADDED
The diff for this file is too large to render. See raw diff