bong9513 committed on
Commit
b53e714
·
verified ·
1 Parent(s): b051a08

Upload 8 files

Browse files
Analysis_code/find_reason/ busan_trend.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/find_reason/ daegu_trend.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/find_reason/ gwangju_trend.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/find_reason/ incheon_trend.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/find_reason/ seoul_trend.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/find_reason/daejeon_trend.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/find_reason/make_trend_plot.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Analysis_code/find_reason/wasserstein_distance.ipynb ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 6,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# 분석에 필요한 라이브러리 임포트\n",
10
+ "import warnings\n",
11
+ "warnings.filterwarnings('ignore')\n",
12
+ "import pandas as pd\n",
13
+ "import numpy as np\n",
14
+ "import matplotlib.pyplot as plt\n",
15
+ "import seaborn as sns\n",
16
+ "from scipy import stats\n",
17
+ "from scipy.spatial import distance\n",
18
+ "from scipy.stats import wasserstein_distance, entropy, ks_2samp\n",
19
+ "from sklearn.manifold import TSNE\n",
20
+ "from sklearn.preprocessing import StandardScaler\n",
21
+ "from sklearn.ensemble import RandomForestRegressor\n",
22
+ "from sklearn.ensemble import RandomForestClassifier # Added\n",
23
+ "from sklearn.model_selection import train_test_split # Added\n",
24
+ "from sklearn.metrics import roc_auc_score # Added\n",
25
+ "from statsmodels.distributions.empirical_distribution import ECDF # Added\n",
26
+ "import ot\n",
27
+ "\n",
28
+ "\n",
29
+ "# 한글 폰트 설정\n",
30
+ "plt.rcParams['font.family'] = 'NanumGothic'\n",
31
+ "plt.rcParams['axes.unicode_minus'] = False"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 7,
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "seoul = pd.read_feather(\"../../data/data_for_modeling/df_seoul.feather\")\n",
41
+ "seoul= seoul[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
42
+ "\n",
43
+ "busan = pd.read_feather(\"../../data/data_for_modeling/df_busan.feather\")\n",
44
+ "busan= busan[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
45
+ "\n",
46
+ "incheon = pd.read_feather(\"../../data/data_for_modeling/df_incheon.feather\")\n",
47
+ "incheon= incheon[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
48
+ "\n",
49
+ "daegu = pd.read_feather(\"../../data/data_for_modeling/df_daegu.feather\")\n",
50
+ "daegu= daegu[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
51
+ "\n",
52
+ "daejeon = pd.read_feather(\"../../data/data_for_modeling/df_daejeon.feather\")\n",
53
+ "daejeon= daejeon[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]\n",
54
+ "\n",
55
+ "gwangju = pd.read_feather(\"../../data/data_for_modeling/df_gwangju.feather\")\n",
56
+ "gwangju= gwangju[['datetime','hm','PM10','PM25','year','month','hour','multi_class']]"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 8,
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "name": "stdout",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "[0.5920662 0.92351786]\n",
69
+ "[0.60414398 0.9190468 ]\n",
70
+ "[0.60250035 0.9391276 ]\n",
71
+ "[0.60112832 0.92493121]\n",
72
+ "[0.58469137 0.90476229]\n",
73
+ "[0.617718 0.93503164]\n"
74
+ ]
75
+ }
76
+ ],
77
+ "source": [
78
+ "from sklearn.decomposition import PCA\n",
79
+ "\n",
80
+ "# 특성 선택 (예: PM10, PM25, hm 등)\n",
81
+ "features = ['PM10','PM25', 'hm']\n",
82
+ "# 스케일링\n",
83
+ "scaler = StandardScaler()\n",
84
+ "scaled_features = scaler.fit_transform(seoul[features])\n",
85
+ "pca = PCA(n_components=2)\n",
86
+ "pca.fit(scaled_features)\n",
87
+ "print(pca.explained_variance_ratio_.cumsum())\n",
88
+ "seoul_pca = pca.transform(scaled_features)\n",
89
+ "seoul.drop(columns=['PM25', 'hm'], inplace=True)\n",
90
+ "seoul[['pca_x', 'pca_y']] = seoul_pca\n",
91
+ "\n",
92
+ "\n",
93
+ "scaled_features = scaler.fit_transform(busan[features])\n",
94
+ "pca = PCA(n_components=2)\n",
95
+ "pca.fit(scaled_features)\n",
96
+ "print(pca.explained_variance_ratio_.cumsum())\n",
97
+ "busan_pca = pca.transform(scaled_features)\n",
98
+ "busan.drop(columns=['PM25', 'hm'], inplace=True)\n",
99
+ "busan[['pca_x', 'pca_y']] = busan_pca\n",
100
+ "\n",
101
+ "scaled_features = scaler.fit_transform(incheon[features]) \n",
102
+ "pca = PCA(n_components=2)\n",
103
+ "pca.fit(scaled_features)\n",
104
+ "print(pca.explained_variance_ratio_.cumsum())\n",
105
+ "incheon_pca = pca.transform(scaled_features)\n",
106
+ "incheon.drop(columns=['PM25', 'hm'], inplace=True)\n",
107
+ "incheon[['pca_x', 'pca_y']] = incheon_pca\n",
108
+ "\n",
109
+ "scaled_features = scaler.fit_transform(daegu[features])\n",
110
+ "pca = PCA(n_components=2)\n",
111
+ "pca.fit(scaled_features)\n",
112
+ "print(pca.explained_variance_ratio_.cumsum())\n",
113
+ "daegu_pca = pca.transform(scaled_features)\n",
114
+ "daegu.drop(columns=['PM25', 'hm'], inplace=True)\n",
115
+ "daegu[['pca_x', 'pca_y']] = daegu_pca\n",
116
+ "\n",
117
+ "scaled_features = scaler.fit_transform(daejeon[features])\n",
118
+ "pca = PCA(n_components=2)\n",
119
+ "pca.fit(scaled_features)\n",
120
+ "print(pca.explained_variance_ratio_.cumsum())\n",
121
+ "daejeon_pca = pca.transform(scaled_features)\n",
122
+ "daejeon.drop(columns=['PM25', 'hm'], inplace=True)\n",
123
+ "daejeon[['pca_x', 'pca_y']] = daejeon_pca\n",
124
+ "\n",
125
+ "scaled_features = scaler.fit_transform(gwangju[features])\n",
126
+ "pca = PCA(n_components=2)\n",
127
+ "pca.fit(scaled_features)\n",
128
+ "print(pca.explained_variance_ratio_.cumsum())\n",
129
+ "gwangju_pca = pca.transform(scaled_features)\n",
130
+ "gwangju.drop(columns=['PM25', 'hm'], inplace=True)\n",
131
+ "gwangju[['pca_x', 'pca_y']] = gwangju_pca\n"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": 31,
137
+ "metadata": {},
138
+ "outputs": [],
139
+ "source": [
140
+ "seoul_2018 = seoul[seoul['year'] == 2018]\n",
141
+ "seoul_2019 = seoul[seoul['year'] == 2019]\n",
142
+ "seoul_2020 = seoul[seoul['year'] == 2020]\n",
143
+ "seoul_2021 = seoul[seoul['year'] == 2021]\n",
144
+ "years = [2018, 2019, 2020, 2021]\n",
145
+ "\n",
146
+ "\n",
147
+ "busan_2018 = busan[busan['year'] == 2018]\n",
148
+ "busan_2019 = busan[busan['year'] == 2019]\n",
149
+ "busan_2020 = busan[busan['year'] == 2020]\n",
150
+ "busan_2021 = busan[busan['year'] == 2021]\n",
151
+ "years = [2018, 2019, 2020, 2021]\n",
152
+ "\n",
153
+ "\n",
154
+ "incheon_2018 = incheon[incheon['year'] == 2018]\n",
155
+ "incheon_2019 = incheon[incheon['year'] == 2019]\n",
156
+ "incheon_2020 = incheon[incheon['year'] == 2020]\n",
157
+ "incheon_2021 = incheon[incheon['year'] == 2021]\n",
158
+ "years = [2018, 2019, 2020, 2021]\n",
159
+ "\n",
160
+ "\n",
161
+ "daegu_2018 = daegu[daegu['year'] == 2018]\n",
162
+ "daegu_2019 = daegu[daegu['year'] == 2019]\n",
163
+ "daegu_2020 = daegu[daegu['year'] == 2020]\n",
164
+ "daegu_2021 = daegu[daegu['year'] == 2021]\n",
165
+ "years = [2018, 2019, 2020, 2021]\n",
166
+ "\n",
167
+ "\n",
168
+ "daejeon_2018 = daejeon[daejeon['year'] == 2018]\n",
169
+ "daejeon_2019 = daejeon[daejeon['year'] == 2019]\n",
170
+ "daejeon_2020 = daejeon[daejeon['year'] == 2020]\n",
171
+ "daejeon_2021 = daejeon[daejeon['year'] == 2021]\n",
172
+ "years = [2018, 2019, 2020, 2021]\n",
173
+ "\n",
174
+ "\n",
175
+ "gwangju_2018 = gwangju[gwangju['year'] == 2018]\n",
176
+ "gwangju_2019 = gwangju[gwangju['year'] == 2019]\n",
177
+ "gwangju_2020 = gwangju[gwangju['year'] == 2020]\n",
178
+ "gwangju_2021 = gwangju[gwangju['year'] == 2021]\n",
179
+ "years = [2018, 2019, 2020, 2021]\n",
180
+ "\n",
181
+ "\n",
182
+ "\n"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": 33,
188
+ "metadata": {},
189
+ "outputs": [
190
+ {
191
+ "name": "stdout",
192
+ "output_type": "stream",
193
+ "text": [
194
+ " 2018 2019 2020 2021\n",
195
+ "2018 0.0 0.130217 0.063132 1.081307\n",
196
+ "2019 0.130217 0.0 0.059051 0.830648\n",
197
+ "2020 0.063132 0.059051 0.0 0.039927\n",
198
+ "2021 1.081307 0.830648 0.039927 0.0\n"
199
+ ]
200
+ }
201
+ ],
202
+ "source": [
203
+ "# 연도별 데이터 준비\n",
204
+ "years = [2018, 2019, 2020, 2021]\n",
205
+ "data_dict = {\n",
206
+ " 2018: seoul_2018[['pca_x', 'pca_y']].values,\n",
207
+ " 2019: seoul_2019[['pca_x', 'pca_y']].values,\n",
208
+ " 2020: seoul_2020[['pca_x', 'pca_y']].values,\n",
209
+ " 2021: seoul_2021[['pca_x', 'pca_y']].values\n",
210
+ "}\n",
211
+ "\n",
212
+ "\n",
213
+ "# 결과를 저장할 데이터프레임 생성\n",
214
+ "result_df = pd.DataFrame(index=years, columns=years)\n",
215
+ "\n",
216
+ "for i, year1 in enumerate(years):\n",
217
+ " for j, year2 in enumerate(years):\n",
218
+ " if year1 == year2:\n",
219
+ " result_df.iloc[i, j] = 0.0\n",
220
+ " if j < i:\n",
221
+ " # 이미 계산된 값 사용\n",
222
+ " result_df.iloc[i, j] = result_df.iloc[j, i]\n",
223
+ " else:\n",
224
+ " X = data_dict[year1]\n",
225
+ " Y = data_dict[year2]\n",
226
+ " a = np.ones(len(X)) / len(X)\n",
227
+ " b = np.ones(len(Y)) / len(Y)\n",
228
+ " W = ot.emd2(a, b, ot.dist(X, Y))\n",
229
+ " result_df.iloc[i, j] = W\n",
230
+ " result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
231
+ "\n",
232
+ "# 결과 출력\n",
233
+ "print(result_df)"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": 23,
239
+ "metadata": {},
240
+ "outputs": [
241
+ {
242
+ "name": "stdout",
243
+ "output_type": "stream",
244
+ "text": [
245
+ " 2018 2019 2020 2021\n",
246
+ "2018 0.0 0.116261 0.10445 1.424479\n",
247
+ "2019 0.116261 0.0 0.09933 1.164067\n",
248
+ "2020 0.10445 0.09933 0.0 1.075336\n",
249
+ "2021 1.424479 1.164067 1.075336 0.0\n"
250
+ ]
251
+ }
252
+ ],
253
+ "source": [
254
+ "# 연도별 데이터 준비\n",
255
+ "years = [2018, 2019, 2020, 2021]\n",
256
+ "data_dict = {\n",
257
+ " 2018: busan_2018[['pca_x', 'pca_y']].values,\n",
258
+ " 2019: busan_2019[['pca_x', 'pca_y']].values,\n",
259
+ " 2020: busan_2020[['pca_x', 'pca_y']].values,\n",
260
+ " 2021: busan_2021[['pca_x', 'pca_y']].values\n",
261
+ "}\n",
262
+ "\n",
263
+ "\n",
264
+ "# 결과를 저장할 데이터프레임 생성\n",
265
+ "result_df = pd.DataFrame(index=years, columns=years)\n",
266
+ "\n",
267
+ "for i, year1 in enumerate(years):\n",
268
+ " for j, year2 in enumerate(years):\n",
269
+ " if year1 == year2:\n",
270
+ " result_df.iloc[i, j] = 0.0\n",
271
+ " if j < i:\n",
272
+ " # 이미 계산된 값 사용\n",
273
+ " result_df.iloc[i, j] = result_df.iloc[j, i]\n",
274
+ " else:\n",
275
+ " X = data_dict[year1]\n",
276
+ " Y = data_dict[year2]\n",
277
+ " a = np.ones(len(X)) / len(X)\n",
278
+ " b = np.ones(len(Y)) / len(Y)\n",
279
+ " W = ot.emd2(a, b, ot.dist(X, Y))\n",
280
+ " result_df.iloc[i, j] = W\n",
281
+ " result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
282
+ "\n",
283
+ "# 결과 출력\n",
284
+ "print(result_df)"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": 24,
290
+ "metadata": {},
291
+ "outputs": [
292
+ {
293
+ "name": "stdout",
294
+ "output_type": "stream",
295
+ "text": [
296
+ " 2018 2019 2020 2021\n",
297
+ "2018 0.0 0.080291 0.074071 0.449094\n",
298
+ "2019 0.080291 0.0 0.060171 0.384189\n",
299
+ "2020 0.074071 0.060171 0.0 0.04047\n",
300
+ "2021 0.449094 0.384189 0.04047 0.0\n"
301
+ ]
302
+ }
303
+ ],
304
+ "source": [
305
+ "# 연도별 데이터 준비\n",
306
+ "years = [2018, 2019, 2020, 2021]\n",
307
+ "data_dict = {\n",
308
+ " 2018: incheon_2018[['pca_x', 'pca_y']].values,\n",
309
+ " 2019: incheon_2019[['pca_x', 'pca_y']].values,\n",
310
+ " 2020: incheon_2020[['pca_x', 'pca_y']].values,\n",
311
+ " 2021: incheon_2021[['pca_x', 'pca_y']].values\n",
312
+ "}\n",
313
+ "\n",
314
+ "\n",
315
+ "# 결과를 저장할 데이터프레임 생성\n",
316
+ "result_df = pd.DataFrame(index=years, columns=years)\n",
317
+ "\n",
318
+ "for i, year1 in enumerate(years):\n",
319
+ " for j, year2 in enumerate(years):\n",
320
+ " if year1 == year2:\n",
321
+ " result_df.iloc[i, j] = 0.0\n",
322
+ " if j < i:\n",
323
+ " # 이미 계산된 값 사용\n",
324
+ " result_df.iloc[i, j] = result_df.iloc[j, i]\n",
325
+ " else:\n",
326
+ " X = data_dict[year1]\n",
327
+ " Y = data_dict[year2]\n",
328
+ " a = np.ones(len(X)) / len(X)\n",
329
+ " b = np.ones(len(Y)) / len(Y)\n",
330
+ " W = ot.emd2(a, b, ot.dist(X, Y))\n",
331
+ " result_df.iloc[i, j] = W\n",
332
+ " result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
333
+ "\n",
334
+ "# 결과 출력\n",
335
+ "print(result_df)"
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": 25,
341
+ "metadata": {},
342
+ "outputs": [
343
+ {
344
+ "name": "stdout",
345
+ "output_type": "stream",
346
+ "text": [
347
+ " 2018 2019 2020 2021\n",
348
+ "2018 0.0 0.127512 0.112157 0.731476\n",
349
+ "2019 0.127512 0.0 0.094651 0.647071\n",
350
+ "2020 0.112157 0.094651 0.0 0.041217\n",
351
+ "2021 0.731476 0.647071 0.041217 0.0\n"
352
+ ]
353
+ }
354
+ ],
355
+ "source": [
356
+ "# 연도별 데이터 준비\n",
357
+ "years = [2018, 2019, 2020, 2021]\n",
358
+ "data_dict = {\n",
359
+ " 2018: daegu_2018[['pca_x', 'pca_y']].values,\n",
360
+ " 2019: daegu_2019[['pca_x', 'pca_y']].values,\n",
361
+ " 2020: daegu_2020[['pca_x', 'pca_y']].values,\n",
362
+ " 2021: daegu_2021[['pca_x', 'pca_y']].values\n",
363
+ "}\n",
364
+ "\n",
365
+ "\n",
366
+ "# 결과를 저장할 데이터프레임 생성\n",
367
+ "result_df = pd.DataFrame(index=years, columns=years)\n",
368
+ "\n",
369
+ "for i, year1 in enumerate(years):\n",
370
+ " for j, year2 in enumerate(years):\n",
371
+ " if year1 == year2:\n",
372
+ " result_df.iloc[i, j] = 0.0\n",
373
+ " if j < i:\n",
374
+ " # 이미 계산된 값 사용\n",
375
+ " result_df.iloc[i, j] = result_df.iloc[j, i]\n",
376
+ " else:\n",
377
+ " X = data_dict[year1]\n",
378
+ " Y = data_dict[year2]\n",
379
+ " a = np.ones(len(X)) / len(X)\n",
380
+ " b = np.ones(len(Y)) / len(Y)\n",
381
+ " W = ot.emd2(a, b, ot.dist(X, Y))\n",
382
+ " result_df.iloc[i, j] = W\n",
383
+ " result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
384
+ "\n",
385
+ "# 결과 출력\n",
386
+ "print(result_df)"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": 26,
392
+ "metadata": {},
393
+ "outputs": [
394
+ {
395
+ "name": "stdout",
396
+ "output_type": "stream",
397
+ "text": [
398
+ " 2018 2019 2020 2021\n",
399
+ "2018 0.0 0.273013 0.053969 0.877338\n",
400
+ "2019 0.273013 0.0 0.137817 0.780071\n",
401
+ "2020 0.053969 0.137817 0.0 0.042294\n",
402
+ "2021 0.877338 0.780071 0.042294 0.0\n"
403
+ ]
404
+ }
405
+ ],
406
+ "source": [
407
+ "# 연도별 데이터 준비\n",
408
+ "years = [2018, 2019, 2020, 2021]\n",
409
+ "data_dict = {\n",
410
+ " 2018: daejeon_2018[['pca_x', 'pca_y']].values,\n",
411
+ " 2019: daejeon_2019[['pca_x', 'pca_y']].values,\n",
412
+ " 2020: daejeon_2020[['pca_x', 'pca_y']].values,\n",
413
+ " 2021: daejeon_2021[['pca_x', 'pca_y']].values\n",
414
+ "}\n",
415
+ "\n",
416
+ "\n",
417
+ "# 결과를 저장할 데이터프레임 생성\n",
418
+ "result_df = pd.DataFrame(index=years, columns=years)\n",
419
+ "\n",
420
+ "for i, year1 in enumerate(years):\n",
421
+ " for j, year2 in enumerate(years):\n",
422
+ " if year1 == year2:\n",
423
+ " result_df.iloc[i, j] = 0.0\n",
424
+ " if j < i:\n",
425
+ " # 이미 계산된 값 사용\n",
426
+ " result_df.iloc[i, j] = result_df.iloc[j, i]\n",
427
+ " else:\n",
428
+ " X = data_dict[year1]\n",
429
+ " Y = data_dict[year2]\n",
430
+ " a = np.ones(len(X)) / len(X)\n",
431
+ " b = np.ones(len(Y)) / len(Y)\n",
432
+ " W = ot.emd2(a, b, ot.dist(X, Y))\n",
433
+ " result_df.iloc[i, j] = W\n",
434
+ " result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
435
+ "\n",
436
+ "# 결과 출력\n",
437
+ "print(result_df)"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": 27,
443
+ "metadata": {},
444
+ "outputs": [
445
+ {
446
+ "name": "stdout",
447
+ "output_type": "stream",
448
+ "text": [
449
+ " 2018 2019 2020 2021\n",
450
+ "2018 0.0 0.105633 0.08202 1.00155\n",
451
+ "2019 0.105633 0.0 0.069322 0.892938\n",
452
+ "2020 0.08202 0.069322 0.0 0.480667\n",
453
+ "2021 1.00155 0.892938 0.480667 0.0\n"
454
+ ]
455
+ }
456
+ ],
457
+ "source": [
458
+ "# 연도별 데이터 준비\n",
459
+ "years = [2018, 2019, 2020, 2021]\n",
460
+ "data_dict = {\n",
461
+ " 2018: gwangju_2018[['pca_x', 'pca_y']].values,\n",
462
+ " 2019: gwangju_2019[['pca_x', 'pca_y']].values,\n",
463
+ " 2020: gwangju_2020[['pca_x', 'pca_y']].values,\n",
464
+ " 2021: gwangju_2021[['pca_x', 'pca_y']].values\n",
465
+ "}\n",
466
+ "\n",
467
+ "\n",
468
+ "# 결과를 저장할 데이터프레임 생성\n",
469
+ "result_df = pd.DataFrame(index=years, columns=years)\n",
470
+ "\n",
471
+ "for i, year1 in enumerate(years):\n",
472
+ " for j, year2 in enumerate(years):\n",
473
+ " if year1 == year2:\n",
474
+ " result_df.iloc[i, j] = 0.0\n",
475
+ " if j < i:\n",
476
+ " # 이미 계산된 값 사용\n",
477
+ " result_df.iloc[i, j] = result_df.iloc[j, i]\n",
478
+ " else:\n",
479
+ " X = data_dict[year1]\n",
480
+ " Y = data_dict[year2]\n",
481
+ " a = np.ones(len(X)) / len(X)\n",
482
+ " b = np.ones(len(Y)) / len(Y)\n",
483
+ " W = ot.emd2(a, b, ot.dist(X, Y))\n",
484
+ " result_df.iloc[i, j] = W\n",
485
+ " result_df.iloc[j, i] = W # 대칭 위치에 동일 값 저장\n",
486
+ "\n",
487
+ "# 결과 출력\n",
488
+ "print(result_df)"
489
+ ]
490
+ },
491
+ {
492
+ "cell_type": "code",
493
+ "execution_count": null,
494
+ "metadata": {},
495
+ "outputs": [],
496
+ "source": []
497
+ },
498
+ {
499
+ "cell_type": "code",
500
+ "execution_count": null,
501
+ "metadata": {},
502
+ "outputs": [],
503
+ "source": []
504
+ },
505
+ {
506
+ "cell_type": "code",
507
+ "execution_count": null,
508
+ "metadata": {},
509
+ "outputs": [],
510
+ "source": []
511
+ },
512
+ {
513
+ "cell_type": "code",
514
+ "execution_count": null,
515
+ "metadata": {},
516
+ "outputs": [],
517
+ "source": []
518
+ }
519
+ ],
520
+ "metadata": {
521
+ "kernelspec": {
522
+ "display_name": "py39",
523
+ "language": "python",
524
+ "name": "python3"
525
+ },
526
+ "language_info": {
527
+ "codemirror_mode": {
528
+ "name": "ipython",
529
+ "version": 3
530
+ },
531
+ "file_extension": ".py",
532
+ "mimetype": "text/x-python",
533
+ "name": "python",
534
+ "nbconvert_exporter": "python",
535
+ "pygments_lexer": "ipython3",
536
+ "version": "3.9.18"
537
+ }
538
+ },
539
+ "nbformat": 4,
540
+ "nbformat_minor": 4
541
+ }