bong9513 commited on
Commit
36f7eff
·
1 Parent(s): d2cebf0

Prepare for history rewrite

Browse files
Analysis_code/1.data_preprocessing/0.air_data_merge.ipynb CHANGED
@@ -1,1469 +1,3 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [
8
- {
9
- "name": "stdout",
10
- "output_type": "stream",
11
- "text": [
12
- "Package Version\n",
13
- "----------------------------- ------------------\n",
14
- "absl-py 1.4.0\n",
15
- "accelerate 0.24.0.dev0\n",
16
- "aiofiles 23.2.1\n",
17
- "aiohttp 3.8.5\n",
18
- "aiosignal 1.3.1\n",
19
- "alabaster 0.7.13\n",
20
- "albumentations 1.3.1\n",
21
- "alembic 1.12.0\n",
22
- "annotated-types 0.5.0\n",
23
- "anyio 4.0.0\n",
24
- "appdirs 1.4.4\n",
25
- "argon2-cffi 23.1.0\n",
26
- "argon2-cffi-bindings 21.2.0\n",
27
- "array-record 0.4.1\n",
28
- "arrow 1.2.3\n",
29
- "asttokens 2.4.0\n",
30
- "astunparse 1.6.3\n",
31
- "async-lru 2.0.4\n",
32
- "async-timeout 4.0.3\n",
33
- "attrs 23.1.0\n",
34
- "audioread 3.0.0\n",
35
- "Babel 2.12.1\n",
36
- "backcall 0.2.0\n",
37
- "backoff 2.2.1\n",
38
- "bcrypt 4.0.1\n",
39
- "beautifulsoup4 4.12.2\n",
40
- "bitsandbytes 0.41.1\n",
41
- "black 23.9.1\n",
42
- "bleach 6.0.0\n",
43
- "blis 0.7.10\n",
44
- "branca 0.6.0\n",
45
- "Brotli 1.1.0\n",
46
- "cachetools 5.3.1\n",
47
- "captum 0.6.0\n",
48
- "catalogue 2.0.9\n",
49
- "catalyst 22.4\n",
50
- "catboost 1.2.1.1\n",
51
- "certifi 2023.7.22\n",
52
- "cffi 1.15.1\n",
53
- "charset-normalizer 3.2.0\n",
54
- "chroma-hnswlib 0.7.3\n",
55
- "chromadb 0.4.10\n",
56
- "click 8.1.7\n",
57
- "cloudpickle 2.2.1\n",
58
- "cmaes 0.10.0\n",
59
- "cmake 3.27.5\n",
60
- "cmdstanpy 1.1.0\n",
61
- "coloredlogs 15.0.1\n",
62
- "colorlog 6.7.0\n",
63
- "comm 0.1.4\n",
64
- "confection 0.1.3\n",
65
- "contourpy 1.1.1\n",
66
- "convertdate 2.4.0\n",
67
- "cubinlinker-cu11 0.3.0.post1\n",
68
- "cuda-python 11.8.2\n",
69
- "cudf-cu11 23.8.0\n",
70
- "cuml-cu11 23.8.0\n",
71
- "cupy-cuda11x 12.2.0\n",
72
- "curio 1.6\n",
73
- "customized-konlpy 0.0.64\n",
74
- "cycler 0.11.0\n",
75
- "cymem 2.0.8\n",
76
- "cysignals 1.11.2\n",
77
- "Cython 3.0.2\n",
78
- "dask 2023.7.1\n",
79
- "dask-cuda 23.8.0\n",
80
- "dask-cudf-cu11 23.8.0\n",
81
- "dataclasses-json 0.5.14\n",
82
- "datasets 2.14.5\n",
83
- "debugpy 1.8.0\n",
84
- "decorator 5.1.1\n",
85
- "defusedxml 0.7.1\n",
86
- "dill 0.3.7\n",
87
- "distributed 2023.7.1\n",
88
- "dm-tree 0.1.8\n",
89
- "dnspython 2.4.2\n",
90
- "docker-pycreds 0.4.0\n",
91
- "docrepr 0.2.0\n",
92
- "docutils 0.18.1\n",
93
- "duckduckgo-search 3.8.5\n",
94
- "entrypoints 0.4\n",
95
- "ephem 4.1.4\n",
96
- "etils 1.4.1\n",
97
- "exceptiongroup 1.1.3\n",
98
- "executing 1.2.0\n",
99
- "fastai 2.7.12\n",
100
- "fastapi 0.99.1\n",
101
- "fastcore 1.5.29\n",
102
- "fastdownload 0.0.7\n",
103
- "fastjsonschema 2.18.0\n",
104
- "fastprogress 1.0.3\n",
105
- "fastrlock 0.8.2\n",
106
- "fasttext 0.9.2\n",
107
- "filelock 3.12.4\n",
108
- "flatbuffers 23.5.26\n",
109
- "folium 0.14.0\n",
110
- "fonttools 4.42.1\n",
111
- "fqdn 1.5.1\n",
112
- "frozenlist 1.4.0\n",
113
- "fsspec 2023.6.0\n",
114
- "future 0.18.3\n",
115
- "fvcore 0.1.5.post20221221\n",
116
- "gast 0.4.0\n",
117
- "gensim 4.3.2\n",
118
- "gitdb 4.0.10\n",
119
- "GitPython 3.1.36\n",
120
- "google-auth 2.23.0\n",
121
- "google-auth-oauthlib 1.0.0\n",
122
- "google-pasta 0.2.0\n",
123
- "googleapis-common-protos 1.60.0\n",
124
- "graphviz 0.20.1\n",
125
- "greenlet 2.0.2\n",
126
- "grpcio 1.58.0\n",
127
- "h11 0.14.0\n",
128
- "h2 4.1.0\n",
129
- "h5py 3.9.0\n",
130
- "holidays 0.33\n",
131
- "hpack 4.0.0\n",
132
- "httpcore 0.18.0\n",
133
- "httptools 0.6.0\n",
134
- "httpx 0.25.0\n",
135
- "huggingface-hub 0.16.4\n",
136
- "humanfriendly 10.0\n",
137
- "hydra-slayer 0.4.1\n",
138
- "hyperframe 6.0.1\n",
139
- "hyperopt 0.2.7\n",
140
- "idna 3.4\n",
141
- "imageio 2.31.3\n",
142
- "imagesize 1.4.1\n",
143
- "importlib-metadata 6.8.0\n",
144
- "importlib-resources 6.0.1\n",
145
- "iniconfig 2.0.0\n",
146
- "intel-openmp 2023.2.0\n",
147
- "iopath 0.1.10\n",
148
- "ipykernel 6.25.2\n",
149
- "ipyparallel 8.6.1\n",
150
- "ipython 8.15.0\n",
151
- "ipython-genutils 0.2.0\n",
152
- "ipywidgets 8.1.1\n",
153
- "isoduration 20.11.0\n",
154
- "jedi 0.19.0\n",
155
- "Jinja2 3.1.2\n",
156
- "joblib 1.3.2\n",
157
- "JPype1 1.4.1\n",
158
- "JPype1-py3 0.5.5.4\n",
159
- "json5 0.9.14\n",
160
- "jsonpointer 2.4\n",
161
- "jsonschema 4.19.0\n",
162
- "jsonschema-specifications 2023.7.1\n",
163
- "jupyter 1.0.0\n",
164
- "jupyter_client 8.3.1\n",
165
- "jupyter-console 6.6.3\n",
166
- "jupyter_core 5.3.1\n",
167
- "jupyter-events 0.7.0\n",
168
- "jupyter-lsp 2.2.0\n",
169
- "jupyter_server 2.7.3\n",
170
- "jupyter_server_terminals 0.4.4\n",
171
- "jupyterlab 4.0.6\n",
172
- "jupyterlab-pygments 0.2.2\n",
173
- "jupyterlab_server 2.25.0\n",
174
- "jupyterlab-widgets 3.0.9\n",
175
- "jupyterthemes 0.20.0\n",
176
- "kaggle 1.5.16\n",
177
- "keras 2.13.1\n",
178
- "kiwisolver 1.4.5\n",
179
- "konlpy 0.6.0\n",
180
- "kornia 0.7.0\n",
181
- "krwordrank 1.0.3\n",
182
- "langchain 0.0.295\n",
183
- "langcodes 3.3.0\n",
184
- "langsmith 0.0.38\n",
185
- "lazy_loader 0.3\n",
186
- "lesscpy 0.15.1\n",
187
- "libclang 16.0.6\n",
188
- "librosa 0.10.1\n",
189
- "lightgbm 4.1.0\n",
190
- "lit 16.0.6\n",
191
- "llvmlite 0.40.1\n",
192
- "locket 1.0.0\n",
193
- "loguru 0.7.2\n",
194
- "LunarCalendar 0.0.9\n",
195
- "lxml 4.9.3\n",
196
- "Mako 1.2.4\n",
197
- "Markdown 3.4.4\n",
198
- "MarkupSafe 2.1.3\n",
199
- "marshmallow 3.20.1\n",
200
- "matplotlib 3.8.0\n",
201
- "matplotlib-inline 0.1.6\n",
202
- "mecab-python3 1.0.7\n",
203
- "missingno 0.5.2\n",
204
- "mistune 3.0.1\n",
205
- "mkl 2023.2.0\n",
206
- "mlxtend 0.22.0\n",
207
- "monotonic 1.6\n",
208
- "mpmath 1.3.0\n",
209
- "msgpack 1.0.5\n",
210
- "multidict 6.0.4\n",
211
- "multiprocess 0.70.15\n",
212
- "murmurhash 1.0.10\n",
213
- "mypy-extensions 1.0.0\n",
214
- "nbclient 0.8.0\n",
215
- "nbconvert 7.8.0\n",
216
- "nbformat 5.9.2\n",
217
- "nest-asyncio 1.5.8\n",
218
- "networkx 3.1\n",
219
- "nltk 3.8.1\n",
220
- "notebook 7.0.3\n",
221
- "notebook_shim 0.2.3\n",
222
- "numba 0.57.1\n",
223
- "numexpr 2.8.6\n",
224
- "numpy 1.24.3\n",
225
- "nvidia-cublas-cu11 11.10.3.66\n",
226
- "nvidia-cuda-cupti-cu11 11.7.101\n",
227
- "nvidia-cuda-nvrtc-cu11 11.7.99\n",
228
- "nvidia-cuda-runtime-cu11 11.7.99\n",
229
- "nvidia-cudnn-cu11 8.5.0.96\n",
230
- "nvidia-cufft-cu11 10.9.0.58\n",
231
- "nvidia-curand-cu11 10.2.10.91\n",
232
- "nvidia-cusolver-cu11 11.4.0.1\n",
233
- "nvidia-cusparse-cu11 11.7.4.91\n",
234
- "nvidia-nccl-cu11 2.14.3\n",
235
- "nvidia-nvtx-cu11 11.7.91\n",
236
- "nvtx 0.2.8\n",
237
- "oauthlib 3.2.2\n",
238
- "onnxruntime 1.15.1\n",
239
- "openai 0.28.0\n",
240
- "opencv-python 4.8.0.76\n",
241
- "opencv-python-headless 4.8.0.76\n",
242
- "opt-einsum 3.3.0\n",
243
- "optuna 3.3.0\n",
244
- "outcome 1.2.0\n",
245
- "overrides 7.4.0\n",
246
- "packaging 23.1\n",
247
- "pandas 1.5.3\n",
248
- "pandocfilters 1.5.0\n",
249
- "parso 0.8.3\n",
250
- "partd 1.4.0\n",
251
- "pathspec 0.11.2\n",
252
- "pathtools 0.1.2\n",
253
- "pathy 0.10.2\n",
254
- "patsy 0.5.3\n",
255
- "peft 0.6.0.dev0\n",
256
- "pexpect 4.8.0\n",
257
- "pickleshare 0.7.5\n",
258
- "Pillow 10.0.1\n",
259
- "pinecone-client 2.2.4\n",
260
- "pip 23.2.1\n",
261
- "platformdirs 3.10.0\n",
262
- "plotly 5.17.0\n",
263
- "pluggy 1.3.0\n",
264
- "ply 3.11\n",
265
- "pooch 1.7.0\n",
266
- "portalocker 2.8.2\n",
267
- "posthog 3.0.2\n",
268
- "preshed 3.0.9\n",
269
- "prometheus-client 0.17.1\n",
270
- "promise 2.3\n",
271
- "prompt-toolkit 3.0.39\n",
272
- "prophet 1.1.4\n",
273
- "protobuf 4.24.3\n",
274
- "psutil 5.9.5\n",
275
- "ptxcompiler-cu11 0.7.0.post1\n",
276
- "ptyprocess 0.7.0\n",
277
- "pulsar-client 3.3.0\n",
278
- "pure-eval 0.2.2\n",
279
- "py 1.11.0\n",
280
- "py4j 0.10.9.7\n",
281
- "pyarrow 11.0.0\n",
282
- "pyasn1 0.5.0\n",
283
- "pyasn1-modules 0.3.0\n",
284
- "pybind11 2.11.1\n",
285
- "pycparser 2.21\n",
286
- "pydantic 1.10.12\n",
287
- "pydantic_core 2.6.3\n",
288
- "pydicom 2.4.3\n",
289
- "pyfasttext 0.4.6\n",
290
- "Pygments 2.16.1\n",
291
- "pygraphviz 1.11\n",
292
- "pylibraft-cu11 23.8.0\n",
293
- "PyMeeus 0.5.12\n",
294
- "PyMySQL 1.1.0\n",
295
- "pynvml 11.4.1\n",
296
- "pyparsing 3.1.1\n",
297
- "pypdf 3.16.1\n",
298
- "PyPika 0.48.9\n",
299
- "pystan 2.19.1.1\n",
300
- "pytest 6.2.5\n",
301
- "pytest-asyncio 0.20.3\n",
302
- "python-dateutil 2.8.2\n",
303
- "python-dotenv 1.0.0\n",
304
- "python-json-logger 2.0.7\n",
305
- "python-slugify 8.0.1\n",
306
- "pytz 2023.3.post1\n",
307
- "PyWavelets 1.4.1\n",
308
- "PyYAML 6.0.1\n",
309
- "pyzmq 25.1.1\n",
310
- "qtconsole 5.4.4\n",
311
- "QtPy 2.4.0\n",
312
- "qudida 0.0.4\n",
313
- "raft-dask-cu11 23.8.0\n",
314
- "referencing 0.30.2\n",
315
- "regex 2023.8.8\n",
316
- "requests 2.31.0\n",
317
- "requests-oauthlib 1.3.1\n",
318
- "rfc3339-validator 0.1.4\n",
319
- "rfc3986-validator 0.1.1\n",
320
- "rmm-cu11 23.8.0\n",
321
- "rpds-py 0.10.3\n",
322
- "rsa 4.9\n",
323
- "safetensors 0.3.3\n",
324
- "scikit-image 0.21.0\n",
325
- "scikit-learn 1.3.0\n",
326
- "scipy 1.11.2\n",
327
- "seaborn 0.12.2\n",
328
- "Send2Trash 1.8.2\n",
329
- "sentencepiece 0.1.99\n",
330
- "sentry-sdk 1.31.0\n",
331
- "setproctitle 1.3.2\n",
332
- "setuptools 68.0.0\n",
333
- "shap 0.42.1\n",
334
- "six 1.16.0\n",
335
- "slicer 0.0.7\n",
336
- "smart-open 6.4.0\n",
337
- "smmap 5.0.1\n",
338
- "sniffio 1.3.0\n",
339
- "snowballstemmer 2.2.0\n",
340
- "socksio 1.0.0\n",
341
- "sortedcontainers 2.4.0\n",
342
- "soundfile 0.12.1\n",
343
- "soupsieve 2.5\n",
344
- "soxr 0.3.6\n",
345
- "soynlp 0.0.493\n",
346
- "soyspacing 1.0.17\n",
347
- "spacy 3.6.1\n",
348
- "spacy-legacy 3.0.12\n",
349
- "spacy-loggers 1.0.5\n",
350
- "Sphinx 7.2.6\n",
351
- "sphinx-rtd-theme 1.3.0\n",
352
- "sphinxcontrib-applehelp 1.0.7\n",
353
- "sphinxcontrib-devhelp 1.0.5\n",
354
- "sphinxcontrib-htmlhelp 2.0.4\n",
355
- "sphinxcontrib-jquery 4.1\n",
356
- "sphinxcontrib-jsmath 1.0.1\n",
357
- "sphinxcontrib-qthelp 1.0.6\n",
358
- "sphinxcontrib-serializinghtml 1.1.9\n",
359
- "SQLAlchemy 2.0.21\n",
360
- "srsly 2.4.7\n",
361
- "stack-data 0.6.2\n",
362
- "starlette 0.27.0\n",
363
- "statsmodels 0.14.0\n",
364
- "sympy 1.12\n",
365
- "tabulate 0.9.0\n",
366
- "tbb 2021.10.0\n",
367
- "tblib 2.0.0\n",
368
- "tenacity 8.2.3\n",
369
- "tensorboard 2.13.0\n",
370
- "tensorboard-data-server 0.7.1\n",
371
- "tensorboardX 2.6.2.2\n",
372
- "tensorflow 2.13.0\n",
373
- "tensorflow-datasets 4.9.3\n",
374
- "tensorflow-estimator 2.13.0\n",
375
- "tensorflow-io-gcs-filesystem 0.34.0\n",
376
- "tensorflow-metadata 1.14.0\n",
377
- "termcolor 2.3.0\n",
378
- "terminado 0.17.1\n",
379
- "testpath 0.6.0\n",
380
- "text-unidecode 1.3\n",
381
- "thinc 8.1.12\n",
382
- "threadpoolctl 3.2.0\n",
383
- "tifffile 2023.9.18\n",
384
- "tiktoken 0.5.1\n",
385
- "tinycss2 1.2.1\n",
386
- "tokenizers 0.14.0\n",
387
- "toml 0.10.2\n",
388
- "tomli 2.0.1\n",
389
- "toolz 0.12.0\n",
390
- "torch 2.0.0\n",
391
- "torchaudio 2.0.2+cu118\n",
392
- "torchdata 0.6.0\n",
393
- "torchsummary 1.5.1\n",
394
- "torchtext 0.15.1\n",
395
- "torchtriton 2.0.0+f16138d447\n",
396
- "torchvision 0.15.2\n",
397
- "tornado 6.3.3\n",
398
- "tqdm 4.66.1\n",
399
- "traitlets 5.10.0\n",
400
- "transformers 4.34.0.dev0\n",
401
- "treelite 3.2.0\n",
402
- "treelite-runtime 3.2.0\n",
403
- "trio 0.22.2\n",
404
- "triton 2.0.0\n",
405
- "typer 0.9.0\n",
406
- "typing_extensions 4.5.0\n",
407
- "typing-inspect 0.9.0\n",
408
- "tzdata 2023.3\n",
409
- "ucx-py-cu11 0.33.0\n",
410
- "uri-template 1.3.0\n",
411
- "urllib3 1.26.16\n",
412
- "uvicorn 0.23.2\n",
413
- "uvloop 0.17.0\n",
414
- "wandb 0.15.10\n",
415
- "wasabi 1.1.2\n",
416
- "watchfiles 0.20.0\n",
417
- "wcwidth 0.2.6\n",
418
- "webcolors 1.13\n",
419
- "webencodings 0.5.1\n",
420
- "websocket-client 1.6.3\n",
421
- "websockets 11.0.3\n",
422
- "Werkzeug 2.3.7\n",
423
- "wheel 0.38.4\n",
424
- "widgetsnbextension 4.0.9\n",
425
- "wordcloud 1.9.2\n",
426
- "wrapt 1.15.0\n",
427
- "xgboost 2.0.0\n",
428
- "xxhash 3.3.0\n",
429
- "yacs 0.1.8\n",
430
- "yarl 1.9.2\n",
431
- "zict 3.0.0\n",
432
- "zipp 3.17.0\n"
433
- ]
434
- }
435
- ],
436
- "source": [
437
- "!pip list"
438
- ]
439
- },
440
- {
441
- "cell_type": "code",
442
- "execution_count": 1,
443
- "metadata": {},
444
- "outputs": [
445
- {
446
- "ename": "ModuleNotFoundError",
447
- "evalue": "No module named 'numpy'",
448
- "output_type": "error",
449
- "traceback": [
450
- "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
451
- "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)",
452
- "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mos\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnp\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpd\u001b[39;00m\n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnatsort\u001b[39;00m\n",
453
- "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'numpy'"
454
- ]
455
- }
456
- ],
457
- "source": [
458
- "import os\n",
459
- "import numpy as np\n",
460
- "import pandas as pd\n",
461
- "import natsort\n",
462
- "from datetime import datetime\n",
463
- "from tqdm.auto import tqdm"
464
- ]
465
- },
466
- {
467
- "cell_type": "code",
468
- "execution_count": 2,
469
- "metadata": {},
470
- "outputs": [],
471
- "source": [
472
- "def get_data(year):\n",
473
- " files = natsort.natsorted(os.listdir(f'../../data/대기질/{year}/'))\n",
474
- " data = []\n",
475
- " for file in tqdm(files, desc=f\"Reading files...({len(files)})\"):\n",
476
- " data.append(pd.read_excel(f'../../data/대기질/{year}/{file}', usecols=[\"지역\", '망', \"측정소코드\", \"측정소명\", \"측정일시\", \"O3\", \"NO2\", \"PM10\", \"PM25\", \"주소\"]))\n",
477
- "\n",
478
- " return pd.concat(data)"
479
- ]
480
- },
481
- {
482
- "cell_type": "code",
483
- "execution_count": 3,
484
- "metadata": {},
485
- "outputs": [],
486
- "source": [
487
- "# 합친 데이터에 날짜 정보를 추가한다.\n",
488
- "def add_date(df):\n",
489
- "\n",
490
- " df[\"측정일시\"] = df[\"측정일시\"].astype(str).str[:10]\n",
491
- " df[\"측정일시\"] = pd.to_datetime(df[\"측정일시\"], format='%Y%m%d%H', errors=\"coerce\")\n",
492
- "\n",
493
- " df[\"year\"] = df[\"측정일시\"].dt.year\n",
494
- " df[\"month\"] = df[\"측정일시\"].dt.month\n",
495
- " df[\"day\"] = df[\"측정일시\"].dt.day\n",
496
- " df[\"hour\"] = df[\"측정일시\"].dt.hour\n",
497
- "\n",
498
- " return df"
499
- ]
500
- },
501
- {
502
- "cell_type": "code",
503
- "execution_count": 4,
504
- "metadata": {},
505
- "outputs": [
506
- {
507
- "name": "stderr",
508
- "output_type": "stream",
509
- "text": [
510
- " 0%| | 0/6 [00:00<?, ?it/s]\n",
511
- "Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
512
- "Reading files...(13): 8%|▊ | 1/13 [00:34<06:57, 34.80s/it]\u001b[A\n",
513
- "Reading files...(13): 15%|█▌ | 2/13 [01:12<06:41, 36.47s/it]\u001b[A\n",
514
- "Reading files...(13): 23%|██▎ | 3/13 [01:47<05:58, 35.89s/it]\u001b[A\n",
515
- "Reading files...(13): 31%|███ | 4/13 [02:23<05:23, 35.96s/it]\u001b[A\n",
516
- "Reading files...(13): 38%|███▊ | 5/13 [02:59<04:47, 35.92s/it]\u001b[A\n",
517
- "Reading files...(13): 46%|████▌ | 6/13 [03:35<04:12, 36.09s/it]\u001b[A\n",
518
- "Reading files...(13): 62%|██████▏ | 8/13 [04:12<02:16, 27.35s/it]\u001b[A\n",
519
- "Reading files...(13): 69%|██████▉ | 9/13 [04:46<01:56, 29.05s/it]\u001b[A\n",
520
- "Reading files...(13): 77%|███████▋ | 10/13 [05:21<01:31, 30.55s/it]\u001b[A\n",
521
- "Reading files...(13): 85%|████████▍ | 11/13 [05:58<01:04, 32.46s/it]\u001b[A\n",
522
- "Reading files...(13): 92%|█████████▏| 12/13 [06:37<00:34, 34.28s/it]\u001b[A\n",
523
- "Reading files...(13): 100%|██████████| 13/13 [07:08<00:00, 32.93s/it]\u001b[A\n",
524
- " 17%|█▋ | 1/6 [07:18<36:30, 438.18s/it]\n",
525
- "Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
526
- "Reading files...(13): 8%|▊ | 1/13 [00:43<08:41, 43.43s/it]\u001b[A\n",
527
- "Reading files...(13): 15%|█▌ | 2/13 [01:26<07:56, 43.29s/it]\u001b[A\n",
528
- "Reading files...(13): 23%|██▎ | 3/13 [02:07<07:02, 42.22s/it]\u001b[A\n",
529
- "Reading files...(13): 31%|███ | 4/13 [02:50<06:23, 42.66s/it]\u001b[A\n",
530
- "Reading files...(13): 38%|███▊ | 5/13 [03:28<05:27, 40.90s/it]\u001b[A\n",
531
- "Reading files...(13): 46%|████▌ | 6/13 [04:15<04:59, 42.79s/it]\u001b[A\n",
532
- "Reading files...(13): 54%|█████▍ | 7/13 [04:58<04:18, 43.14s/it]\u001b[A\n",
533
- "Reading files...(13): 62%|██████▏ | 8/13 [05:43<03:37, 43.47s/it]\u001b[A\n",
534
- "Reading files...(13): 69%|██████▉ | 9/13 [06:28<02:55, 43.96s/it]\u001b[A\n",
535
- "Reading files...(13): 77%|███████▋ | 10/13 [07:12<02:12, 44.01s/it]\u001b[A\n",
536
- "Reading files...(13): 85%|████████▍ | 11/13 [07:52<01:25, 42.90s/it]\u001b[A\n",
537
- "Reading files...(13): 100%|██████████| 13/13 [08:34<00:00, 39.61s/it]\u001b[A\n",
538
- " 33%|███▎ | 2/6 [16:05<32:42, 490.55s/it]\n",
539
- "Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
540
- "Reading files...(13): 8%|▊ | 1/13 [00:49<09:56, 49.74s/it]\u001b[A\n",
541
- "Reading files...(13): 15%|█▌ | 2/13 [01:43<09:31, 51.98s/it]\u001b[A\n",
542
- "Reading files...(13): 23%|██▎ | 3/13 [02:33<08:29, 50.96s/it]\u001b[A\n",
543
- "Reading files...(13): 31%|███ | 4/13 [03:23<07:38, 50.95s/it]\u001b[A\n",
544
- "Reading files...(13): 38%|███▊ | 5/13 [04:13<06:43, 50.46s/it]\u001b[A\n",
545
- "Reading files...(13): 46%|████▌ | 6/13 [04:58<05:40, 48.71s/it]\u001b[A\n",
546
- "Reading files...(13): 54%|█████▍ | 7/13 [05:50<04:57, 49.66s/it]\u001b[A\n",
547
- "Reading files...(13): 62%|██████▏ | 8/13 [06:45<04:16, 51.29s/it]\u001b[A\n",
548
- "Reading files...(13): 77%|███████▋ | 10/13 [07:38<01:58, 39.46s/it]\u001b[A\n",
549
- "Reading files...(13): 85%|████████▍ | 11/13 [08:30<01:25, 42.79s/it]\u001b[A\n",
550
- "Reading files...(13): 92%|█████████▏| 12/13 [09:26<00:46, 46.32s/it]\u001b[A\n",
551
- "Reading files...(13): 100%|██████████| 13/13 [10:13<00:00, 47.19s/it]\u001b[A\n",
552
- " 50%|█████ | 3/6 [26:32<27:38, 552.96s/it]\n",
553
- "Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
554
- "Reading files...(13): 8%|▊ | 1/13 [00:59<11:48, 59.01s/it]\u001b[A\n",
555
- "Reading files...(13): 15%|█▌ | 2/13 [01:56<10:40, 58.19s/it]\u001b[A\n",
556
- "Reading files...(13): 23%|██▎ | 3/13 [02:53<09:37, 57.77s/it]\u001b[A\n",
557
- "Reading files...(13): 31%|███ | 4/13 [03:52<08:41, 58.00s/it]\u001b[A\n",
558
- "Reading files...(13): 38%|███▊ | 5/13 [04:44<07:26, 55.77s/it]\u001b[A\n",
559
- "Reading files...(13): 46%|████▌ | 6/13 [05:40<06:32, 56.05s/it]\u001b[A\n",
560
- "Reading files...(13): 54%|█████▍ | 7/13 [06:36<05:36, 56.06s/it]\u001b[A\n",
561
- "Reading files...(13): 62%|██████▏ | 8/13 [07:33<04:42, 56.42s/it]\u001b[A\n",
562
- "Reading files...(13): 69%|██████▉ | 9/13 [08:34<03:51, 57.76s/it]\u001b[A\n",
563
- "Reading files...(13): 77%|███████▋ | 10/13 [09:35<02:56, 58.75s/it]\u001b[A\n",
564
- "Reading files...(13): 92%|█████████▏| 12/13 [10:33<00:44, 44.84s/it]\u001b[A\n",
565
- "Reading files...(13): 100%|██████████| 13/13 [11:32<00:00, 53.29s/it]\u001b[A\n",
566
- " 67%|██████▋ | 4/6 [38:20<20:28, 614.26s/it]\n",
567
- "Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
568
- "Reading files...(13): 8%|▊ | 1/13 [00:59<11:57, 59.79s/it]\u001b[A\n",
569
- "Reading files...(13): 15%|█▌ | 2/13 [02:01<11:07, 60.67s/it]\u001b[A\n",
570
- "Reading files...(13): 23%|██▎ | 3/13 [03:02<10:10, 61.02s/it]\u001b[A\n",
571
- "Reading files...(13): 31%|███ | 4/13 [03:57<08:48, 58.74s/it]\u001b[A\n",
572
- "Reading files...(13): 38%|███▊ | 5/13 [04:57<07:53, 59.18s/it]\u001b[A\n",
573
- "Reading files...(13): 46%|████▌ | 6/13 [06:00<07:03, 60.45s/it]\u001b[A\n",
574
- "Reading files...(13): 54%|█████▍ | 7/13 [07:00<06:02, 60.38s/it]\u001b[A\n",
575
- "Reading files...(13): 62%|██████▏ | 8/13 [08:02<05:04, 60.85s/it]\u001b[A\n",
576
- "Reading files...(13): 69%|██████▉ | 9/13 [09:04<04:04, 61.03s/it]\u001b[A\n",
577
- "Reading files...(13): 77%|███████▋ | 10/13 [10:04<03:02, 60.67s/it]\u001b[A\n",
578
- "Reading files...(13): 92%|█████████▏| 12/13 [11:06<00:46, 46.76s/it]\u001b[A\n",
579
- "Reading files...(13): 100%|██████████| 13/13 [12:09<00:00, 56.08s/it]\u001b[A\n",
580
- " 83%|████████▎ | 5/6 [50:46<11:01, 661.78s/it]\n",
581
- "Reading files...(13): 0%| | 0/13 [00:00<?, ?it/s]\u001b[A\n",
582
- "Reading files...(13): 8%|▊ | 1/13 [01:03<12:46, 63.88s/it]\u001b[A\n",
583
- "Reading files...(13): 15%|█▌ | 2/13 [02:08<11:50, 64.56s/it]\u001b[A\n",
584
- "Reading files...(13): 23%|██▎ | 3/13 [03:10<10:32, 63.22s/it]\u001b[A\n",
585
- "Reading files...(13): 31%|███ | 4/13 [04:07<09:05, 60.63s/it]\u001b[A\n",
586
- "Reading files...(13): 38%|███▊ | 5/13 [05:09<08:11, 61.41s/it]\u001b[A\n",
587
- "Reading files...(13): 46%|████▌ | 6/13 [06:12<07:13, 61.92s/it]\u001b[A\n",
588
- "Reading files...(13): 54%|█████▍ | 7/13 [07:13<06:09, 61.50s/it]\u001b[A\n",
589
- "Reading files...(13): 62%|██████▏ | 8/13 [08:15<05:08, 61.64s/it]\u001b[A\n",
590
- "Reading files...(13): 69%|██████▉ | 9/13 [09:17<04:07, 61.81s/it]\u001b[A\n",
591
- "Reading files...(13): 77%|███████▋ | 10/13 [10:19<03:05, 61.96s/it]\u001b[A\n",
592
- "Reading files...(13): 92%|█████████▏| 12/13 [11:23<00:47, 47.75s/it]\u001b[A\n",
593
- "Reading files...(13): 100%|██████████| 13/13 [12:27<00:00, 57.50s/it]\u001b[A\n",
594
- "100%|██████████| 6/6 [1:03:31<00:00, 635.28s/it]\n"
595
- ]
596
- }
597
- ],
598
- "source": [
599
- "import os\n",
600
- "import pandas as pd\n",
601
- "from tqdm.auto import tqdm\n",
602
- "\n",
603
- "# 대기질 데이터를 불러와서 하나의 파일로 합친다.\n",
604
- "def get_data(year):\n",
605
- " directory = f'../../data/대기질/{year}/'\n",
606
- " files = os.listdir(directory)\n",
607
- " data = []\n",
608
- " \n",
609
- " # 파일 목록에서 디렉토리를 제외하고 오직 Excel 파일만 처리\n",
610
- " for file in tqdm(files, desc=f\"Reading files...({len(files)})\"):\n",
611
- " file_path = os.path.join(directory, file)\n",
612
- " if os.path.isfile(file_path) and file_path.endswith(('.xls', '.xlsx')): # Excel 파일 확장자만 허용\n",
613
- " data.append(pd.read_excel(file_path, usecols=[\"지역\", '망', \"측정소코드\", \"측정소명\", \"측정일시\", \"O3\", \"NO2\", \"PM10\", \"PM25\", \"주소\"]))\n",
614
- " \n",
615
- " return pd.concat(data)\n",
616
- "\n",
617
- "years = [2018, 2019, 2020,2021,2022,2023] # 2018년부터 2023년까지의 데이터를 합친다.\n",
618
- "for year in tqdm(years):\n",
619
- " data = get_data(year)\n",
620
- " data = add_date(data)\n",
621
- " data.reset_index(drop=True, inplace=True)\n",
622
- " data.to_feather(f\"../../data/대기질/{year}.feather\")\n"
623
- ]
624
- },
625
- {
626
- "cell_type": "code",
627
- "execution_count": 6,
628
- "metadata": {},
629
- "outputs": [
630
- {
631
- "data": {
632
- "text/html": [
633
- "<div>\n",
634
- "<style scoped>\n",
635
- " .dataframe tbody tr th:only-of-type {\n",
636
- " vertical-align: middle;\n",
637
- " }\n",
638
- "\n",
639
- " .dataframe tbody tr th {\n",
640
- " vertical-align: top;\n",
641
- " }\n",
642
- "\n",
643
- " .dataframe thead th {\n",
644
- " text-align: right;\n",
645
- " }\n",
646
- "</style>\n",
647
- "<table border=\"1\" class=\"dataframe\">\n",
648
- " <thead>\n",
649
- " <tr style=\"text-align: right;\">\n",
650
- " <th></th>\n",
651
- " <th>지역</th>\n",
652
- " <th>망</th>\n",
653
- " <th>측정소코드</th>\n",
654
- " <th>측정소명</th>\n",
655
- " <th>측정일시</th>\n",
656
- " <th>O3</th>\n",
657
- " <th>NO2</th>\n",
658
- " <th>PM10</th>\n",
659
- " <th>PM25</th>\n",
660
- " <th>주소</th>\n",
661
- " <th>year</th>\n",
662
- " <th>month</th>\n",
663
- " <th>day</th>\n",
664
- " <th>hour</th>\n",
665
- " </tr>\n",
666
- " </thead>\n",
667
- " <tbody>\n",
668
- " <tr>\n",
669
- " <th>0</th>\n",
670
- " <td>서울 중구</td>\n",
671
- " <td>도시대기</td>\n",
672
- " <td>111121</td>\n",
673
- " <td>중구</td>\n",
674
- " <td>2023-07-01 01:00:00</td>\n",
675
- " <td>0.0249</td>\n",
676
- " <td>0.0188</td>\n",
677
- " <td>21.0</td>\n",
678
- " <td>19.0</td>\n",
679
- " <td>서울 중구 덕수궁길 15</td>\n",
680
- " <td>2023.0</td>\n",
681
- " <td>7.0</td>\n",
682
- " <td>1.0</td>\n",
683
- " <td>1.0</td>\n",
684
- " </tr>\n",
685
- " <tr>\n",
686
- " <th>1</th>\n",
687
- " <td>서울 중구</td>\n",
688
- " <td>도시대기</td>\n",
689
- " <td>111121</td>\n",
690
- " <td>중구</td>\n",
691
- " <td>2023-07-01 02:00:00</td>\n",
692
- " <td>0.0263</td>\n",
693
- " <td>0.0163</td>\n",
694
- " <td>18.0</td>\n",
695
- " <td>15.0</td>\n",
696
- " <td>서울 중구 덕수궁길 15</td>\n",
697
- " <td>2023.0</td>\n",
698
- " <td>7.0</td>\n",
699
- " <td>1.0</td>\n",
700
- " <td>2.0</td>\n",
701
- " </tr>\n",
702
- " <tr>\n",
703
- " <th>2</th>\n",
704
- " <td>서울 중구</td>\n",
705
- " <td>도시대기</td>\n",
706
- " <td>111121</td>\n",
707
- " <td>중구</td>\n",
708
- " <td>2023-07-01 03:00:00</td>\n",
709
- " <td>0.0218</td>\n",
710
- " <td>0.0192</td>\n",
711
- " <td>24.0</td>\n",
712
- " <td>21.0</td>\n",
713
- " <td>서울 중구 덕수궁길 15</td>\n",
714
- " <td>2023.0</td>\n",
715
- " <td>7.0</td>\n",
716
- " <td>1.0</td>\n",
717
- " <td>3.0</td>\n",
718
- " </tr>\n",
719
- " <tr>\n",
720
- " <th>3</th>\n",
721
- " <td>서울 중구</td>\n",
722
- " <td>도시대기</td>\n",
723
- " <td>111121</td>\n",
724
- " <td>중구</td>\n",
725
- " <td>2023-07-01 04:00:00</td>\n",
726
- " <td>0.0131</td>\n",
727
- " <td>0.0214</td>\n",
728
- " <td>25.0</td>\n",
729
- " <td>19.0</td>\n",
730
- " <td>서울 중구 덕수궁길 15</td>\n",
731
- " <td>2023.0</td>\n",
732
- " <td>7.0</td>\n",
733
- " <td>1.0</td>\n",
734
- " <td>4.0</td>\n",
735
- " </tr>\n",
736
- " <tr>\n",
737
- " <th>4</th>\n",
738
- " <td>서울 중구</td>\n",
739
- " <td>도시대기</td>\n",
740
- " <td>111121</td>\n",
741
- " <td>중구</td>\n",
742
- " <td>2023-07-01 05:00:00</td>\n",
743
- " <td>0.0131</td>\n",
744
- " <td>0.0160</td>\n",
745
- " <td>25.0</td>\n",
746
- " <td>21.0</td>\n",
747
- " <td>서울 중구 덕수궁길 15</td>\n",
748
- " <td>2023.0</td>\n",
749
- " <td>7.0</td>\n",
750
- " <td>1.0</td>\n",
751
- " <td>5.0</td>\n",
752
- " </tr>\n",
753
- " <tr>\n",
754
- " <th>5</th>\n",
755
- " <td>서울 중구</td>\n",
756
- " <td>도시대기</td>\n",
757
- " <td>111121</td>\n",
758
- " <td>중구</td>\n",
759
- " <td>2023-07-01 06:00:00</td>\n",
760
- " <td>0.0115</td>\n",
761
- " <td>0.0196</td>\n",
762
- " <td>23.0</td>\n",
763
- " <td>18.0</td>\n",
764
- " <td>서울 중구 덕수궁길 15</td>\n",
765
- " <td>2023.0</td>\n",
766
- " <td>7.0</td>\n",
767
- " <td>1.0</td>\n",
768
- " <td>6.0</td>\n",
769
- " </tr>\n",
770
- " <tr>\n",
771
- " <th>6</th>\n",
772
- " <td>서울 중구</td>\n",
773
- " <td>도시대기</td>\n",
774
- " <td>111121</td>\n",
775
- " <td>중구</td>\n",
776
- " <td>2023-07-01 07:00:00</td>\n",
777
- " <td>0.0094</td>\n",
778
- " <td>0.0230</td>\n",
779
- " <td>26.0</td>\n",
780
- " <td>21.0</td>\n",
781
- " <td>서울 중구 덕수궁길 15</td>\n",
782
- " <td>2023.0</td>\n",
783
- " <td>7.0</td>\n",
784
- " <td>1.0</td>\n",
785
- " <td>7.0</td>\n",
786
- " </tr>\n",
787
- " <tr>\n",
788
- " <th>7</th>\n",
789
- " <td>서울 중구</td>\n",
790
- " <td>도시대기</td>\n",
791
- " <td>111121</td>\n",
792
- " <td>중구</td>\n",
793
- " <td>2023-07-01 08:00:00</td>\n",
794
- " <td>0.0222</td>\n",
795
- " <td>0.0175</td>\n",
796
- " <td>26.0</td>\n",
797
- " <td>20.0</td>\n",
798
- " <td>서울 중구 덕수궁길 15</td>\n",
799
- " <td>2023.0</td>\n",
800
- " <td>7.0</td>\n",
801
- " <td>1.0</td>\n",
802
- " <td>8.0</td>\n",
803
- " </tr>\n",
804
- " <tr>\n",
805
- " <th>8</th>\n",
806
- " <td>서울 중구</td>\n",
807
- " <td>도시대기</td>\n",
808
- " <td>111121</td>\n",
809
- " <td>중구</td>\n",
810
- " <td>2023-07-01 09:00:00</td>\n",
811
- " <td>0.0396</td>\n",
812
- " <td>0.0153</td>\n",
813
- " <td>27.0</td>\n",
814
- " <td>20.0</td>\n",
815
- " <td>서울 중구 덕수궁길 15</td>\n",
816
- " <td>2023.0</td>\n",
817
- " <td>7.0</td>\n",
818
- " <td>1.0</td>\n",
819
- " <td>9.0</td>\n",
820
- " </tr>\n",
821
- " <tr>\n",
822
- " <th>9</th>\n",
823
- " <td>서울 중구</td>\n",
824
- " <td>도시대기</td>\n",
825
- " <td>111121</td>\n",
826
- " <td>중구</td>\n",
827
- " <td>2023-07-01 10:00:00</td>\n",
828
- " <td>0.0530</td>\n",
829
- " <td>0.0105</td>\n",
830
- " <td>19.0</td>\n",
831
- " <td>16.0</td>\n",
832
- " <td>서울 중구 덕수궁길 15</td>\n",
833
- " <td>2023.0</td>\n",
834
- " <td>7.0</td>\n",
835
- " <td>1.0</td>\n",
836
- " <td>10.0</td>\n",
837
- " </tr>\n",
838
- " <tr>\n",
839
- " <th>10</th>\n",
840
- " <td>서울 중구</td>\n",
841
- " <td>도시대기</td>\n",
842
- " <td>111121</td>\n",
843
- " <td>중구</td>\n",
844
- " <td>2023-07-01 11:00:00</td>\n",
845
- " <td>0.0607</td>\n",
846
- " <td>0.0090</td>\n",
847
- " <td>20.0</td>\n",
848
- " <td>20.0</td>\n",
849
- " <td>서울 중구 덕수궁길 15</td>\n",
850
- " <td>2023.0</td>\n",
851
- " <td>7.0</td>\n",
852
- " <td>1.0</td>\n",
853
- " <td>11.0</td>\n",
854
- " </tr>\n",
855
- " <tr>\n",
856
- " <th>11</th>\n",
857
- " <td>서울 중구</td>\n",
858
- " <td>도시대기</td>\n",
859
- " <td>111121</td>\n",
860
- " <td>중구</td>\n",
861
- " <td>2023-07-01 12:00:00</td>\n",
862
- " <td>0.0688</td>\n",
863
- " <td>0.0114</td>\n",
864
- " <td>20.0</td>\n",
865
- " <td>17.0</td>\n",
866
- " <td>서울 중구 덕수궁길 15</td>\n",
867
- " <td>2023.0</td>\n",
868
- " <td>7.0</td>\n",
869
- " <td>1.0</td>\n",
870
- " <td>12.0</td>\n",
871
- " </tr>\n",
872
- " <tr>\n",
873
- " <th>12</th>\n",
874
- " <td>서울 중구</td>\n",
875
- " <td>도시대기</td>\n",
876
- " <td>111121</td>\n",
877
- " <td>중구</td>\n",
878
- " <td>2023-07-01 13:00:00</td>\n",
879
- " <td>0.0758</td>\n",
880
- " <td>0.0101</td>\n",
881
- " <td>23.0</td>\n",
882
- " <td>17.0</td>\n",
883
- " <td>서울 중구 덕수궁길 15</td>\n",
884
- " <td>2023.0</td>\n",
885
- " <td>7.0</td>\n",
886
- " <td>1.0</td>\n",
887
- " <td>13.0</td>\n",
888
- " </tr>\n",
889
- " <tr>\n",
890
- " <th>13</th>\n",
891
- " <td>서울 중구</td>\n",
892
- " <td>도시대기</td>\n",
893
- " <td>111121</td>\n",
894
- " <td>중구</td>\n",
895
- " <td>2023-07-01 14:00:00</td>\n",
896
- " <td>0.0743</td>\n",
897
- " <td>0.0093</td>\n",
898
- " <td>20.0</td>\n",
899
- " <td>17.0</td>\n",
900
- " <td>서울 중구 덕수궁길 15</td>\n",
901
- " <td>2023.0</td>\n",
902
- " <td>7.0</td>\n",
903
- " <td>1.0</td>\n",
904
- " <td>14.0</td>\n",
905
- " </tr>\n",
906
- " <tr>\n",
907
- " <th>14</th>\n",
908
- " <td>서울 중구</td>\n",
909
- " <td>도시대기</td>\n",
910
- " <td>111121</td>\n",
911
- " <td>중구</td>\n",
912
- " <td>2023-07-01 15:00:00</td>\n",
913
- " <td>0.0749</td>\n",
914
- " <td>0.0100</td>\n",
915
- " <td>19.0</td>\n",
916
- " <td>11.0</td>\n",
917
- " <td>서울 중구 덕수궁길 15</td>\n",
918
- " <td>2023.0</td>\n",
919
- " <td>7.0</td>\n",
920
- " <td>1.0</td>\n",
921
- " <td>15.0</td>\n",
922
- " </tr>\n",
923
- " <tr>\n",
924
- " <th>15</th>\n",
925
- " <td>서울 중구</td>\n",
926
- " <td>도시대기</td>\n",
927
- " <td>111121</td>\n",
928
- " <td>중구</td>\n",
929
- " <td>2023-07-01 16:00:00</td>\n",
930
- " <td>0.0716</td>\n",
931
- " <td>0.0092</td>\n",
932
- " <td>19.0</td>\n",
933
- " <td>15.0</td>\n",
934
- " <td>서울 중구 덕수궁길 15</td>\n",
935
- " <td>2023.0</td>\n",
936
- " <td>7.0</td>\n",
937
- " <td>1.0</td>\n",
938
- " <td>16.0</td>\n",
939
- " </tr>\n",
940
- " <tr>\n",
941
- " <th>16</th>\n",
942
- " <td>서울 중구</td>\n",
943
- " <td>도시대기</td>\n",
944
- " <td>111121</td>\n",
945
- " <td>중구</td>\n",
946
- " <td>2023-07-01 17:00:00</td>\n",
947
- " <td>0.0613</td>\n",
948
- " <td>0.0099</td>\n",
949
- " <td>18.0</td>\n",
950
- " <td>15.0</td>\n",
951
- " <td>서울 중구 덕수궁길 15</td>\n",
952
- " <td>2023.0</td>\n",
953
- " <td>7.0</td>\n",
954
- " <td>1.0</td>\n",
955
- " <td>17.0</td>\n",
956
- " </tr>\n",
957
- " <tr>\n",
958
- " <th>17</th>\n",
959
- " <td>서울 중구</td>\n",
960
- " <td>도시대기</td>\n",
961
- " <td>111121</td>\n",
962
- " <td>중구</td>\n",
963
- " <td>2023-07-01 18:00:00</td>\n",
964
- " <td>0.0496</td>\n",
965
- " <td>0.0098</td>\n",
966
- " <td>18.0</td>\n",
967
- " <td>14.0</td>\n",
968
- " <td>서울 중구 덕수궁길 15</td>\n",
969
- " <td>2023.0</td>\n",
970
- " <td>7.0</td>\n",
971
- " <td>1.0</td>\n",
972
- " <td>18.0</td>\n",
973
- " </tr>\n",
974
- " <tr>\n",
975
- " <th>18</th>\n",
976
- " <td>서울 중구</td>\n",
977
- " <td>도시대기</td>\n",
978
- " <td>111121</td>\n",
979
- " <td>중구</td>\n",
980
- " <td>2023-07-01 19:00:00</td>\n",
981
- " <td>0.0473</td>\n",
982
- " <td>0.0124</td>\n",
983
- " <td>17.0</td>\n",
984
- " <td>17.0</td>\n",
985
- " <td>서울 중구 덕수궁길 15</td>\n",
986
- " <td>2023.0</td>\n",
987
- " <td>7.0</td>\n",
988
- " <td>1.0</td>\n",
989
- " <td>19.0</td>\n",
990
- " </tr>\n",
991
- " <tr>\n",
992
- " <th>19</th>\n",
993
- " <td>서울 중구</td>\n",
994
- " <td>도시대기</td>\n",
995
- " <td>111121</td>\n",
996
- " <td>중구</td>\n",
997
- " <td>2023-07-01 20:00:00</td>\n",
998
- " <td>0.0498</td>\n",
999
- " <td>0.0170</td>\n",
1000
- " <td>17.0</td>\n",
1001
- " <td>15.0</td>\n",
1002
- " <td>서울 중구 덕수궁길 15</td>\n",
1003
- " <td>2023.0</td>\n",
1004
- " <td>7.0</td>\n",
1005
- " <td>1.0</td>\n",
1006
- " <td>20.0</td>\n",
1007
- " </tr>\n",
1008
- " <tr>\n",
1009
- " <th>20</th>\n",
1010
- " <td>서울 중구</td>\n",
1011
- " <td>도시대기</td>\n",
1012
- " <td>111121</td>\n",
1013
- " <td>중구</td>\n",
1014
- " <td>2023-07-01 21:00:00</td>\n",
1015
- " <td>0.0616</td>\n",
1016
- " <td>0.0134</td>\n",
1017
- " <td>23.0</td>\n",
1018
- " <td>20.0</td>\n",
1019
- " <td>서울 중구 덕수궁길 15</td>\n",
1020
- " <td>2023.0</td>\n",
1021
- " <td>7.0</td>\n",
1022
- " <td>1.0</td>\n",
1023
- " <td>21.0</td>\n",
1024
- " </tr>\n",
1025
- " <tr>\n",
1026
- " <th>21</th>\n",
1027
- " <td>서울 중구</td>\n",
1028
- " <td>도시대기</td>\n",
1029
- " <td>111121</td>\n",
1030
- " <td>중구</td>\n",
1031
- " <td>2023-07-01 22:00:00</td>\n",
1032
- " <td>0.0543</td>\n",
1033
- " <td>0.0109</td>\n",
1034
- " <td>18.0</td>\n",
1035
- " <td>16.0</td>\n",
1036
- " <td>서울 중구 덕수궁길 15</td>\n",
1037
- " <td>2023.0</td>\n",
1038
- " <td>7.0</td>\n",
1039
- " <td>1.0</td>\n",
1040
- " <td>22.0</td>\n",
1041
- " </tr>\n",
1042
- " <tr>\n",
1043
- " <th>22</th>\n",
1044
- " <td>서울 중구</td>\n",
1045
- " <td>도시대기</td>\n",
1046
- " <td>111121</td>\n",
1047
- " <td>중구</td>\n",
1048
- " <td>2023-07-01 23:00:00</td>\n",
1049
- " <td>0.0507</td>\n",
1050
- " <td>0.0113</td>\n",
1051
- " <td>17.0</td>\n",
1052
- " <td>16.0</td>\n",
1053
- " <td>서울 중구 덕수궁길 15</td>\n",
1054
- " <td>2023.0</td>\n",
1055
- " <td>7.0</td>\n",
1056
- " <td>1.0</td>\n",
1057
- " <td>23.0</td>\n",
1058
- " </tr>\n",
1059
- " <tr>\n",
1060
- " <th>23</th>\n",
1061
- " <td>서울 중구</td>\n",
1062
- " <td>도시대기</td>\n",
1063
- " <td>111121</td>\n",
1064
- " <td>중구</td>\n",
1065
- " <td>NaT</td>\n",
1066
- " <td>0.0427</td>\n",
1067
- " <td>0.0125</td>\n",
1068
- " <td>17.0</td>\n",
1069
- " <td>16.0</td>\n",
1070
- " <td>서울 중구 덕수궁길 15</td>\n",
1071
- " <td>NaN</td>\n",
1072
- " <td>NaN</td>\n",
1073
- " <td>NaN</td>\n",
1074
- " <td>NaN</td>\n",
1075
- " </tr>\n",
1076
- " <tr>\n",
1077
- " <th>24</th>\n",
1078
- " <td>서울 중구</td>\n",
1079
- " <td>도시대기</td>\n",
1080
- " <td>111121</td>\n",
1081
- " <td>중구</td>\n",
1082
- " <td>2023-07-02 01:00:00</td>\n",
1083
- " <td>0.0334</td>\n",
1084
- " <td>0.0148</td>\n",
1085
- " <td>21.0</td>\n",
1086
- " <td>20.0</td>\n",
1087
- " <td>서울 중구 덕수궁길 15</td>\n",
1088
- " <td>2023.0</td>\n",
1089
- " <td>7.0</td>\n",
1090
- " <td>2.0</td>\n",
1091
- " <td>1.0</td>\n",
1092
- " </tr>\n",
1093
- " <tr>\n",
1094
- " <th>25</th>\n",
1095
- " <td>서울 중구</td>\n",
1096
- " <td>도시대기</td>\n",
1097
- " <td>111121</td>\n",
1098
- " <td>중구</td>\n",
1099
- " <td>2023-07-02 02:00:00</td>\n",
1100
- " <td>0.0337</td>\n",
1101
- " <td>0.0133</td>\n",
1102
- " <td>22.0</td>\n",
1103
- " <td>18.0</td>\n",
1104
- " <td>서울 중구 덕수궁길 15</td>\n",
1105
- " <td>2023.0</td>\n",
1106
- " <td>7.0</td>\n",
1107
- " <td>2.0</td>\n",
1108
- " <td>2.0</td>\n",
1109
- " </tr>\n",
1110
- " <tr>\n",
1111
- " <th>26</th>\n",
1112
- " <td>서울 중구</td>\n",
1113
- " <td>도시대기</td>\n",
1114
- " <td>111121</td>\n",
1115
- " <td>중구</td>\n",
1116
- " <td>2023-07-02 03:00:00</td>\n",
1117
- " <td>0.0260</td>\n",
1118
- " <td>0.0162</td>\n",
1119
- " <td>25.0</td>\n",
1120
- " <td>20.0</td>\n",
1121
- " <td>서울 중구 덕수궁길 15</td>\n",
1122
- " <td>2023.0</td>\n",
1123
- " <td>7.0</td>\n",
1124
- " <td>2.0</td>\n",
1125
- " <td>3.0</td>\n",
1126
- " </tr>\n",
1127
- " <tr>\n",
1128
- " <th>27</th>\n",
1129
- " <td>서울 중구</td>\n",
1130
- " <td>도시대기</td>\n",
1131
- " <td>111121</td>\n",
1132
- " <td>중구</td>\n",
1133
- " <td>2023-07-02 04:00:00</td>\n",
1134
- " <td>0.0195</td>\n",
1135
- " <td>0.0179</td>\n",
1136
- " <td>22.0</td>\n",
1137
- " <td>18.0</td>\n",
1138
- " <td>서울 중구 덕수궁길 15</td>\n",
1139
- " <td>2023.0</td>\n",
1140
- " <td>7.0</td>\n",
1141
- " <td>2.0</td>\n",
1142
- " <td>4.0</td>\n",
1143
- " </tr>\n",
1144
- " <tr>\n",
1145
- " <th>28</th>\n",
1146
- " <td>서울 중구</td>\n",
1147
- " <td>도시대기</td>\n",
1148
- " <td>111121</td>\n",
1149
- " <td>중구</td>\n",
1150
- " <td>2023-07-02 05:00:00</td>\n",
1151
- " <td>0.0171</td>\n",
1152
- " <td>0.0170</td>\n",
1153
- " <td>19.0</td>\n",
1154
- " <td>17.0</td>\n",
1155
- " <td>서울 중구 덕수궁길 15</td>\n",
1156
- " <td>2023.0</td>\n",
1157
- " <td>7.0</td>\n",
1158
- " <td>2.0</td>\n",
1159
- " <td>5.0</td>\n",
1160
- " </tr>\n",
1161
- " <tr>\n",
1162
- " <th>29</th>\n",
1163
- " <td>서울 중구</td>\n",
1164
- " <td>도시대기</td>\n",
1165
- " <td>111121</td>\n",
1166
- " <td>중구</td>\n",
1167
- " <td>2023-07-02 06:00:00</td>\n",
1168
- " <td>0.0181</td>\n",
1169
- " <td>0.0145</td>\n",
1170
- " <td>14.0</td>\n",
1171
- " <td>10.0</td>\n",
1172
- " <td>서울 중구 덕수궁길 15</td>\n",
1173
- " <td>2023.0</td>\n",
1174
- " <td>7.0</td>\n",
1175
- " <td>2.0</td>\n",
1176
- " <td>6.0</td>\n",
1177
- " </tr>\n",
1178
- " <tr>\n",
1179
- " <th>30</th>\n",
1180
- " <td>서울 중구</td>\n",
1181
- " <td>도시대기</td>\n",
1182
- " <td>111121</td>\n",
1183
- " <td>중구</td>\n",
1184
- " <td>2023-07-02 07:00:00</td>\n",
1185
- " <td>0.0174</td>\n",
1186
- " <td>0.0156</td>\n",
1187
- " <td>11.0</td>\n",
1188
- " <td>10.0</td>\n",
1189
- " <td>서울 중구 덕수궁길 15</td>\n",
1190
- " <td>2023.0</td>\n",
1191
- " <td>7.0</td>\n",
1192
- " <td>2.0</td>\n",
1193
- " <td>7.0</td>\n",
1194
- " </tr>\n",
1195
- " <tr>\n",
1196
- " <th>31</th>\n",
1197
- " <td>서울 중구</td>\n",
1198
- " <td>도시대기</td>\n",
1199
- " <td>111121</td>\n",
1200
- " <td>중구</td>\n",
1201
- " <td>2023-07-02 08:00:00</td>\n",
1202
- " <td>0.0213</td>\n",
1203
- " <td>0.0147</td>\n",
1204
- " <td>12.0</td>\n",
1205
- " <td>9.0</td>\n",
1206
- " <td>서울 중구 덕수궁길 15</td>\n",
1207
- " <td>2023.0</td>\n",
1208
- " <td>7.0</td>\n",
1209
- " <td>2.0</td>\n",
1210
- " <td>8.0</td>\n",
1211
- " </tr>\n",
1212
- " <tr>\n",
1213
- " <th>32</th>\n",
1214
- " <td>서울 중구</td>\n",
1215
- " <td>도시대기</td>\n",
1216
- " <td>111121</td>\n",
1217
- " <td>중구</td>\n",
1218
- " <td>2023-07-02 09:00:00</td>\n",
1219
- " <td>0.0267</td>\n",
1220
- " <td>0.0143</td>\n",
1221
- " <td>11.0</td>\n",
1222
- " <td>10.0</td>\n",
1223
- " <td>서울 중구 덕수궁길 15</td>\n",
1224
- " <td>2023.0</td>\n",
1225
- " <td>7.0</td>\n",
1226
- " <td>2.0</td>\n",
1227
- " <td>9.0</td>\n",
1228
- " </tr>\n",
1229
- " <tr>\n",
1230
- " <th>33</th>\n",
1231
- " <td>서울 중구</td>\n",
1232
- " <td>도시대기</td>\n",
1233
- " <td>111121</td>\n",
1234
- " <td>중구</td>\n",
1235
- " <td>2023-07-02 10:00:00</td>\n",
1236
- " <td>0.0289</td>\n",
1237
- " <td>0.0155</td>\n",
1238
- " <td>12.0</td>\n",
1239
- " <td>9.0</td>\n",
1240
- " <td>서울 중구 덕수궁길 15</td>\n",
1241
- " <td>2023.0</td>\n",
1242
- " <td>7.0</td>\n",
1243
- " <td>2.0</td>\n",
1244
- " <td>10.0</td>\n",
1245
- " </tr>\n",
1246
- " <tr>\n",
1247
- " <th>34</th>\n",
1248
- " <td>서울 중구</td>\n",
1249
- " <td>도시대기</td>\n",
1250
- " <td>111121</td>\n",
1251
- " <td>중구</td>\n",
1252
- " <td>2023-07-02 11:00:00</td>\n",
1253
- " <td>0.0381</td>\n",
1254
- " <td>0.0108</td>\n",
1255
- " <td>13.0</td>\n",
1256
- " <td>13.0</td>\n",
1257
- " <td>서울 중구 덕수궁길 15</td>\n",
1258
- " <td>2023.0</td>\n",
1259
- " <td>7.0</td>\n",
1260
- " <td>2.0</td>\n",
1261
- " <td>11.0</td>\n",
1262
- " </tr>\n",
1263
- " <tr>\n",
1264
- " <th>35</th>\n",
1265
- " <td>서울 중구</td>\n",
1266
- " <td>도시대기</td>\n",
1267
- " <td>111121</td>\n",
1268
- " <td>중구</td>\n",
1269
- " <td>2023-07-02 12:00:00</td>\n",
1270
- " <td>0.0441</td>\n",
1271
- " <td>0.0079</td>\n",
1272
- " <td>13.0</td>\n",
1273
- " <td>12.0</td>\n",
1274
- " <td>서울 중구 덕수궁길 15</td>\n",
1275
- " <td>2023.0</td>\n",
1276
- " <td>7.0</td>\n",
1277
- " <td>2.0</td>\n",
1278
- " <td>12.0</td>\n",
1279
- " </tr>\n",
1280
- " <tr>\n",
1281
- " <th>36</th>\n",
1282
- " <td>서울 중구</td>\n",
1283
- " <td>도시대기</td>\n",
1284
- " <td>111121</td>\n",
1285
- " <td>중구</td>\n",
1286
- " <td>2023-07-02 13:00:00</td>\n",
1287
- " <td>0.0489</td>\n",
1288
- " <td>0.0067</td>\n",
1289
- " <td>8.0</td>\n",
1290
- " <td>10.0</td>\n",
1291
- " <td>서울 중구 덕수궁길 15</td>\n",
1292
- " <td>2023.0</td>\n",
1293
- " <td>7.0</td>\n",
1294
- " <td>2.0</td>\n",
1295
- " <td>13.0</td>\n",
1296
- " </tr>\n",
1297
- " <tr>\n",
1298
- " <th>37</th>\n",
1299
- " <td>서울 중구</td>\n",
1300
- " <td>도시대기</td>\n",
1301
- " <td>111121</td>\n",
1302
- " <td>중구</td>\n",
1303
- " <td>2023-07-02 14:00:00</td>\n",
1304
- " <td>0.0498</td>\n",
1305
- " <td>0.0072</td>\n",
1306
- " <td>11.0</td>\n",
1307
- " <td>10.0</td>\n",
1308
- " <td>서울 중구 덕수궁길 15</td>\n",
1309
- " <td>2023.0</td>\n",
1310
- " <td>7.0</td>\n",
1311
- " <td>2.0</td>\n",
1312
- " <td>14.0</td>\n",
1313
- " </tr>\n",
1314
- " <tr>\n",
1315
- " <th>38</th>\n",
1316
- " <td>서울 중구</td>\n",
1317
- " <td>도시대기</td>\n",
1318
- " <td>111121</td>\n",
1319
- " <td>중구</td>\n",
1320
- " <td>2023-07-02 15:00:00</td>\n",
1321
- " <td>0.0459</td>\n",
1322
- " <td>0.0073</td>\n",
1323
- " <td>14.0</td>\n",
1324
- " <td>12.0</td>\n",
1325
- " <td>서울 중구 덕수궁길 15</td>\n",
1326
- " <td>2023.0</td>\n",
1327
- " <td>7.0</td>\n",
1328
- " <td>2.0</td>\n",
1329
- " <td>15.0</td>\n",
1330
- " </tr>\n",
1331
- " <tr>\n",
1332
- " <th>39</th>\n",
1333
- " <td>서울 중구</td>\n",
1334
- " <td>도시대기</td>\n",
1335
- " <td>111121</td>\n",
1336
- " <td>중구</td>\n",
1337
- " <td>2023-07-02 16:00:00</td>\n",
1338
- " <td>0.0474</td>\n",
1339
- " <td>0.0079</td>\n",
1340
- " <td>12.0</td>\n",
1341
- " <td>11.0</td>\n",
1342
- " <td>서울 중구 덕수궁길 15</td>\n",
1343
- " <td>2023.0</td>\n",
1344
- " <td>7.0</td>\n",
1345
- " <td>2.0</td>\n",
1346
- " <td>16.0</td>\n",
1347
- " </tr>\n",
1348
- " </tbody>\n",
1349
- "</table>\n",
1350
- "</div>"
1351
- ],
1352
- "text/plain": [
1353
- " 지역 망 측정소코드 측정소명 측정일시 O3 NO2 PM10 PM25 \\\n",
1354
- "0 서울 중구 도시대기 111121 중구 2023-07-01 01:00:00 0.0249 0.0188 21.0 19.0 \n",
1355
- "1 서울 중구 도시대기 111121 중구 2023-07-01 02:00:00 0.0263 0.0163 18.0 15.0 \n",
1356
- "2 서울 중구 도시대기 111121 중구 2023-07-01 03:00:00 0.0218 0.0192 24.0 21.0 \n",
1357
- "3 서울 중구 도시대기 111121 중구 2023-07-01 04:00:00 0.0131 0.0214 25.0 19.0 \n",
1358
- "4 서울 중구 도시대기 111121 중구 2023-07-01 05:00:00 0.0131 0.0160 25.0 21.0 \n",
1359
- "5 서울 중구 도시대기 111121 중구 2023-07-01 06:00:00 0.0115 0.0196 23.0 18.0 \n",
1360
- "6 서울 중구 도시대기 111121 중구 2023-07-01 07:00:00 0.0094 0.0230 26.0 21.0 \n",
1361
- "7 서울 중구 도시대기 111121 중구 2023-07-01 08:00:00 0.0222 0.0175 26.0 20.0 \n",
1362
- "8 서울 중구 도시대기 111121 중구 2023-07-01 09:00:00 0.0396 0.0153 27.0 20.0 \n",
1363
- "9 서울 중구 도시대기 111121 중구 2023-07-01 10:00:00 0.0530 0.0105 19.0 16.0 \n",
1364
- "10 서울 중구 도시대기 111121 중구 2023-07-01 11:00:00 0.0607 0.0090 20.0 20.0 \n",
1365
- "11 서울 중구 도시대기 111121 중구 2023-07-01 12:00:00 0.0688 0.0114 20.0 17.0 \n",
1366
- "12 서울 중구 도시대기 111121 중구 2023-07-01 13:00:00 0.0758 0.0101 23.0 17.0 \n",
1367
- "13 서울 중구 도시대기 111121 중구 2023-07-01 14:00:00 0.0743 0.0093 20.0 17.0 \n",
1368
- "14 서울 중구 도시대기 111121 중구 2023-07-01 15:00:00 0.0749 0.0100 19.0 11.0 \n",
1369
- "15 서울 중구 도시대기 111121 중구 2023-07-01 16:00:00 0.0716 0.0092 19.0 15.0 \n",
1370
- "16 서울 중구 도시대기 111121 중구 2023-07-01 17:00:00 0.0613 0.0099 18.0 15.0 \n",
1371
- "17 서울 중구 도시대기 111121 중구 2023-07-01 18:00:00 0.0496 0.0098 18.0 14.0 \n",
1372
- "18 서울 중구 도시대기 111121 중구 2023-07-01 19:00:00 0.0473 0.0124 17.0 17.0 \n",
1373
- "19 서울 중구 도시대기 111121 중구 2023-07-01 20:00:00 0.0498 0.0170 17.0 15.0 \n",
1374
- "20 서울 중구 도시대기 111121 중구 2023-07-01 21:00:00 0.0616 0.0134 23.0 20.0 \n",
1375
- "21 서울 중구 도시대기 111121 중구 2023-07-01 22:00:00 0.0543 0.0109 18.0 16.0 \n",
1376
- "22 서울 중구 도시대기 111121 중구 2023-07-01 23:00:00 0.0507 0.0113 17.0 16.0 \n",
1377
- "23 서울 중구 도시대기 111121 중구 NaT 0.0427 0.0125 17.0 16.0 \n",
1378
- "24 서울 중구 도시대기 111121 중구 2023-07-02 01:00:00 0.0334 0.0148 21.0 20.0 \n",
1379
- "25 서울 중구 도시대기 111121 중구 2023-07-02 02:00:00 0.0337 0.0133 22.0 18.0 \n",
1380
- "26 서울 중구 도시대기 111121 중구 2023-07-02 03:00:00 0.0260 0.0162 25.0 20.0 \n",
1381
- "27 서울 중구 도시대기 111121 중구 2023-07-02 04:00:00 0.0195 0.0179 22.0 18.0 \n",
1382
- "28 서울 중구 도시대기 111121 중구 2023-07-02 05:00:00 0.0171 0.0170 19.0 17.0 \n",
1383
- "29 서울 중구 도시대기 111121 중구 2023-07-02 06:00:00 0.0181 0.0145 14.0 10.0 \n",
1384
- "30 서울 중구 도시대기 111121 중구 2023-07-02 07:00:00 0.0174 0.0156 11.0 10.0 \n",
1385
- "31 서울 중구 도시대기 111121 중구 2023-07-02 08:00:00 0.0213 0.0147 12.0 9.0 \n",
1386
- "32 서울 중구 도시대기 111121 중구 2023-07-02 09:00:00 0.0267 0.0143 11.0 10.0 \n",
1387
- "33 서울 중구 도시대기 111121 중구 2023-07-02 10:00:00 0.0289 0.0155 12.0 9.0 \n",
1388
- "34 서울 중구 도시대기 111121 중구 2023-07-02 11:00:00 0.0381 0.0108 13.0 13.0 \n",
1389
- "35 서울 중구 도시대기 111121 중구 2023-07-02 12:00:00 0.0441 0.0079 13.0 12.0 \n",
1390
- "36 서울 중구 도시대기 111121 중구 2023-07-02 13:00:00 0.0489 0.0067 8.0 10.0 \n",
1391
- "37 서울 중구 도시대기 111121 중구 2023-07-02 14:00:00 0.0498 0.0072 11.0 10.0 \n",
1392
- "38 서울 중구 도시대기 111121 중구 2023-07-02 15:00:00 0.0459 0.0073 14.0 12.0 \n",
1393
- "39 서울 중구 도시대기 111121 중구 2023-07-02 16:00:00 0.0474 0.0079 12.0 11.0 \n",
1394
- "\n",
1395
- " 주소 year month day hour \n",
1396
- "0 서울 중구 덕수궁길 15 2023.0 7.0 1.0 1.0 \n",
1397
- "1 서울 중구 덕수궁길 15 2023.0 7.0 1.0 2.0 \n",
1398
- "2 서울 중구 덕수궁길 15 2023.0 7.0 1.0 3.0 \n",
1399
- "3 서울 중구 덕수궁길 15 2023.0 7.0 1.0 4.0 \n",
1400
- "4 서울 중구 덕수궁길 15 2023.0 7.0 1.0 5.0 \n",
1401
- "5 서울 중구 덕수궁길 15 2023.0 7.0 1.0 6.0 \n",
1402
- "6 서울 중구 덕수궁길 15 2023.0 7.0 1.0 7.0 \n",
1403
- "7 서울 중구 덕수궁길 15 2023.0 7.0 1.0 8.0 \n",
1404
- "8 서울 중구 덕수궁길 15 2023.0 7.0 1.0 9.0 \n",
1405
- "9 서울 중구 덕수궁길 15 2023.0 7.0 1.0 10.0 \n",
1406
- "10 서울 중구 덕수궁길 15 2023.0 7.0 1.0 11.0 \n",
1407
- "11 서울 중구 덕수궁길 15 2023.0 7.0 1.0 12.0 \n",
1408
- "12 서울 중구 덕수궁길 15 2023.0 7.0 1.0 13.0 \n",
1409
- "13 서울 중구 덕수궁길 15 2023.0 7.0 1.0 14.0 \n",
1410
- "14 서울 중구 덕수궁길 15 2023.0 7.0 1.0 15.0 \n",
1411
- "15 서울 중구 덕수궁길 15 2023.0 7.0 1.0 16.0 \n",
1412
- "16 서울 중구 덕수궁길 15 2023.0 7.0 1.0 17.0 \n",
1413
- "17 서울 중구 덕수궁길 15 2023.0 7.0 1.0 18.0 \n",
1414
- "18 서울 중구 덕수궁길 15 2023.0 7.0 1.0 19.0 \n",
1415
- "19 서울 중구 덕수궁길 15 2023.0 7.0 1.0 20.0 \n",
1416
- "20 서울 중구 덕수궁길 15 2023.0 7.0 1.0 21.0 \n",
1417
- "21 서울 중구 덕수궁길 15 2023.0 7.0 1.0 22.0 \n",
1418
- "22 서울 중구 덕수궁길 15 2023.0 7.0 1.0 23.0 \n",
1419
- "23 서울 중구 덕수궁길 15 NaN NaN NaN NaN \n",
1420
- "24 서울 중구 덕수궁길 15 2023.0 7.0 2.0 1.0 \n",
1421
- "25 서울 중구 덕수궁길 15 2023.0 7.0 2.0 2.0 \n",
1422
- "26 서울 중구 덕수궁길 15 2023.0 7.0 2.0 3.0 \n",
1423
- "27 서울 중구 덕수궁길 15 2023.0 7.0 2.0 4.0 \n",
1424
- "28 서울 중구 덕수궁길 15 2023.0 7.0 2.0 5.0 \n",
1425
- "29 서울 중구 덕수궁길 15 2023.0 7.0 2.0 6.0 \n",
1426
- "30 서울 중구 덕수궁길 15 2023.0 7.0 2.0 7.0 \n",
1427
- "31 서울 중구 덕수궁길 15 2023.0 7.0 2.0 8.0 \n",
1428
- "32 서울 중구 덕수궁길 15 2023.0 7.0 2.0 9.0 \n",
1429
- "33 서울 중구 덕수궁길 15 2023.0 7.0 2.0 10.0 \n",
1430
- "34 서울 중구 덕수궁길 15 2023.0 7.0 2.0 11.0 \n",
1431
- "35 서울 중구 덕수궁길 15 2023.0 7.0 2.0 12.0 \n",
1432
- "36 서울 중구 덕수궁길 15 2023.0 7.0 2.0 13.0 \n",
1433
- "37 서울 중구 덕수궁길 15 2023.0 7.0 2.0 14.0 \n",
1434
- "38 서울 중구 덕수궁길 15 2023.0 7.0 2.0 15.0 \n",
1435
- "39 서울 중구 덕수궁길 15 2023.0 7.0 2.0 16.0 "
1436
- ]
1437
- },
1438
- "execution_count": 6,
1439
- "metadata": {},
1440
- "output_type": "execute_result"
1441
- }
1442
- ],
1443
- "source": [
1444
- "data.head(40)"
1445
- ]
1446
- }
1447
- ],
1448
- "metadata": {
1449
- "kernelspec": {
1450
- "display_name": "py39",
1451
- "language": "python",
1452
- "name": "python3"
1453
- },
1454
- "language_info": {
1455
- "codemirror_mode": {
1456
- "name": "ipython",
1457
- "version": 3
1458
- },
1459
- "file_extension": ".py",
1460
- "mimetype": "text/x-python",
1461
- "name": "python",
1462
- "nbconvert_exporter": "python",
1463
- "pygments_lexer": "ipython3",
1464
- "version": "3.9.18"
1465
- }
1466
- },
1467
- "nbformat": 4,
1468
- "nbformat_minor": 4
1469
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add9940cd69df5b8475875a3732861be7dd1f0f81b10dbcd669e08fb8434925b
3
+ size 65675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Analysis_code/1.data_preprocessing/1.data_merge.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/1.data_preprocessing/3.make_train_test.ipynb CHANGED
@@ -1,1099 +1,3 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pandas as pd\n",
10
- "import numpy as np\n",
11
- "import matplotlib.pyplot as plt\n",
12
- "import seaborn as sns\n",
13
- "from sklearn.model_selection import train_test_split\n",
14
- "from collections import Counter"
15
- ]
16
- },
17
- {
18
- "cell_type": "code",
19
- "execution_count": 2,
20
- "metadata": {},
21
- "outputs": [],
22
- "source": [
23
- "df_seoul = pd.read_feather(\"../../data/data_for_modeling/df_seoul.feather\")\n",
24
- "df_busan = pd.read_feather(\"../../data/data_for_modeling/df_busan.feather\")\n",
25
- "df_incheon = pd.read_feather(\"../../data/data_for_modeling/df_incheon.feather\")\n",
26
- "df_daegu = pd.read_feather(\"../../data/data_for_modeling/df_daegu.feather\")\n",
27
- "df_daejeon = pd.read_feather(\"../../data/data_for_modeling/df_daejeon.feather\")\n",
28
- "df_gwangju = pd.read_feather(\"../../data/data_for_modeling/df_gwangju.feather\")"
29
- ]
30
- },
31
- {
32
- "cell_type": "code",
33
- "execution_count": 3,
34
- "metadata": {},
35
- "outputs": [
36
- {
37
- "data": {
38
- "text/plain": [
39
- "Counter({2: 48534, 1: 3941, 0: 109})"
40
- ]
41
- },
42
- "execution_count": 3,
43
- "metadata": {},
44
- "output_type": "execute_result"
45
- }
46
- ],
47
- "source": [
48
- "Counter(df_seoul['multi_class'])"
49
- ]
50
- },
51
- {
52
- "cell_type": "code",
53
- "execution_count": 4,
54
- "metadata": {},
55
- "outputs": [
56
- {
57
- "data": {
58
- "text/plain": [
59
- "Counter({2: 50069, 1: 2350, 0: 165})"
60
- ]
61
- },
62
- "execution_count": 4,
63
- "metadata": {},
64
- "output_type": "execute_result"
65
- }
66
- ],
67
- "source": [
68
- "Counter(df_busan['multi_class'])"
69
- ]
70
- },
71
- {
72
- "cell_type": "code",
73
- "execution_count": 5,
74
- "metadata": {},
75
- "outputs": [
76
- {
77
- "data": {
78
- "text/plain": [
79
- "Counter({2: 44944, 1: 6658, 0: 982})"
80
- ]
81
- },
82
- "execution_count": 5,
83
- "metadata": {},
84
- "output_type": "execute_result"
85
- }
86
- ],
87
- "source": [
88
- "Counter(df_incheon['multi_class'])"
89
- ]
90
- },
91
- {
92
- "cell_type": "code",
93
- "execution_count": 6,
94
- "metadata": {},
95
- "outputs": [
96
- {
97
- "data": {
98
- "text/plain": [
99
- "Counter({2: 50919, 1: 1610, 0: 55})"
100
- ]
101
- },
102
- "execution_count": 6,
103
- "metadata": {},
104
- "output_type": "execute_result"
105
- }
106
- ],
107
- "source": [
108
- "Counter(df_daegu['multi_class'])"
109
- ]
110
- },
111
- {
112
- "cell_type": "code",
113
- "execution_count": 7,
114
- "metadata": {},
115
- "outputs": [
116
- {
117
- "data": {
118
- "text/plain": [
119
- "Counter({2: 48047, 1: 4227, 0: 310})"
120
- ]
121
- },
122
- "execution_count": 7,
123
- "metadata": {},
124
- "output_type": "execute_result"
125
- }
126
- ],
127
- "source": [
128
- "Counter(df_daejeon['multi_class'])"
129
- ]
130
- },
131
- {
132
- "cell_type": "code",
133
- "execution_count": 8,
134
- "metadata": {},
135
- "outputs": [
136
- {
137
- "data": {
138
- "text/plain": [
139
- "Counter({2: 48405, 1: 4015, 0: 164})"
140
- ]
141
- },
142
- "execution_count": 8,
143
- "metadata": {},
144
- "output_type": "execute_result"
145
- }
146
- ],
147
- "source": [
148
- "Counter(df_gwangju['multi_class'])"
149
- ]
150
- },
151
- {
152
- "cell_type": "code",
153
- "execution_count": 9,
154
- "metadata": {},
155
- "outputs": [
156
- {
157
- "data": {
158
- "text/plain": [
159
- "(52584, 30)"
160
- ]
161
- },
162
- "execution_count": 9,
163
- "metadata": {},
164
- "output_type": "execute_result"
165
- }
166
- ],
167
- "source": [
168
- "df_seoul.shape"
169
- ]
170
- },
171
- {
172
- "cell_type": "code",
173
- "execution_count": 10,
174
- "metadata": {},
175
- "outputs": [],
176
- "source": [
177
- "df_seoul = df_seoul.loc[df_seoul['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
178
- "df_busan = df_busan.loc[df_busan['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
179
- "df_incheon = df_incheon.loc[df_incheon['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
180
- "df_daegu = df_daegu.loc[df_daegu['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
181
- "df_daejeon = df_daejeon.loc[df_daejeon['year'].isin([2018, 2019, 2020, 2021]),:].copy()\n",
182
- "df_gwangju = df_gwangju.loc[df_gwangju['year'].isin([2018, 2019, 2020, 2021]),:].copy()"
183
- ]
184
- },
185
- {
186
- "cell_type": "code",
187
- "execution_count": 11,
188
- "metadata": {},
189
- "outputs": [],
190
- "source": [
191
- "cols = [col for col in df_seoul.columns if col != \"multi_class\"] + [\"multi_class\"]"
192
- ]
193
- },
194
- {
195
- "cell_type": "code",
196
- "execution_count": 12,
197
- "metadata": {},
198
- "outputs": [],
199
- "source": [
200
- "df_seoul = df_seoul[cols]\n",
201
- "df_busan = df_busan[cols]\n",
202
- "df_incheon = df_incheon[cols]\n",
203
- "df_daegu = df_daegu[cols]\n",
204
- "df_daejeon = df_daejeon[cols]\n",
205
- "df_gwangju = df_gwangju[cols]"
206
- ]
207
- },
208
- {
209
- "cell_type": "code",
210
- "execution_count": 13,
211
- "metadata": {},
212
- "outputs": [],
213
- "source": [
214
- "df_seoul_train = df_seoul.loc[df_seoul['year'].isin([2018, 2019, 2020]),:].copy()\n",
215
- "df_seoul_test = df_seoul.loc[df_seoul['year'].isin([2021]),:].copy()\n",
216
- "\n",
217
- "df_busan_train = df_busan.loc[df_busan['year'].isin([2018, 2019, 2020]),:].copy()\n",
218
- "df_busan_test = df_busan.loc[df_busan['year'].isin([2021]),:].copy()\n",
219
- "\n",
220
- "df_incheon_train = df_incheon.loc[df_incheon['year'].isin([2018, 2019, 2020]),:].copy()\n",
221
- "df_incheon_test = df_incheon.loc[df_incheon['year'].isin([2021]),:].copy()\n",
222
- "\n",
223
- "df_daegu_train = df_daegu.loc[df_daegu['year'].isin([2018, 2019, 2020]),:].copy()\n",
224
- "df_daegu_test = df_daegu.loc[df_daegu['year'].isin([2021]),:].copy()\n",
225
- "\n",
226
- "df_daejeon_train = df_daejeon.loc[df_daejeon['year'].isin([2018, 2019, 2020]),:].copy()\n",
227
- "df_daejeon_test = df_daejeon.loc[df_daejeon['year'].isin([2021]),:].copy()\n",
228
- "\n",
229
- "df_gwangju_train = df_gwangju.loc[df_gwangju['year'].isin([2018, 2019, 2020]),:].copy()\n",
230
- "df_gwangju_test = df_gwangju.loc[df_gwangju['year'].isin([2021]),:].copy()"
231
- ]
232
- },
233
- {
234
- "cell_type": "code",
235
- "execution_count": 14,
236
- "metadata": {},
237
- "outputs": [
238
- {
239
- "data": {
240
- "text/html": [
241
- "<div>\n",
242
- "<style scoped>\n",
243
- " .dataframe tbody tr th:only-of-type {\n",
244
- " vertical-align: middle;\n",
245
- " }\n",
246
- "\n",
247
- " .dataframe tbody tr th {\n",
248
- " vertical-align: top;\n",
249
- " }\n",
250
- "\n",
251
- " .dataframe thead th {\n",
252
- " text-align: right;\n",
253
- " }\n",
254
- "</style>\n",
255
- "<table border=\"1\" class=\"dataframe\">\n",
256
- " <thead>\n",
257
- " <tr style=\"text-align: right;\">\n",
258
- " <th></th>\n",
259
- " <th>temp_C</th>\n",
260
- " <th>precip_mm</th>\n",
261
- " <th>wind_speed</th>\n",
262
- " <th>wind_dir</th>\n",
263
- " <th>hm</th>\n",
264
- " <th>vap_pressure</th>\n",
265
- " <th>dewpoint_C</th>\n",
266
- " <th>loc_pressure</th>\n",
267
- " <th>sea_pressure</th>\n",
268
- " <th>solarRad</th>\n",
269
- " <th>...</th>\n",
270
- " <th>year</th>\n",
271
- " <th>month</th>\n",
272
- " <th>hour</th>\n",
273
- " <th>ground_temp - temp_C</th>\n",
274
- " <th>hour_sin</th>\n",
275
- " <th>hour_cos</th>\n",
276
- " <th>month_sin</th>\n",
277
- " <th>month_cos</th>\n",
278
- " <th>visi</th>\n",
279
- " <th>multi_class</th>\n",
280
- " </tr>\n",
281
- " </thead>\n",
282
- " <tbody>\n",
283
- " <tr>\n",
284
- " <th>0</th>\n",
285
- " <td>1.2</td>\n",
286
- " <td>0.0</td>\n",
287
- " <td>1.6</td>\n",
288
- " <td>360</td>\n",
289
- " <td>35.0</td>\n",
290
- " <td>2.3</td>\n",
291
- " <td>-12.6</td>\n",
292
- " <td>1015.8</td>\n",
293
- " <td>1024.6</td>\n",
294
- " <td>0.00</td>\n",
295
- " <td>...</td>\n",
296
- " <td>2018</td>\n",
297
- " <td>1</td>\n",
298
- " <td>0</td>\n",
299
- " <td>-5.4</td>\n",
300
- " <td>0.000000</td>\n",
301
- " <td>1.000000e+00</td>\n",
302
- " <td>0.5</td>\n",
303
- " <td>0.866025</td>\n",
304
- " <td>2000.0</td>\n",
305
- " <td>2</td>\n",
306
- " </tr>\n",
307
- " <tr>\n",
308
- " <th>1</th>\n",
309
- " <td>0.5</td>\n",
310
- " <td>0.0</td>\n",
311
- " <td>1.3</td>\n",
312
- " <td>360</td>\n",
313
- " <td>33.0</td>\n",
314
- " <td>2.1</td>\n",
315
- " <td>-13.9</td>\n",
316
- " <td>1015.5</td>\n",
317
- " <td>1024.3</td>\n",
318
- " <td>0.00</td>\n",
319
- " <td>...</td>\n",
320
- " <td>2018</td>\n",
321
- " <td>1</td>\n",
322
- " <td>1</td>\n",
323
- " <td>-5.4</td>\n",
324
- " <td>0.258819</td>\n",
325
- " <td>9.659258e-01</td>\n",
326
- " <td>0.5</td>\n",
327
- " <td>0.866025</td>\n",
328
- " <td>2000.0</td>\n",
329
- " <td>2</td>\n",
330
- " </tr>\n",
331
- " <tr>\n",
332
- " <th>2</th>\n",
333
- " <td>0.1</td>\n",
334
- " <td>0.0</td>\n",
335
- " <td>1.5</td>\n",
336
- " <td>20</td>\n",
337
- " <td>34.0</td>\n",
338
- " <td>2.1</td>\n",
339
- " <td>-13.9</td>\n",
340
- " <td>1015.7</td>\n",
341
- " <td>1024.5</td>\n",
342
- " <td>0.00</td>\n",
343
- " <td>...</td>\n",
344
- " <td>2018</td>\n",
345
- " <td>1</td>\n",
346
- " <td>2</td>\n",
347
- " <td>-5.4</td>\n",
348
- " <td>0.500000</td>\n",
349
- " <td>8.660254e-01</td>\n",
350
- " <td>0.5</td>\n",
351
- " <td>0.866025</td>\n",
352
- " <td>2000.0</td>\n",
353
- " <td>2</td>\n",
354
- " </tr>\n",
355
- " <tr>\n",
356
- " <th>3</th>\n",
357
- " <td>0.0</td>\n",
358
- " <td>0.0</td>\n",
359
- " <td>2.1</td>\n",
360
- " <td>320</td>\n",
361
- " <td>37.0</td>\n",
362
- " <td>2.3</td>\n",
363
- " <td>-12.9</td>\n",
364
- " <td>1015.9</td>\n",
365
- " <td>1024.7</td>\n",
366
- " <td>0.00</td>\n",
367
- " <td>...</td>\n",
368
- " <td>2018</td>\n",
369
- " <td>1</td>\n",
370
- " <td>3</td>\n",
371
- " <td>-5.0</td>\n",
372
- " <td>0.707107</td>\n",
373
- " <td>7.071068e-01</td>\n",
374
- " <td>0.5</td>\n",
375
- " <td>0.866025</td>\n",
376
- " <td>2000.0</td>\n",
377
- " <td>2</td>\n",
378
- " </tr>\n",
379
- " <tr>\n",
380
- " <th>4</th>\n",
381
- " <td>-0.1</td>\n",
382
- " <td>0.0</td>\n",
383
- " <td>2.3</td>\n",
384
- " <td>340</td>\n",
385
- " <td>42.0</td>\n",
386
- " <td>2.5</td>\n",
387
- " <td>-11.5</td>\n",
388
- " <td>1016.0</td>\n",
389
- " <td>1024.9</td>\n",
390
- " <td>0.00</td>\n",
391
- " <td>...</td>\n",
392
- " <td>2018</td>\n",
393
- " <td>1</td>\n",
394
- " <td>4</td>\n",
395
- " <td>-4.3</td>\n",
396
- " <td>0.866025</td>\n",
397
- " <td>5.000000e-01</td>\n",
398
- " <td>0.5</td>\n",
399
- " <td>0.866025</td>\n",
400
- " <td>2000.0</td>\n",
401
- " <td>2</td>\n",
402
- " </tr>\n",
403
- " <tr>\n",
404
- " <th>5</th>\n",
405
- " <td>-0.1</td>\n",
406
- " <td>0.0</td>\n",
407
- " <td>2.8</td>\n",
408
- " <td>50</td>\n",
409
- " <td>43.0</td>\n",
410
- " <td>2.6</td>\n",
411
- " <td>-11.2</td>\n",
412
- " <td>1016.0</td>\n",
413
- " <td>1024.9</td>\n",
414
- " <td>0.00</td>\n",
415
- " <td>...</td>\n",
416
- " <td>2018</td>\n",
417
- " <td>1</td>\n",
418
- " <td>5</td>\n",
419
- " <td>-4.0</td>\n",
420
- " <td>0.965926</td>\n",
421
- " <td>2.588190e-01</td>\n",
422
- " <td>0.5</td>\n",
423
- " <td>0.866025</td>\n",
424
- " <td>2000.0</td>\n",
425
- " <td>2</td>\n",
426
- " </tr>\n",
427
- " <tr>\n",
428
- " <th>6</th>\n",
429
- " <td>-0.5</td>\n",
430
- " <td>0.0</td>\n",
431
- " <td>2.1</td>\n",
432
- " <td>20</td>\n",
433
- " <td>45.0</td>\n",
434
- " <td>2.6</td>\n",
435
- " <td>-11.0</td>\n",
436
- " <td>1016.5</td>\n",
437
- " <td>1025.4</td>\n",
438
- " <td>0.00</td>\n",
439
- " <td>...</td>\n",
440
- " <td>2018</td>\n",
441
- " <td>1</td>\n",
442
- " <td>6</td>\n",
443
- " <td>-4.1</td>\n",
444
- " <td>1.000000</td>\n",
445
- " <td>6.123234e-17</td>\n",
446
- " <td>0.5</td>\n",
447
- " <td>0.866025</td>\n",
448
- " <td>2000.0</td>\n",
449
- " <td>2</td>\n",
450
- " </tr>\n",
451
- " <tr>\n",
452
- " <th>7</th>\n",
453
- " <td>-0.8</td>\n",
454
- " <td>0.0</td>\n",
455
- " <td>2.5</td>\n",
456
- " <td>340</td>\n",
457
- " <td>45.0</td>\n",
458
- " <td>2.6</td>\n",
459
- " <td>-11.2</td>\n",
460
- " <td>1017.1</td>\n",
461
- " <td>1026.0</td>\n",
462
- " <td>0.00</td>\n",
463
- " <td>...</td>\n",
464
- " <td>2018</td>\n",
465
- " <td>1</td>\n",
466
- " <td>7</td>\n",
467
- " <td>-4.5</td>\n",
468
- " <td>0.965926</td>\n",
469
- " <td>-2.588190e-01</td>\n",
470
- " <td>0.5</td>\n",
471
- " <td>0.866025</td>\n",
472
- " <td>2000.0</td>\n",
473
- " <td>2</td>\n",
474
- " </tr>\n",
475
- " <tr>\n",
476
- " <th>8</th>\n",
477
- " <td>-0.5</td>\n",
478
- " <td>0.0</td>\n",
479
- " <td>1.2</td>\n",
480
- " <td>360</td>\n",
481
- " <td>43.0</td>\n",
482
- " <td>2.5</td>\n",
483
- " <td>-11.5</td>\n",
484
- " <td>1017.4</td>\n",
485
- " <td>1026.3</td>\n",
486
- " <td>0.03</td>\n",
487
- " <td>...</td>\n",
488
- " <td>2018</td>\n",
489
- " <td>1</td>\n",
490
- " <td>8</td>\n",
491
- " <td>-4.0</td>\n",
492
- " <td>0.866025</td>\n",
493
- " <td>-5.000000e-01</td>\n",
494
- " <td>0.5</td>\n",
495
- " <td>0.866025</td>\n",
496
- " <td>2000.0</td>\n",
497
- " <td>2</td>\n",
498
- " </tr>\n",
499
- " <tr>\n",
500
- " <th>9</th>\n",
501
- " <td>1.7</td>\n",
502
- " <td>0.0</td>\n",
503
- " <td>2.1</td>\n",
504
- " <td>20</td>\n",
505
- " <td>39.0</td>\n",
506
- " <td>2.7</td>\n",
507
- " <td>-10.8</td>\n",
508
- " <td>1018.1</td>\n",
509
- " <td>1026.9</td>\n",
510
- " <td>0.46</td>\n",
511
- " <td>...</td>\n",
512
- " <td>2018</td>\n",
513
- " <td>1</td>\n",
514
- " <td>9</td>\n",
515
- " <td>2.8</td>\n",
516
- " <td>0.707107</td>\n",
517
- " <td>-7.071068e-01</td>\n",
518
- " <td>0.5</td>\n",
519
- " <td>0.866025</td>\n",
520
- " <td>1953.0</td>\n",
521
- " <td>2</td>\n",
522
- " </tr>\n",
523
- " </tbody>\n",
524
- "</table>\n",
525
- "<p>10 rows × 30 columns</p>\n",
526
- "</div>"
527
- ],
528
- "text/plain": [
529
- " temp_C precip_mm wind_speed wind_dir hm vap_pressure dewpoint_C \\\n",
530
- "0 1.2 0.0 1.6 360 35.0 2.3 -12.6 \n",
531
- "1 0.5 0.0 1.3 360 33.0 2.1 -13.9 \n",
532
- "2 0.1 0.0 1.5 20 34.0 2.1 -13.9 \n",
533
- "3 0.0 0.0 2.1 320 37.0 2.3 -12.9 \n",
534
- "4 -0.1 0.0 2.3 340 42.0 2.5 -11.5 \n",
535
- "5 -0.1 0.0 2.8 50 43.0 2.6 -11.2 \n",
536
- "6 -0.5 0.0 2.1 20 45.0 2.6 -11.0 \n",
537
- "7 -0.8 0.0 2.5 340 45.0 2.6 -11.2 \n",
538
- "8 -0.5 0.0 1.2 360 43.0 2.5 -11.5 \n",
539
- "9 1.7 0.0 2.1 20 39.0 2.7 -10.8 \n",
540
- "\n",
541
- " loc_pressure sea_pressure solarRad ... year month hour \\\n",
542
- "0 1015.8 1024.6 0.00 ... 2018 1 0 \n",
543
- "1 1015.5 1024.3 0.00 ... 2018 1 1 \n",
544
- "2 1015.7 1024.5 0.00 ... 2018 1 2 \n",
545
- "3 1015.9 1024.7 0.00 ... 2018 1 3 \n",
546
- "4 1016.0 1024.9 0.00 ... 2018 1 4 \n",
547
- "5 1016.0 1024.9 0.00 ... 2018 1 5 \n",
548
- "6 1016.5 1025.4 0.00 ... 2018 1 6 \n",
549
- "7 1017.1 1026.0 0.00 ... 2018 1 7 \n",
550
- "8 1017.4 1026.3 0.03 ... 2018 1 8 \n",
551
- "9 1018.1 1026.9 0.46 ... 2018 1 9 \n",
552
- "\n",
553
- " ground_temp - temp_C hour_sin hour_cos month_sin month_cos visi \\\n",
554
- "0 -5.4 0.000000 1.000000e+00 0.5 0.866025 2000.0 \n",
555
- "1 -5.4 0.258819 9.659258e-01 0.5 0.866025 2000.0 \n",
556
- "2 -5.4 0.500000 8.660254e-01 0.5 0.866025 2000.0 \n",
557
- "3 -5.0 0.707107 7.071068e-01 0.5 0.866025 2000.0 \n",
558
- "4 -4.3 0.866025 5.000000e-01 0.5 0.866025 2000.0 \n",
559
- "5 -4.0 0.965926 2.588190e-01 0.5 0.866025 2000.0 \n",
560
- "6 -4.1 1.000000 6.123234e-17 0.5 0.866025 2000.0 \n",
561
- "7 -4.5 0.965926 -2.588190e-01 0.5 0.866025 2000.0 \n",
562
- "8 -4.0 0.866025 -5.000000e-01 0.5 0.866025 2000.0 \n",
563
- "9 2.8 0.707107 -7.071068e-01 0.5 0.866025 1953.0 \n",
564
- "\n",
565
- " multi_class \n",
566
- "0 2 \n",
567
- "1 2 \n",
568
- "2 2 \n",
569
- "3 2 \n",
570
- "4 2 \n",
571
- "5 2 \n",
572
- "6 2 \n",
573
- "7 2 \n",
574
- "8 2 \n",
575
- "9 2 \n",
576
- "\n",
577
- "[10 rows x 30 columns]"
578
- ]
579
- },
580
- "execution_count": 14,
581
- "metadata": {},
582
- "output_type": "execute_result"
583
- }
584
- ],
585
- "source": [
586
- "df_busan_train.head(10)"
587
- ]
588
- },
589
- {
590
- "cell_type": "code",
591
- "execution_count": 15,
592
- "metadata": {},
593
- "outputs": [
594
- {
595
- "data": {
596
- "text/html": [
597
- "<div>\n",
598
- "<style scoped>\n",
599
- " .dataframe tbody tr th:only-of-type {\n",
600
- " vertical-align: middle;\n",
601
- " }\n",
602
- "\n",
603
- " .dataframe tbody tr th {\n",
604
- " vertical-align: top;\n",
605
- " }\n",
606
- "\n",
607
- " .dataframe thead th {\n",
608
- " text-align: right;\n",
609
- " }\n",
610
- "</style>\n",
611
- "<table border=\"1\" class=\"dataframe\">\n",
612
- " <thead>\n",
613
- " <tr style=\"text-align: right;\">\n",
614
- " <th></th>\n",
615
- " <th>temp_C</th>\n",
616
- " <th>precip_mm</th>\n",
617
- " <th>wind_speed</th>\n",
618
- " <th>wind_dir</th>\n",
619
- " <th>hm</th>\n",
620
- " <th>vap_pressure</th>\n",
621
- " <th>dewpoint_C</th>\n",
622
- " <th>loc_pressure</th>\n",
623
- " <th>sea_pressure</th>\n",
624
- " <th>solarRad</th>\n",
625
- " <th>...</th>\n",
626
- " <th>year</th>\n",
627
- " <th>month</th>\n",
628
- " <th>hour</th>\n",
629
- " <th>ground_temp - temp_C</th>\n",
630
- " <th>hour_sin</th>\n",
631
- " <th>hour_cos</th>\n",
632
- " <th>month_sin</th>\n",
633
- " <th>month_cos</th>\n",
634
- " <th>visi</th>\n",
635
- " <th>multi_class</th>\n",
636
- " </tr>\n",
637
- " </thead>\n",
638
- " <tbody>\n",
639
- " <tr>\n",
640
- " <th>26294</th>\n",
641
- " <td>0.1</td>\n",
642
- " <td>0.0</td>\n",
643
- " <td>6.3</td>\n",
644
- " <td>270</td>\n",
645
- " <td>37.0</td>\n",
646
- " <td>2.3</td>\n",
647
- " <td>-12.9</td>\n",
648
- " <td>1013.3</td>\n",
649
- " <td>1022.1</td>\n",
650
- " <td>2.07</td>\n",
651
- " <td>...</td>\n",
652
- " <td>2020</td>\n",
653
- " <td>12</td>\n",
654
- " <td>14</td>\n",
655
- " <td>5.8</td>\n",
656
- " <td>-0.500000</td>\n",
657
- " <td>-8.660254e-01</td>\n",
658
- " <td>-2.449294e-16</td>\n",
659
- " <td>1.0</td>\n",
660
- " <td>5000.0</td>\n",
661
- " <td>2</td>\n",
662
- " </tr>\n",
663
- " <tr>\n",
664
- " <th>26295</th>\n",
665
- " <td>1.2</td>\n",
666
- " <td>0.0</td>\n",
667
- " <td>5.9</td>\n",
668
- " <td>270</td>\n",
669
- " <td>35.0</td>\n",
670
- " <td>2.3</td>\n",
671
- " <td>-12.6</td>\n",
672
- " <td>1013.2</td>\n",
673
- " <td>1022.0</td>\n",
674
- " <td>1.71</td>\n",
675
- " <td>...</td>\n",
676
- " <td>2020</td>\n",
677
- " <td>12</td>\n",
678
- " <td>15</td>\n",
679
- " <td>5.6</td>\n",
680
- " <td>-0.707107</td>\n",
681
- " <td>-7.071068e-01</td>\n",
682
- " <td>-2.449294e-16</td>\n",
683
- " <td>1.0</td>\n",
684
- " <td>5000.0</td>\n",
685
- " <td>2</td>\n",
686
- " </tr>\n",
687
- " <tr>\n",
688
- " <th>26296</th>\n",
689
- " <td>1.6</td>\n",
690
- " <td>0.0</td>\n",
691
- " <td>3.6</td>\n",
692
- " <td>290</td>\n",
693
- " <td>34.0</td>\n",
694
- " <td>2.3</td>\n",
695
- " <td>-12.6</td>\n",
696
- " <td>1012.8</td>\n",
697
- " <td>1021.6</td>\n",
698
- " <td>1.14</td>\n",
699
- " <td>...</td>\n",
700
- " <td>2020</td>\n",
701
- " <td>12</td>\n",
702
- " <td>16</td>\n",
703
- " <td>1.4</td>\n",
704
- " <td>-0.866025</td>\n",
705
- " <td>-5.000000e-01</td>\n",
706
- " <td>-2.449294e-16</td>\n",
707
- " <td>1.0</td>\n",
708
- " <td>5000.0</td>\n",
709
- " <td>2</td>\n",
710
- " </tr>\n",
711
- " <tr>\n",
712
- " <th>26297</th>\n",
713
- " <td>1.2</td>\n",
714
- " <td>0.0</td>\n",
715
- " <td>3.8</td>\n",
716
- " <td>250</td>\n",
717
- " <td>38.0</td>\n",
718
- " <td>2.5</td>\n",
719
- " <td>-11.5</td>\n",
720
- " <td>1012.8</td>\n",
721
- " <td>1021.6</td>\n",
722
- " <td>0.48</td>\n",
723
- " <td>...</td>\n",
724
- " <td>2020</td>\n",
725
- " <td>12</td>\n",
726
- " <td>17</td>\n",
727
- " <td>-0.4</td>\n",
728
- " <td>-0.965926</td>\n",
729
- " <td>-2.588190e-01</td>\n",
730
- " <td>-2.449294e-16</td>\n",
731
- " <td>1.0</td>\n",
732
- " <td>5000.0</td>\n",
733
- " <td>2</td>\n",
734
- " </tr>\n",
735
- " <tr>\n",
736
- " <th>26298</th>\n",
737
- " <td>0.9</td>\n",
738
- " <td>0.0</td>\n",
739
- " <td>3.8</td>\n",
740
- " <td>270</td>\n",
741
- " <td>40.0</td>\n",
742
- " <td>2.6</td>\n",
743
- " <td>-11.2</td>\n",
744
- " <td>1013.1</td>\n",
745
- " <td>1021.9</td>\n",
746
- " <td>0.02</td>\n",
747
- " <td>...</td>\n",
748
- " <td>2020</td>\n",
749
- " <td>12</td>\n",
750
- " <td>18</td>\n",
751
- " <td>-0.8</td>\n",
752
- " <td>-1.000000</td>\n",
753
- " <td>-1.836970e-16</td>\n",
754
- " <td>-2.449294e-16</td>\n",
755
- " <td>1.0</td>\n",
756
- " <td>5000.0</td>\n",
757
- " <td>2</td>\n",
758
- " </tr>\n",
759
- " <tr>\n",
760
- " <th>26299</th>\n",
761
- " <td>0.6</td>\n",
762
- " <td>0.0</td>\n",
763
- " <td>6.2</td>\n",
764
- " <td>270</td>\n",
765
- " <td>41.0</td>\n",
766
- " <td>2.6</td>\n",
767
- " <td>-11.1</td>\n",
768
- " <td>1014.0</td>\n",
769
- " <td>1022.8</td>\n",
770
- " <td>0.00</td>\n",
771
- " <td>...</td>\n",
772
- " <td>2020</td>\n",
773
- " <td>12</td>\n",
774
- " <td>19</td>\n",
775
- " <td>-1.1</td>\n",
776
- " <td>-0.965926</td>\n",
777
- " <td>2.588190e-01</td>\n",
778
- " <td>-2.449294e-16</td>\n",
779
- " <td>1.0</td>\n",
780
- " <td>5000.0</td>\n",
781
- " <td>2</td>\n",
782
- " </tr>\n",
783
- " <tr>\n",
784
- " <th>26300</th>\n",
785
- " <td>0.1</td>\n",
786
- " <td>0.0</td>\n",
787
- " <td>6.0</td>\n",
788
- " <td>270</td>\n",
789
- " <td>44.0</td>\n",
790
- " <td>2.7</td>\n",
791
- " <td>-10.7</td>\n",
792
- " <td>1014.8</td>\n",
793
- " <td>1023.6</td>\n",
794
- " <td>0.00</td>\n",
795
- " <td>...</td>\n",
796
- " <td>2020</td>\n",
797
- " <td>12</td>\n",
798
- " <td>20</td>\n",
799
- " <td>-0.9</td>\n",
800
- " <td>-0.866025</td>\n",
801
- " <td>5.000000e-01</td>\n",
802
- " <td>-2.449294e-16</td>\n",
803
- " <td>1.0</td>\n",
804
- " <td>5000.0</td>\n",
805
- " <td>2</td>\n",
806
- " </tr>\n",
807
- " <tr>\n",
808
- " <th>26301</th>\n",
809
- " <td>-0.2</td>\n",
810
- " <td>0.0</td>\n",
811
- " <td>5.0</td>\n",
812
- " <td>290</td>\n",
813
- " <td>48.0</td>\n",
814
- " <td>2.9</td>\n",
815
- " <td>-9.9</td>\n",
816
- " <td>1014.6</td>\n",
817
- " <td>1023.4</td>\n",
818
- " <td>0.00</td>\n",
819
- " <td>...</td>\n",
820
- " <td>2020</td>\n",
821
- " <td>12</td>\n",
822
- " <td>21</td>\n",
823
- " <td>-0.8</td>\n",
824
- " <td>-0.707107</td>\n",
825
- " <td>7.071068e-01</td>\n",
826
- " <td>-2.449294e-16</td>\n",
827
- " <td>1.0</td>\n",
828
- " <td>5000.0</td>\n",
829
- " <td>2</td>\n",
830
- " </tr>\n",
831
- " <tr>\n",
832
- " <th>26302</th>\n",
833
- " <td>-0.7</td>\n",
834
- " <td>0.0</td>\n",
835
- " <td>2.7</td>\n",
836
- " <td>270</td>\n",
837
- " <td>51.0</td>\n",
838
- " <td>3.0</td>\n",
839
- " <td>-9.6</td>\n",
840
- " <td>1014.8</td>\n",
841
- " <td>1023.6</td>\n",
842
- " <td>0.00</td>\n",
843
- " <td>...</td>\n",
844
- " <td>2020</td>\n",
845
- " <td>12</td>\n",
846
- " <td>22</td>\n",
847
- " <td>-0.6</td>\n",
848
- " <td>-0.500000</td>\n",
849
- " <td>8.660254e-01</td>\n",
850
- " <td>-2.449294e-16</td>\n",
851
- " <td>1.0</td>\n",
852
- " <td>5000.0</td>\n",
853
- " <td>2</td>\n",
854
- " </tr>\n",
855
- " <tr>\n",
856
- " <th>26303</th>\n",
857
- " <td>-0.7</td>\n",
858
- " <td>0.0</td>\n",
859
- " <td>3.8</td>\n",
860
- " <td>250</td>\n",
861
- " <td>55.0</td>\n",
862
- " <td>3.2</td>\n",
863
- " <td>-8.6</td>\n",
864
- " <td>1015.1</td>\n",
865
- " <td>1024.0</td>\n",
866
- " <td>0.00</td>\n",
867
- " <td>...</td>\n",
868
- " <td>2020</td>\n",
869
- " <td>12</td>\n",
870
- " <td>23</td>\n",
871
- " <td>-0.6</td>\n",
872
- " <td>-0.258819</td>\n",
873
- " <td>9.659258e-01</td>\n",
874
- " <td>-2.449294e-16</td>\n",
875
- " <td>1.0</td>\n",
876
- " <td>5000.0</td>\n",
877
- " <td>2</td>\n",
878
- " </tr>\n",
879
- " </tbody>\n",
880
- "</table>\n",
881
- "<p>10 rows × 30 columns</p>\n",
882
- "</div>"
883
- ],
884
- "text/plain": [
885
- " temp_C precip_mm wind_speed wind_dir hm vap_pressure dewpoint_C \\\n",
886
- "26294 0.1 0.0 6.3 270 37.0 2.3 -12.9 \n",
887
- "26295 1.2 0.0 5.9 270 35.0 2.3 -12.6 \n",
888
- "26296 1.6 0.0 3.6 290 34.0 2.3 -12.6 \n",
889
- "26297 1.2 0.0 3.8 250 38.0 2.5 -11.5 \n",
890
- "26298 0.9 0.0 3.8 270 40.0 2.6 -11.2 \n",
891
- "26299 0.6 0.0 6.2 270 41.0 2.6 -11.1 \n",
892
- "26300 0.1 0.0 6.0 270 44.0 2.7 -10.7 \n",
893
- "26301 -0.2 0.0 5.0 290 48.0 2.9 -9.9 \n",
894
- "26302 -0.7 0.0 2.7 270 51.0 3.0 -9.6 \n",
895
- "26303 -0.7 0.0 3.8 250 55.0 3.2 -8.6 \n",
896
- "\n",
897
- " loc_pressure sea_pressure solarRad ... year month hour \\\n",
898
- "26294 1013.3 1022.1 2.07 ... 2020 12 14 \n",
899
- "26295 1013.2 1022.0 1.71 ... 2020 12 15 \n",
900
- "26296 1012.8 1021.6 1.14 ... 2020 12 16 \n",
901
- "26297 1012.8 1021.6 0.48 ... 2020 12 17 \n",
902
- "26298 1013.1 1021.9 0.02 ... 2020 12 18 \n",
903
- "26299 1014.0 1022.8 0.00 ... 2020 12 19 \n",
904
- "26300 1014.8 1023.6 0.00 ... 2020 12 20 \n",
905
- "26301 1014.6 1023.4 0.00 ... 2020 12 21 \n",
906
- "26302 1014.8 1023.6 0.00 ... 2020 12 22 \n",
907
- "26303 1015.1 1024.0 0.00 ... 2020 12 23 \n",
908
- "\n",
909
- " ground_temp - temp_C hour_sin hour_cos month_sin month_cos \\\n",
910
- "26294 5.8 -0.500000 -8.660254e-01 -2.449294e-16 1.0 \n",
911
- "26295 5.6 -0.707107 -7.071068e-01 -2.449294e-16 1.0 \n",
912
- "26296 1.4 -0.866025 -5.000000e-01 -2.449294e-16 1.0 \n",
913
- "26297 -0.4 -0.965926 -2.588190e-01 -2.449294e-16 1.0 \n",
914
- "26298 -0.8 -1.000000 -1.836970e-16 -2.449294e-16 1.0 \n",
915
- "26299 -1.1 -0.965926 2.588190e-01 -2.449294e-16 1.0 \n",
916
- "26300 -0.9 -0.866025 5.000000e-01 -2.449294e-16 1.0 \n",
917
- "26301 -0.8 -0.707107 7.071068e-01 -2.449294e-16 1.0 \n",
918
- "26302 -0.6 -0.500000 8.660254e-01 -2.449294e-16 1.0 \n",
919
- "26303 -0.6 -0.258819 9.659258e-01 -2.449294e-16 1.0 \n",
920
- "\n",
921
- " visi multi_class \n",
922
- "26294 5000.0 2 \n",
923
- "26295 5000.0 2 \n",
924
- "26296 5000.0 2 \n",
925
- "26297 5000.0 2 \n",
926
- "26298 5000.0 2 \n",
927
- "26299 5000.0 2 \n",
928
- "26300 5000.0 2 \n",
929
- "26301 5000.0 2 \n",
930
- "26302 5000.0 2 \n",
931
- "26303 5000.0 2 \n",
932
- "\n",
933
- "[10 rows x 30 columns]"
934
- ]
935
- },
936
- "execution_count": 15,
937
- "metadata": {},
938
- "output_type": "execute_result"
939
- }
940
- ],
941
- "source": [
942
- "df_busan_train.tail(10)"
943
- ]
944
- },
945
- {
946
- "cell_type": "code",
947
- "execution_count": 16,
948
- "metadata": {},
949
- "outputs": [
950
- {
951
- "name": "stdout",
952
- "output_type": "stream",
953
- "text": [
954
- "<class 'pandas.core.frame.DataFrame'>\n",
955
- "Index: 26304 entries, 0 to 26303\n",
956
- "Data columns (total 30 columns):\n",
957
- " # Column Non-Null Count Dtype \n",
958
- "--- ------ -------------- ----- \n",
959
- " 0 temp_C 26304 non-null float64 \n",
960
- " 1 precip_mm 26304 non-null float64 \n",
961
- " 2 wind_speed 26304 non-null float64 \n",
962
- " 3 wind_dir 26304 non-null category\n",
963
- " 4 hm 26304 non-null float64 \n",
964
- " 5 vap_pressure 26304 non-null float64 \n",
965
- " 6 dewpoint_C 26304 non-null float64 \n",
966
- " 7 loc_pressure 26304 non-null float64 \n",
967
- " 8 sea_pressure 26304 non-null float64 \n",
968
- " 9 solarRad 26304 non-null float64 \n",
969
- " 10 snow_cm 26304 non-null float64 \n",
970
- " 11 cloudcover 26304 non-null category\n",
971
- " 12 lm_cloudcover 26304 non-null category\n",
972
- " 13 low_cloudbase 26304 non-null float64 \n",
973
- " 14 groundtemp 26304 non-null float64 \n",
974
- " 15 O3 26304 non-null float64 \n",
975
- " 16 NO2 26304 non-null float64 \n",
976
- " 17 PM10 26304 non-null float64 \n",
977
- " 18 PM25 26304 non-null float64 \n",
978
- " 19 binary_class 26304 non-null int64 \n",
979
- " 20 year 26304 non-null int64 \n",
980
- " 21 month 26304 non-null int64 \n",
981
- " 22 hour 26304 non-null int64 \n",
982
- " 23 ground_temp - temp_C 26304 non-null float64 \n",
983
- " 24 hour_sin 26304 non-null float64 \n",
984
- " 25 hour_cos 26304 non-null float64 \n",
985
- " 26 month_sin 26304 non-null float64 \n",
986
- " 27 month_cos 26304 non-null float64 \n",
987
- " 28 visi 26304 non-null float64 \n",
988
- " 29 multi_class 26304 non-null int64 \n",
989
- "dtypes: category(3), float64(22), int64(5)\n",
990
- "memory usage: 5.7 MB\n"
991
- ]
992
- }
993
- ],
994
- "source": [
995
- "df_busan_train.info()"
996
- ]
997
- },
998
- {
999
- "cell_type": "code",
1000
- "execution_count": 17,
1001
- "metadata": {},
1002
- "outputs": [],
1003
- "source": [
1004
- "df_seoul_train.to_csv(\"../../data/data_for_modeling/seoul_train.csv\")\n",
1005
- "df_seoul_test.to_csv(\"../../data/data_for_modeling/seoul_test.csv\")\n",
1006
- "\n",
1007
- "df_busan_train.to_csv(\"../../data/data_for_modeling/busan_train.csv\")\n",
1008
- "df_busan_test.to_csv(\"../../data/data_for_modeling/busan_test.csv\")\n",
1009
- "\n",
1010
- "df_incheon_train.to_csv(\"../../data/data_for_modeling/incheon_train.csv\")\n",
1011
- "df_incheon_test.to_csv(\"../../data/data_for_modeling/incheon_test.csv\")\n",
1012
- "\n",
1013
- "df_daegu_train.to_csv(\"../../data/data_for_modeling/daegu_train.csv\")\n",
1014
- "df_daegu_test.to_csv(\"../../data/data_for_modeling/daegu_test.csv\")\n",
1015
- "\n",
1016
- "df_daejeon_train.to_csv(\"../../data/data_for_modeling/daejeon_train.csv\")\n",
1017
- "df_daejeon_test.to_csv(\"../../data/data_for_modeling/daejeon_test.csv\")\n",
1018
- "\n",
1019
- "df_gwangju_train.to_csv(\"../../data/data_for_modeling/gwangju_train.csv\")\n",
1020
- "df_gwangju_test.to_csv(\"../../data/data_for_modeling/gwangju_test.csv\")\n",
1021
- "\n",
1022
- "df_seoul_train = pd.read_csv(\"../../data/data_for_modeling/seoul_train.csv\")\n",
1023
- "df_seoul_test = pd.read_csv(\"../../data/data_for_modeling/seoul_test.csv\")\n"
1024
- ]
1025
- },
1026
- {
1027
- "cell_type": "code",
1028
- "execution_count": 18,
1029
- "metadata": {},
1030
- "outputs": [
1031
- {
1032
- "name": "stdout",
1033
- "output_type": "stream",
1034
- "text": [
1035
- "Counter({2: 8266, 1: 481, 0: 13})\n",
1036
- "Counter({2: 23686, 1: 2579, 0: 39})\n",
1037
- "Counter({2: 8455, 1: 281, 0: 24})\n",
1038
- "Counter({2: 24694, 1: 1516, 0: 94})\n",
1039
- "Counter({2: 7373, 1: 1205, 0: 182})\n",
1040
- "Counter({2: 21893, 1: 3892, 0: 519})\n",
1041
- "Counter({2: 8631, 1: 128, 0: 1})\n",
1042
- "Counter({2: 25149, 1: 1107, 0: 48})\n",
1043
- "Counter({2: 8089, 1: 618, 0: 53})\n",
1044
- "Counter({2: 23471, 1: 2660, 0: 173})\n",
1045
- "Counter({2: 8087, 1: 643, 0: 30})\n",
1046
- "Counter({2: 23798, 1: 2411, 0: 95})\n"
1047
- ]
1048
- }
1049
- ],
1050
- "source": [
1051
- "print(Counter(df_seoul_test['multi_class']))\n",
1052
- "print(Counter(df_seoul_train['multi_class']))\n",
1053
- "\n",
1054
- "print(Counter(df_busan_test['multi_class']))\n",
1055
- "print(Counter(df_busan_train['multi_class']))\n",
1056
- "\n",
1057
- "print(Counter(df_incheon_test['multi_class']))\n",
1058
- "print(Counter(df_incheon_train['multi_class']))\n",
1059
- "\n",
1060
- "print(Counter(df_daegu_test['multi_class']))\n",
1061
- "print(Counter(df_daegu_train['multi_class']))\n",
1062
- "\n",
1063
- "print(Counter(df_daejeon_test['multi_class']))\n",
1064
- "print(Counter(df_daejeon_train['multi_class']))\n",
1065
- "\n",
1066
- "print(Counter(df_gwangju_test['multi_class']))\n",
1067
- "print(Counter(df_gwangju_train['multi_class']))"
1068
- ]
1069
- },
1070
- {
1071
- "cell_type": "code",
1072
- "execution_count": null,
1073
- "metadata": {},
1074
- "outputs": [],
1075
- "source": []
1076
- }
1077
- ],
1078
- "metadata": {
1079
- "kernelspec": {
1080
- "display_name": "Python 3",
1081
- "language": "python",
1082
- "name": "python3"
1083
- },
1084
- "language_info": {
1085
- "codemirror_mode": {
1086
- "name": "ipython",
1087
- "version": 3
1088
- },
1089
- "file_extension": ".py",
1090
- "mimetype": "text/x-python",
1091
- "name": "python",
1092
- "nbconvert_exporter": "python",
1093
- "pygments_lexer": "ipython3",
1094
- "version": "3.8.10"
1095
- }
1096
- },
1097
- "nbformat": 4,
1098
- "nbformat_minor": 2
1099
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34adf698f9d895fa830b2ded30023e489cd14a1a81b52959b6cb90089953f906
3
+ size 37198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Analysis_code/3.sampled_data_analysis/oversampling_model_hyperparameter.ipynb CHANGED
@@ -1,574 +1,3 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 2,
6
- "id": "829c34fa",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "\"\"\"\n",
11
- "CTGAN 모델 하이퍼파라미터 추출 및 정리\n",
12
- "논문 작성용으로 모든 저장된 모델의 하이퍼파라미터를 추출합니다.\n",
13
- "\"\"\"\n",
14
- "\n",
15
- "import pandas as pd\n",
16
- "import numpy as np\n",
17
- "from pathlib import Path\n",
18
- "from ctgan import CTGAN\n",
19
- "import re\n",
20
- "from typing import Dict, Any\n",
21
- "import warnings\n",
22
- "warnings.filterwarnings('ignore')\n"
23
- ]
24
- },
25
- {
26
- "cell_type": "code",
27
- "execution_count": 3,
28
- "id": "98679ba3",
29
- "metadata": {},
30
- "outputs": [
31
- {
32
- "name": "stdout",
33
- "output_type": "stream",
34
- "text": [
35
- "총 216개의 모델 파일을 찾았습니다.\n",
36
- "\n",
37
- "처음 5개 파일 예시:\n",
38
- " - ctgan_only_10000_1_busan_class0.pkl\n",
39
- " - ctgan_only_10000_1_busan_class1.pkl\n",
40
- " - ctgan_only_10000_1_daegu_class0.pkl\n",
41
- " - ctgan_only_10000_1_daegu_class1.pkl\n",
42
- " - ctgan_only_10000_1_daejeon_class0.pkl\n"
43
- ]
44
- }
45
- ],
46
- "source": [
47
- "# 모델 디렉토리 경로 설정\n",
48
- "model_dir = Path(\"../save_model/oversampling_models\")\n",
49
- "\n",
50
- "# 모델 파일 목록 확인\n",
51
- "model_files = sorted(list(model_dir.glob(\"*.pkl\")))\n",
52
- "print(f\"총 {len(model_files)}개의 모델 파일을 찾았습니다.\")\n",
53
- "print(f\"\\n처음 5개 파일 예시:\")\n",
54
- "for f in model_files[:5]:\n",
55
- " print(f\" - {f.name}\")\n"
56
- ]
57
- },
58
- {
59
- "cell_type": "code",
60
- "execution_count": 4,
61
- "id": "97cde9e3",
62
- "metadata": {},
63
- "outputs": [
64
- {
65
- "name": "stdout",
66
- "output_type": "stream",
67
- "text": [
68
- "CTGAN 모델 하이퍼파라미터:\n",
69
- " embedding_dim: 64\n",
70
- " generator_dim: (64, 64)\n",
71
- " discriminator_dim: (128, 128)\n",
72
- " batch_size: 256\n",
73
- " epochs: 300\n",
74
- " pac: 8\n",
75
- " discriminator_steps: 2\n",
76
- " generator_lr: 0.0002\n",
77
- " discriminator_lr: 0.0002\n",
78
- " generator_decay: 1e-06\n",
79
- " discriminator_decay: 1e-06\n",
80
- "\n",
81
- "딕셔너리 형태:\n",
82
- "{'embedding_dim': 64, 'generator_dim': (64, 64), 'discriminator_dim': (128, 128), 'batch_size': 256, 'epochs': 300, 'pac': 8, 'discriminator_steps': 2, 'generator_lr': 0.0002, 'discriminator_lr': 0.0002, 'generator_decay': 1e-06, 'discriminator_decay': 1e-06}\n"
83
- ]
84
- }
85
- ],
86
- "source": [
87
- "# CTGAN 모델 로드 및 하이퍼파라미터 확인 예제\n",
88
- "model = CTGAN.load(\"../save_model/oversampling_models/ctgan_only_10000_1_busan_class0.pkl\")\n",
89
- "\n",
90
- "# CTGAN 모델의 하이퍼파라미터는 내부 속성(_로 시작)에 저장되어 있습니다\n",
91
- "print(\"CTGAN 모델 하이퍼파라미터:\")\n",
92
- "print(f\" embedding_dim: {model._embedding_dim}\")\n",
93
- "print(f\" generator_dim: {model._generator_dim}\")\n",
94
- "print(f\" discriminator_dim: {model._discriminator_dim}\")\n",
95
- "print(f\" batch_size: {model._batch_size}\")\n",
96
- "print(f\" epochs: {model._epochs}\")\n",
97
- "print(f\" pac: {model.pac}\") # pac는 공개 속성으로도 접근 가능\n",
98
- "print(f\" discriminator_steps: {model._discriminator_steps}\")\n",
99
- "print(f\" generator_lr: {model._generator_lr}\")\n",
100
- "print(f\" discriminator_lr: {model._discriminator_lr}\")\n",
101
- "print(f\" generator_decay: {model._generator_decay}\")\n",
102
- "print(f\" discriminator_decay: {model._discriminator_decay}\")\n",
103
- "\n",
104
- "# 모든 하이퍼파라미터를 딕셔너리로 추출하는 방법\n",
105
- "hyperparams = {\n",
106
- " 'embedding_dim': model._embedding_dim,\n",
107
- " 'generator_dim': model._generator_dim,\n",
108
- " 'discriminator_dim': model._discriminator_dim,\n",
109
- " 'batch_size': model._batch_size,\n",
110
- " 'epochs': model._epochs,\n",
111
- " 'pac': model.pac,\n",
112
- " 'discriminator_steps': model._discriminator_steps,\n",
113
- " 'generator_lr': model._generator_lr,\n",
114
- " 'discriminator_lr': model._discriminator_lr,\n",
115
- " 'generator_decay': model._generator_decay,\n",
116
- " 'discriminator_decay': model._discriminator_decay,\n",
117
- "}\n",
118
- "print(\"\\n딕셔너리 형태:\")\n",
119
- "print(hyperparams)"
120
- ]
121
- },
122
- {
123
- "cell_type": "code",
124
- "execution_count": 5,
125
- "id": "e3631f3b",
126
- "metadata": {},
127
- "outputs": [
128
- {
129
- "name": "stdout",
130
- "output_type": "stream",
131
- "text": [
132
- "테스트 파일: ctgan_only_10000_1_busan_class0.pkl\n",
133
- "파싱 결과: {'method': 'ctgan', 'sample_size': 10000, 'fold': 1, 'region': 'busan', 'class': 0}\n",
134
- "하이퍼파라미터: {'embedding_dim': 64, 'generator_dim': '(64, 64)', 'discriminator_dim': '(128, 128)', 'pac': 8, 'batch_size': 256, 'discriminator_steps': 2, 'epochs': 300, 'generator_lr': 0.0002, 'discriminator_lr': 0.0002, 'generator_decay': 1e-06, 'discriminator_decay': 1e-06}\n"
135
- ]
136
- }
137
- ],
138
- "source": [
139
- "def parse_model_filename(filename: str) -> Dict[str, Any]:\n",
140
- " \"\"\"\n",
141
- " 모델 파일명에서 정보를 파싱합니다.\n",
142
- " \n",
143
- " 파일명 패턴:\n",
144
- " - ctgan_only_{sample_size}_{fold}_{region}_class{0|1}.pkl\n",
145
- " - smotenc_ctgan_{sample_size}_{fold}_{region}_class{0|1}.pkl\n",
146
- " \n",
147
- " Returns:\n",
148
- " 파싱된 정보 딕셔너리\n",
149
- " \"\"\"\n",
150
- " # 파일명에서 확장자 제거\n",
151
- " name = filename.replace('.pkl', '')\n",
152
- " \n",
153
- " # 패턴 매칭\n",
154
- " if name.startswith('ctgan_only_'):\n",
155
- " method = 'ctgan'\n",
156
- " parts = name.replace('ctgan_only_', '').split('_')\n",
157
- " elif name.startswith('smotenc_ctgan_'):\n",
158
- " method = 'smotenc_ctgan'\n",
159
- " parts = name.replace('smotenc_ctgan_', '').split('_')\n",
160
- " else:\n",
161
- " return None\n",
162
- " \n",
163
- " # sample_size, fold, region, class 추출\n",
164
- " sample_size = int(parts[0])\n",
165
- " fold = int(parts[1])\n",
166
- " region = parts[2]\n",
167
- " class_label = int(parts[3].replace('class', ''))\n",
168
- " \n",
169
- " return {\n",
170
- " 'method': method,\n",
171
- " 'sample_size': sample_size,\n",
172
- " 'fold': fold,\n",
173
- " 'region': region,\n",
174
- " 'class': class_label\n",
175
- " }\n",
176
- "\n",
177
- "\n",
178
- "def extract_hyperparameters(model_path: Path) -> Dict[str, Any]:\n",
179
- " \"\"\"\n",
180
- " CTGAN 모델에서 하이퍼파라미터를 추출합니다.\n",
181
- " \n",
182
- " CTGAN 모델의 하이퍼파라미터는 내부 속성(_로 시작)에 저장되어 있습니다:\n",
183
- " - _embedding_dim: 임베딩 차원\n",
184
- " - _generator_dim: 생성기 네트워크 차원 (튜플)\n",
185
- " - _discriminator_dim: 판별기 네트워크 차원 (튜플)\n",
186
- " - _batch_size: 배치 크기\n",
187
- " - _epochs: 에포크 수\n",
188
- " - _pac: PAC 파라미터 (또는 pac 속성으로 접근 가능)\n",
189
- " - _generator_lr: 생성기 학습률\n",
190
- " - _discriminator_lr: 판별기 학습률\n",
191
- " - _discriminator_steps: 판별기 업데이트 스텝 수\n",
192
- " \n",
193
- " Args:\n",
194
- " model_path: 모델 파일 경로\n",
195
- " \n",
196
- " Returns:\n",
197
- " 하이퍼파라미터 딕셔너리\n",
198
- " \"\"\"\n",
199
- " try:\n",
200
- " # 모델 로드\n",
201
- " model = CTGAN.load(str(model_path))\n",
202
- " \n",
203
- " # 하이퍼파라미터 추출 (내부 속성 사용)\n",
204
- " hyperparams = {\n",
205
- " 'embedding_dim': getattr(model, '_embedding_dim', None),\n",
206
- " 'generator_dim': str(getattr(model, '_generator_dim', None)), # 튜플을 문자열로 변환\n",
207
- " 'discriminator_dim': str(getattr(model, '_discriminator_dim', None)), # 튜플을 문자열로 변환\n",
208
- " 'pac': getattr(model, 'pac', None) or getattr(model, '_pac', None), # pac 속성 또는 _pac 속성\n",
209
- " 'batch_size': getattr(model, '_batch_size', None),\n",
210
- " 'discriminator_steps': getattr(model, '_discriminator_steps', None),\n",
211
- " 'epochs': getattr(model, '_epochs', None),\n",
212
- " 'generator_lr': getattr(model, '_generator_lr', None),\n",
213
- " 'discriminator_lr': getattr(model, '_discriminator_lr', None),\n",
214
- " 'generator_decay': getattr(model, '_generator_decay', None),\n",
215
- " 'discriminator_decay': getattr(model, '_discriminator_decay', None),\n",
216
- " }\n",
217
- " \n",
218
- " return hyperparams\n",
219
- " except Exception as e:\n",
220
- " print(f\"Error loading {model_path.name}: {str(e)}\")\n",
221
- " import traceback\n",
222
- " print(traceback.format_exc())\n",
223
- " return None\n",
224
- "\n",
225
- "\n",
226
- "# 테스트: 첫 번째 모델 파일로 테스트\n",
227
- "if len(model_files) > 0:\n",
228
- " test_file = model_files[0]\n",
229
- " print(f\"테스트 파일: {test_file.name}\")\n",
230
- " parsed = parse_model_filename(test_file.name)\n",
231
- " print(f\"파싱 결과: {parsed}\")\n",
232
- " hyperparams = extract_hyperparameters(test_file)\n",
233
- " print(f\"하이퍼파라미터: {hyperparams}\")\n"
234
- ]
235
- },
236
- {
237
- "cell_type": "code",
238
- "execution_count": 6,
239
- "id": "9fc03ebe",
240
- "metadata": {},
241
- "outputs": [
242
- {
243
- "name": "stdout",
244
- "output_type": "stream",
245
- "text": [
246
- "모든 모델 파일에서 하이퍼파라미터 추출 중...\n",
247
- "================================================================================\n",
248
- "[20/216] 진행 중... (20개 성공)\n",
249
- "[40/216] 진행 중... (40개 성공)\n",
250
- "[60/216] 진행 중... (60개 성공)\n",
251
- "[80/216] 진행 중... (80개 성공)\n",
252
- "[100/216] 진행 중... (100개 성공)\n",
253
- "[120/216] 진행 중... (120개 성공)\n",
254
- "[140/216] 진행 중... (140개 성공)\n",
255
- "[160/216] 진행 중... (160개 성공)\n",
256
- "[180/216] 진행 중... (180개 성공)\n",
257
- "[200/216] 진행 중... (200개 성공)\n",
258
- "================================================================================\n",
259
- "완료! 총 216개의 모델에서 하이퍼파라미터를 추출했습니다.\n"
260
- ]
261
- }
262
- ],
263
- "source": [
264
- "# 모든 모델 파일에서 하이퍼파라미터 추출\n",
265
- "all_results = []\n",
266
- "\n",
267
- "print(\"모든 모델 파일에서 하이퍼파라미터 추출 중...\")\n",
268
- "print(\"=\" * 80)\n",
269
- "\n",
270
- "for i, model_file in enumerate(model_files, 1):\n",
271
- " # 파일명 파싱\n",
272
- " parsed_info = parse_model_filename(model_file.name)\n",
273
- " if parsed_info is None:\n",
274
- " print(f\"[{i}/{len(model_files)}] 스킵: {model_file.name} (파일명 패턴 불일치)\")\n",
275
- " continue\n",
276
- " \n",
277
- " # 하이퍼파라미터 추출\n",
278
- " hyperparams = extract_hyperparameters(model_file)\n",
279
- " if hyperparams is None:\n",
280
- " print(f\"[{i}/{len(model_files)}] 실패: {model_file.name}\")\n",
281
- " continue\n",
282
- " \n",
283
- " # 정보 합치기\n",
284
- " result = {**parsed_info, **hyperparams}\n",
285
- " result['filename'] = model_file.name\n",
286
- " all_results.append(result)\n",
287
- " \n",
288
- " if i % 20 == 0:\n",
289
- " print(f\"[{i}/{len(model_files)}] 진행 중... ({len(all_results)}개 성공)\")\n",
290
- "\n",
291
- "print(\"=\" * 80)\n",
292
- "print(f\"완료! 총 {len(all_results)}개의 모델에서 하이퍼파라미터를 추출했습니다.\")\n"
293
- ]
294
- },
295
- {
296
- "cell_type": "code",
297
- "execution_count": 7,
298
- "id": "223e2b49",
299
- "metadata": {},
300
- "outputs": [
301
- {
302
- "name": "stdout",
303
- "output_type": "stream",
304
- "text": [
305
- "총 216개의 모델 하이퍼파라미터가 정리되었습니다.\n",
306
- "\n",
307
- "컬럼: ['method', 'sample_size', 'fold', 'region', 'class', 'embedding_dim', 'generator_dim', 'discriminator_dim', 'pac', 'batch_size', 'discriminator_steps', 'epochs', 'generator_lr', 'discriminator_lr', 'filename']\n",
308
- "\n",
309
- "처음 5개 행:\n"
310
- ]
311
- },
312
- {
313
- "data": {
314
- "text/html": [
315
- "<div>\n",
316
- "<style scoped>\n",
317
- " .dataframe tbody tr th:only-of-type {\n",
318
- " vertical-align: middle;\n",
319
- " }\n",
320
- "\n",
321
- " .dataframe tbody tr th {\n",
322
- " vertical-align: top;\n",
323
- " }\n",
324
- "\n",
325
- " .dataframe thead th {\n",
326
- " text-align: right;\n",
327
- " }\n",
328
- "</style>\n",
329
- "<table border=\"1\" class=\"dataframe\">\n",
330
- " <thead>\n",
331
- " <tr style=\"text-align: right;\">\n",
332
- " <th></th>\n",
333
- " <th>method</th>\n",
334
- " <th>sample_size</th>\n",
335
- " <th>fold</th>\n",
336
- " <th>region</th>\n",
337
- " <th>class</th>\n",
338
- " <th>embedding_dim</th>\n",
339
- " <th>generator_dim</th>\n",
340
- " <th>discriminator_dim</th>\n",
341
- " <th>pac</th>\n",
342
- " <th>batch_size</th>\n",
343
- " <th>discriminator_steps</th>\n",
344
- " <th>epochs</th>\n",
345
- " <th>generator_lr</th>\n",
346
- " <th>discriminator_lr</th>\n",
347
- " <th>filename</th>\n",
348
- " </tr>\n",
349
- " </thead>\n",
350
- " <tbody>\n",
351
- " <tr>\n",
352
- " <th>0</th>\n",
353
- " <td>ctgan</td>\n",
354
- " <td>7000</td>\n",
355
- " <td>1</td>\n",
356
- " <td>busan</td>\n",
357
- " <td>0</td>\n",
358
- " <td>78</td>\n",
359
- " <td>(128, 128)</td>\n",
360
- " <td>(128, 128)</td>\n",
361
- " <td>8</td>\n",
362
- " <td>256</td>\n",
363
- " <td>3</td>\n",
364
- " <td>300</td>\n",
365
- " <td>0.0002</td>\n",
366
- " <td>0.0002</td>\n",
367
- " <td>ctgan_only_7000_1_busan_class0.pkl</td>\n",
368
- " </tr>\n",
369
- " <tr>\n",
370
- " <th>1</th>\n",
371
- " <td>ctgan</td>\n",
372
- " <td>7000</td>\n",
373
- " <td>1</td>\n",
374
- " <td>busan</td>\n",
375
- " <td>1</td>\n",
376
- " <td>269</td>\n",
377
- " <td>(256, 256)</td>\n",
378
- " <td>(128, 128)</td>\n",
379
- " <td>4</td>\n",
380
- " <td>1024</td>\n",
381
- " <td>1</td>\n",
382
- " <td>300</td>\n",
383
- " <td>0.0002</td>\n",
384
- " <td>0.0002</td>\n",
385
- " <td>ctgan_only_7000_1_busan_class1.pkl</td>\n",
386
- " </tr>\n",
387
- " <tr>\n",
388
- " <th>2</th>\n",
389
- " <td>ctgan</td>\n",
390
- " <td>7000</td>\n",
391
- " <td>1</td>\n",
392
- " <td>daegu</td>\n",
393
- " <td>0</td>\n",
394
- " <td>121</td>\n",
395
- " <td>(128, 128)</td>\n",
396
- " <td>(64, 64)</td>\n",
397
- " <td>4</td>\n",
398
- " <td>64</td>\n",
399
- " <td>2</td>\n",
400
- " <td>300</td>\n",
401
- " <td>0.0002</td>\n",
402
- " <td>0.0002</td>\n",
403
- " <td>ctgan_only_7000_1_daegu_class0.pkl</td>\n",
404
- " </tr>\n",
405
- " <tr>\n",
406
- " <th>3</th>\n",
407
- " <td>ctgan</td>\n",
408
- " <td>7000</td>\n",
409
- " <td>1</td>\n",
410
- " <td>daegu</td>\n",
411
- " <td>1</td>\n",
412
- " <td>217</td>\n",
413
- " <td>(128, 128)</td>\n",
414
- " <td>(128, 128)</td>\n",
415
- " <td>4</td>\n",
416
- " <td>256</td>\n",
417
- " <td>5</td>\n",
418
- " <td>300</td>\n",
419
- " <td>0.0002</td>\n",
420
- " <td>0.0002</td>\n",
421
- " <td>ctgan_only_7000_1_daegu_class1.pkl</td>\n",
422
- " </tr>\n",
423
- " <tr>\n",
424
- " <th>4</th>\n",
425
- " <td>ctgan</td>\n",
426
- " <td>7000</td>\n",
427
- " <td>1</td>\n",
428
- " <td>daejeon</td>\n",
429
- " <td>0</td>\n",
430
- " <td>101</td>\n",
431
- " <td>(128, 128)</td>\n",
432
- " <td>(128, 128)</td>\n",
433
- " <td>4</td>\n",
434
- " <td>128</td>\n",
435
- " <td>2</td>\n",
436
- " <td>300</td>\n",
437
- " <td>0.0002</td>\n",
438
- " <td>0.0002</td>\n",
439
- " <td>ctgan_only_7000_1_daejeon_class0.pkl</td>\n",
440
- " </tr>\n",
441
- " </tbody>\n",
442
- "</table>\n",
443
- "</div>"
444
- ],
445
- "text/plain": [
446
- " method sample_size fold region class embedding_dim generator_dim \\\n",
447
- "0 ctgan 7000 1 busan 0 78 (128, 128) \n",
448
- "1 ctgan 7000 1 busan 1 269 (256, 256) \n",
449
- "2 ctgan 7000 1 daegu 0 121 (128, 128) \n",
450
- "3 ctgan 7000 1 daegu 1 217 (128, 128) \n",
451
- "4 ctgan 7000 1 daejeon 0 101 (128, 128) \n",
452
- "\n",
453
- " discriminator_dim pac batch_size discriminator_steps epochs \\\n",
454
- "0 (128, 128) 8 256 3 300 \n",
455
- "1 (128, 128) 4 1024 1 300 \n",
456
- "2 (64, 64) 4 64 2 300 \n",
457
- "3 (128, 128) 4 256 5 300 \n",
458
- "4 (128, 128) 4 128 2 300 \n",
459
- "\n",
460
- " generator_lr discriminator_lr filename \n",
461
- "0 0.0002 0.0002 ctgan_only_7000_1_busan_class0.pkl \n",
462
- "1 0.0002 0.0002 ctgan_only_7000_1_busan_class1.pkl \n",
463
- "2 0.0002 0.0002 ctgan_only_7000_1_daegu_class0.pkl \n",
464
- "3 0.0002 0.0002 ctgan_only_7000_1_daegu_class1.pkl \n",
465
- "4 0.0002 0.0002 ctgan_only_7000_1_daejeon_class0.pkl "
466
- ]
467
- },
468
- "execution_count": 7,
469
- "metadata": {},
470
- "output_type": "execute_result"
471
- }
472
- ],
473
- "source": [
474
- "# DataFrame으로 변환\n",
475
- "df_hyperparams = pd.DataFrame(all_results)\n",
476
- "\n",
477
- "# 컬럼 순서 정리\n",
478
- "column_order = [\n",
479
- " 'method', 'sample_size', 'fold', 'region', 'class',\n",
480
- " 'embedding_dim', 'generator_dim', 'discriminator_dim',\n",
481
- " 'pac', 'batch_size', 'discriminator_steps',\n",
482
- " 'epochs', 'generator_lr', 'discriminator_lr',\n",
483
- " 'filename'\n",
484
- "]\n",
485
- "df_hyperparams = df_hyperparams[column_order]\n",
486
- "\n",
487
- "# 정렬: method -> sample_size -> fold -> region -> class\n",
488
- "df_hyperparams = df_hyperparams.sort_values(\n",
489
- " ['method', 'sample_size', 'fold', 'region', 'class']\n",
490
- ").reset_index(drop=True)\n",
491
- "\n",
492
- "print(f\"총 {len(df_hyperparams)}개의 모델 하이퍼파라미터가 정리되었습니다.\")\n",
493
- "print(f\"\\n컬럼: {list(df_hyperparams.columns)}\")\n",
494
- "print(f\"\\n처음 5개 행:\")\n",
495
- "df_hyperparams.head()\n"
496
- ]
497
- },
498
- {
499
- "cell_type": "code",
500
- "execution_count": 17,
501
- "id": "9d3a8a65",
502
- "metadata": {},
503
- "outputs": [],
504
- "source": [
505
- "df_hyperparams.sort_values(by=['region','method','sample_size','fold','class'], inplace=True)"
506
- ]
507
- },
508
- {
509
- "cell_type": "code",
510
- "execution_count": 24,
511
- "id": "f92f352e",
512
- "metadata": {},
513
- "outputs": [
514
- {
515
- "name": "stdout",
516
- "output_type": "stream",
517
- "text": [
518
- "하이퍼파라미터 데이터가 'oversampling_models_hyperparameters_all.csv'에 저장되었습니다.\n"
519
- ]
520
- }
521
- ],
522
- "source": [
523
- "# CSV로 저장 (선택사항)\n",
524
- "output_csv = \"oversampling_models_hyperparameters_all.csv\"\n",
525
- "df_hyperparams.to_csv(output_csv, index=False, encoding='utf-8-sig')\n",
526
- "print(f\"하이퍼파라미터 데이터가 '{output_csv}'에 저장되었습니다.\")"
527
- ]
528
- },
529
- {
530
- "cell_type": "code",
531
- "execution_count": 25,
532
- "id": "8ee1c56a",
533
- "metadata": {},
534
- "outputs": [
535
- {
536
- "data": {
537
- "text/plain": [
538
- "ctgan 108\n",
539
- "smotenc_ctgan 108\n",
540
- "Name: method, dtype: int64"
541
- ]
542
- },
543
- "execution_count": 25,
544
- "metadata": {},
545
- "output_type": "execute_result"
546
- }
547
- ],
548
- "source": [
549
- "df_hyperparams['method'].value_counts()"
550
- ]
551
- }
552
- ],
553
- "metadata": {
554
- "kernelspec": {
555
- "display_name": "py39",
556
- "language": "python",
557
- "name": "python3"
558
- },
559
- "language_info": {
560
- "codemirror_mode": {
561
- "name": "ipython",
562
- "version": 3
563
- },
564
- "file_extension": ".py",
565
- "mimetype": "text/x-python",
566
- "name": "python",
567
- "nbconvert_exporter": "python",
568
- "pygments_lexer": "ipython3",
569
- "version": "3.9.18"
570
- }
571
- },
572
- "nbformat": 4,
573
- "nbformat_minor": 5
574
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac95f90bc473f127749903da0a1645bc2554566dc7d786d4515c77a811677e46
3
+ size 21101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Analysis_code/3.sampled_data_analysis/oversampling_models_hyperparameters_all.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c59aa728c79076dc833be3d19a82763bfc723148cdac08f124b2a1dd1f9357a1
3
+ size 25991
Analysis_code/4.oversampling_data_test/analysis_for_oversampling_data.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/4.oversampling_data_test/lgb_sampled_test.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/4.oversampling_data_test/xgb_sampled_test.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/6.optima_models_analysis/best_samples_best_datasample_per_model_per_region.csv CHANGED
@@ -1,31 +1,3 @@
1
- model,data_sample,region,optimization_library,best_csi,n_trials,best_params
2
- ft_transformer,ctgan10000,seoul,optuna,0.5936576575682742,100,"{'d_token': 224, 'n_blocks': 4, 'n_heads': 8, 'attention_dropout': 0.15689079322421481, 'ffn_dropout': 0.27970845546044715, 'lr': 0.00011858823893766563, 'weight_decay': 0.02942637540327823, 'batch_size': 64}"
3
- xgb,smote,seoul,hyperopt,0.5914,100,"{'colsample_bytree': 0.9204650350059359, 'gamma': 0.006794691671452219, 'learning_rate': 0.08811009219503893, 'max_depth': 12.0, 'min_child_weight': 3.0, 'reg_alpha': 0.6690476963880017, 'reg_lambda': 0.5388102924254845, 'subsample': 0.8320339664880525}"
4
- lgb,smote,seoul,hyperopt,0.5895,100,"{'colsample_bytree': 0.7712993813350446, 'learning_rate': 0.014764448164722338, 'max_depth': 13.0, 'min_child_weight': 2.0, 'num_leaves': 144.0, 'reg_alpha': 0.9375760221934153, 'reg_lambda': 0.6881109653195318, 'subsample': 0.6932106255794361}"
5
- resnet_like,ctgan10000,seoul,optuna,0.5889744558864985,100,"{'d_main': 160, 'd_hidden': 128, 'n_blocks': 4, 'dropout_first': 0.24169683403631037, 'dropout_second': 0.018646743579449815, 'lr': 0.003891520111503718, 'weight_decay': 0.09736563298725749, 'batch_size': 128}"
6
- deepgbm,smotenc_ctgan20000,seoul,optuna,0.5657974901513136,100,"{'d_main': 128, 'd_hidden': 64, 'n_blocks': 5, 'dropout': 0.1024240157205574, 'lr': 0.009019177625524915, 'weight_decay': 0.08874117499066106, 'batch_size': 256}"
7
- xgb,smote,incheon,hyperopt,0.6,100,"{'colsample_bytree': 0.8863531635625073, 'gamma': 1.4432252696586687, 'learning_rate': 0.14431831840673584, 'max_depth': 4.0, 'min_child_weight': 4.0, 'reg_alpha': 0.7656890601027424, 'reg_lambda': 0.5796745106013773, 'subsample': 0.8862819830666011}"
8
- lgb,smote,incheon,hyperopt,0.5986,100,"{'colsample_bytree': 0.7149911519913482, 'learning_rate': 0.14061649313221522, 'max_depth': 4.0, 'min_child_weight': 4.0, 'num_leaves': 46.0, 'reg_alpha': 0.3323739596170201, 'reg_lambda': 0.3615804769440283, 'subsample': 0.7361106038020775}"
9
- resnet_like,ctgan10000,incheon,optuna,0.5876200434398301,100,"{'d_main': 160, 'd_hidden': 192, 'n_blocks': 3, 'dropout_first': 0.213366405042877, 'dropout_second': 0.0616930432275245, 'lr': 0.005092968501532562, 'weight_decay': 0.06153947659623341, 'batch_size': 256}"
10
- ft_transformer,smote,incheon,optuna,0.5674050423289939,100,"{'d_token': 96, 'n_blocks': 3, 'n_heads': 8, 'attention_dropout': 0.3637871224837107, 'ffn_dropout': 0.11479322703553738, 'lr': 0.0003009840584939789, 'weight_decay': 0.0003336039035587163, 'batch_size': 32}"
11
- deepgbm,ctgan10000,incheon,optuna,0.5644485264432356,100,"{'d_main': 64, 'd_hidden': 192, 'n_blocks': 5, 'dropout': 0.16846849111235224, 'lr': 0.007871644587352598, 'weight_decay': 0.0005399258093557023, 'batch_size': 128}"
12
- xgb,smote,gwangju,hyperopt,0.53,100,"{'colsample_bytree': 0.7658195937298418, 'gamma': 1.040884657831581, 'learning_rate': 0.04553328563585195, 'max_depth': 7.0, 'min_child_weight': 12.0, 'reg_alpha': 0.8031012977426317, 'reg_lambda': 0.6205464163959697, 'subsample': 0.6524796151581305}"
13
- lgb,smote,gwangju,hyperopt,0.5297,100,"{'colsample_bytree': 0.9919060649789312, 'learning_rate': 0.054631157314326724, 'max_depth': 15.0, 'min_child_weight': 3.0, 'num_leaves': 47.0, 'reg_alpha': 0.9190252546800255, 'reg_lambda': 0.8800706832709921, 'subsample': 0.7859941375783913}"
14
- deepgbm,ctgan10000,gwangju,optuna,0.5204031176113428,100,"{'d_main': 128, 'd_hidden': 192, 'n_blocks': 6, 'dropout': 0.3938212564993552, 'lr': 0.007164979269975063, 'weight_decay': 0.0923681134285374, 'batch_size': 64}"
15
- resnet_like,ctgan10000,gwangju,optuna,0.510302842874457,100,"{'d_main': 64, 'd_hidden': 320, 'n_blocks': 3, 'dropout_first': 0.29522157561687634, 'dropout_second': 0.15684305218104422, 'lr': 0.008180657303015811, 'weight_decay': 0.01303718192830624, 'batch_size': 128}"
16
- ft_transformer,ctgan10000,gwangju,optuna,0.5052817328725289,100,"{'d_token': 160, 'n_blocks': 3, 'n_heads': 4, 'attention_dropout': 0.21128609103276186, 'ffn_dropout': 0.18610731195171396, 'lr': 0.0019139767886005993, 'weight_decay': 0.03127877612669642, 'batch_size': 128}"
17
- xgb,smote,daejeon,hyperopt,0.5371,100,"{'colsample_bytree': 0.733236256331133, 'gamma': 0.7990977235867733, 'learning_rate': 0.17558281930946487, 'max_depth': 9.0, 'min_child_weight': 11.0, 'reg_alpha': 0.1596833778659402, 'reg_lambda': 0.9170555745286906, 'subsample': 0.6403574066792026}"
18
- lgb,smote,daejeon,hyperopt,0.5317,100,"{'colsample_bytree': 0.7585295616897205, 'learning_rate': 0.012807299958074884, 'max_depth': 8.0, 'min_child_weight': 2.0, 'num_leaves': 149.0, 'reg_alpha': 0.8175154308532824, 'reg_lambda': 0.7481509687757377, 'subsample': 0.8155067304500027}"
19
- resnet_like,ctgan10000,daejeon,optuna,0.5101768615009369,100,"{'d_main': 128, 'd_hidden': 256, 'n_blocks': 5, 'dropout_first': 0.1381181811099212, 'dropout_second': 0.11702484025760711, 'lr': 0.0064016726045039805, 'weight_decay': 0.004366638608686326, 'batch_size': 256}"
20
- deepgbm,ctgan10000,daejeon,optuna,0.5101248146449113,100,"{'d_main': 64, 'd_hidden': 256, 'n_blocks': 2, 'dropout': 0.2845653149911174, 'lr': 0.0030479748817488737, 'weight_decay': 0.08478209494184558, 'batch_size': 32}"
21
- ft_transformer,ctgan10000,daejeon,optuna,0.5026041056392309,100,"{'d_token': 64, 'n_blocks': 5, 'n_heads': 8, 'attention_dropout': 0.39639878146052787, 'ffn_dropout': 0.16243660840447227, 'lr': 0.0005258652715359098, 'weight_decay': 0.06319928258911829, 'batch_size': 128}"
22
- xgb,smote,daegu,hyperopt,0.4672,100,"{'colsample_bytree': 0.8132816721507904, 'gamma': 0.9002659162503241, 'learning_rate': 0.04046864452016672, 'max_depth': 4.0, 'min_child_weight': 17.0, 'reg_alpha': 0.4681545450085154, 'reg_lambda': 0.531313515098387, 'subsample': 0.827198506312037}"
23
- lgb,smote,daegu,hyperopt,0.4671,100,"{'colsample_bytree': 0.999946333457191, 'learning_rate': 0.07031680296643952, 'max_depth': 4.0, 'min_child_weight': 17.0, 'num_leaves': 32.0, 'reg_alpha': 0.055815317687804816, 'reg_lambda': 0.2293760134119255, 'subsample': 0.6363907923464539}"
24
- resnet_like,ctgan10000,daegu,optuna,0.46086300130604796,100,"{'d_main': 96, 'd_hidden': 256, 'n_blocks': 3, 'dropout_first': 0.27926914874893893, 'dropout_second': 0.13114004557533837, 'lr': 0.004133395383387492, 'weight_decay': 0.05462768451276688, 'batch_size': 32}"
25
- ft_transformer,ctgan10000,daegu,optuna,0.44918319157422554,100,"{'d_token': 128, 'n_blocks': 4, 'n_heads': 8, 'attention_dropout': 0.1615322006432558, 'ffn_dropout': 0.14353691142809796, 'lr': 0.00025225999310114116, 'weight_decay': 0.002948085679186959, 'batch_size': 32}"
26
- deepgbm,ctgan10000,daegu,optuna,0.4390250453058284,100,"{'d_main': 64, 'd_hidden': 192, 'n_blocks': 2, 'dropout': 0.29112938728448373, 'lr': 0.002745246324742509, 'weight_decay': 0.07823286969698617, 'batch_size': 32}"
27
- ft_transformer,ctgan10000,busan,optuna,0.4960458104166546,100,"{'d_token': 224, 'n_blocks': 2, 'n_heads': 8, 'attention_dropout': 0.3873943566967484, 'ffn_dropout': 0.14296280926606936, 'lr': 0.0007665967810279031, 'weight_decay': 0.00878158688959246, 'batch_size': 32}"
28
- xgb,pure,busan,hyperopt,0.4949,100,"{'colsample_bytree': 0.8651175745135303, 'gamma': 2.0220518303820976, 'learning_rate': 0.04196437449161767, 'max_depth': 7.0, 'min_child_weight': 17.0, 'reg_alpha': 0.9213159636887744, 'reg_lambda': 0.9407811453878014, 'subsample': 0.7200034080497129}"
29
- resnet_like,ctgan10000,busan,optuna,0.49363300915248276,100,"{'d_main': 128, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.3784300740258752, 'dropout_second': 0.026029354045211155, 'lr': 0.008483242466300268, 'weight_decay': 0.00016367394584020504, 'batch_size': 128}"
30
- lgb,pure,busan,hyperopt,0.4849,100,"{'colsample_bytree': 0.9406061312055983, 'learning_rate': 0.17468151642796886, 'max_depth': 6.0, 'min_child_weight': 17.0, 'num_leaves': 138.0, 'reg_alpha': 0.4593420461637059, 'reg_lambda': 0.5948987302333338, 'subsample': 0.6557839186758097}"
31
- deepgbm,ctgan10000,busan,optuna,0.475435864212341,100,"{'d_main': 128, 'd_hidden': 128, 'n_blocks': 2, 'dropout': 0.1477619274404685, 'lr': 0.00606559205480389, 'weight_decay': 0.08582651929034574, 'batch_size': 64}"
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea0e00ff77e5c9125b0745c5fb189e75ca419dc2c18495c79238ae6fceccdbae
3
+ size 8099
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Analysis_code/6.optima_models_analysis/extract_result_from_omptimized_models.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/6.optima_models_analysis/optimization_result.csv CHANGED
@@ -1,121 +1,3 @@
1
- model,data_sample,region,optimization_library,best_csi,n_trials,best_params
2
- ft_transformer,smote,busan,optuna,0.4524714686603366,100,"{'d_token': 96, 'n_blocks': 2, 'n_heads': 4, 'attention_dropout': 0.39058519125437824, 'ffn_dropout': 0.35781462874340314, 'lr': 0.0006705029713893173, 'weight_decay': 0.0001426209011329072, 'batch_size': 32}"
3
- ft_transformer,pure,daegu,optuna,0.3846594392002736,100,"{'d_token': 224, 'n_blocks': 2, 'n_heads': 8, 'attention_dropout': 0.2747124536980523, 'ffn_dropout': 0.1909754518903343, 'lr': 0.00016739997447268043, 'weight_decay': 0.027811856175591093, 'batch_size': 32}"
4
- xgb,smotenc_ctgan20000,daegu,hyperopt,0.4277,100,"{'colsample_bytree': 0.7114033147242765, 'gamma': 0.9625212091684073, 'learning_rate': 0.029642239831837253, 'max_depth': 12.0, 'min_child_weight': 10.0, 'reg_alpha': 0.1587708255877442, 'reg_lambda': 0.4648399906011745, 'subsample': 0.90006374997474}"
5
- resnet_like,pure,seoul,optuna,0.5566756722322981,100,"{'d_main': 96, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.3834324386470386, 'dropout_second': 0.029813309707734794, 'lr': 0.0005002634113960448, 'weight_decay': 0.05408195750724482, 'batch_size': 32}"
6
- resnet_like,smote,gwangju,optuna,0.48671080174496345,100,"{'d_main': 96, 'd_hidden': 64, 'n_blocks': 4, 'dropout_first': 0.2808602138036661, 'dropout_second': 0.14619910058769206, 'lr': 0.0055368090707776305, 'weight_decay': 0.001341561695377514, 'batch_size': 256}"
7
- resnet_like,smotenc_ctgan20000,busan,optuna,0.45568873098715307,100,"{'d_main': 256, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.1045169105881141, 'dropout_second': 0.05372596450552944, 'lr': 0.00943776593390798, 'weight_decay': 0.0010575459741554466, 'batch_size': 128}"
8
- ft_transformer,smote,daegu,optuna,0.40272068323222654,100,"{'d_token': 96, 'n_blocks': 3, 'n_heads': 8, 'attention_dropout': 0.2530528162951749, 'ffn_dropout': 0.1899328578250674, 'lr': 0.0013866055842525612, 'weight_decay': 0.00020752948982868583, 'batch_size': 64}"
9
- xgb,ctgan10000,daejeon,hyperopt,0.5029,100,"{'colsample_bytree': 0.7816067762051363, 'gamma': 0.5799629095615334, 'learning_rate': 0.06287412401679013, 'max_depth': 3.0, 'min_child_weight': 5.0, 'reg_alpha': 0.36731169488772253, 'reg_lambda': 0.03192020366519255, 'subsample': 0.6004605601176048}"
10
- resnet_like,smotenc_ctgan20000,gwangju,optuna,0.4937343167803819,100,"{'d_main': 64, 'd_hidden': 256, 'n_blocks': 3, 'dropout_first': 0.3792559101674334, 'dropout_second': 0.19251437428644594, 'lr': 0.00405325576056642, 'weight_decay': 0.0005628495861261046, 'batch_size': 128}"
11
- lgb,smote,seoul,hyperopt,0.5895,100,"{'colsample_bytree': 0.7712993813350446, 'learning_rate': 0.014764448164722338, 'max_depth': 13.0, 'min_child_weight': 2.0, 'num_leaves': 144.0, 'reg_alpha': 0.9375760221934153, 'reg_lambda': 0.6881109653195318, 'subsample': 0.6932106255794361}"
12
- ft_transformer,pure,busan,optuna,0.44685896765010097,100,"{'d_token': 192, 'n_blocks': 3, 'n_heads': 8, 'attention_dropout': 0.11973480060658612, 'ffn_dropout': 0.15614454735278546, 'lr': 0.00016413327466283583, 'weight_decay': 0.002548468041818763, 'batch_size': 64}"
13
- ft_transformer,ctgan10000,gwangju,optuna,0.5052817328725289,100,"{'d_token': 160, 'n_blocks': 3, 'n_heads': 4, 'attention_dropout': 0.21128609103276186, 'ffn_dropout': 0.18610731195171396, 'lr': 0.0019139767886005993, 'weight_decay': 0.03127877612669642, 'batch_size': 128}"
14
- xgb,ctgan10000,gwangju,hyperopt,0.4999,100,"{'colsample_bytree': 0.9825715751506052, 'gamma': 0.029399796024525915, 'learning_rate': 0.08575762330828458, 'max_depth': 11.0, 'min_child_weight': 8.0, 'reg_alpha': 0.8534545581720909, 'reg_lambda': 0.43478744549007325, 'subsample': 0.7629247503643918}"
15
- resnet_like,smote,daejeon,optuna,0.48175460460533026,100,"{'d_main': 192, 'd_hidden': 64, 'n_blocks': 4, 'dropout_first': 0.3137027413848088, 'dropout_second': 0.18760941198412298, 'lr': 0.00565595693998524, 'weight_decay': 0.04866049178978196, 'batch_size': 128}"
16
- ft_transformer,smotenc_ctgan20000,busan,optuna,0.4767443843216419,100,"{'d_token': 128, 'n_blocks': 2, 'n_heads': 8, 'attention_dropout': 0.1711218539171086, 'ffn_dropout': 0.2636519595986711, 'lr': 0.004358575253869887, 'weight_decay': 0.00046754184762985804, 'batch_size': 256}"
17
- xgb,ctgan10000,daegu,hyperopt,0.4392,100,"{'colsample_bytree': 0.8557253370653987, 'gamma': 1.1563957561197729, 'learning_rate': 0.19232018363683898, 'max_depth': 10.0, 'min_child_weight': 8.0, 'reg_alpha': 0.21149066093980878, 'reg_lambda': 0.2690618200166374, 'subsample': 0.714526167464626}"
18
- deepgbm,smote,gwangju,optuna,0.45804386918458945,100,"{'d_main': 160, 'd_hidden': 64, 'n_blocks': 2, 'dropout': 0.1698480826585593, 'lr': 0.0003762961039889282, 'weight_decay': 0.0014343061148430866, 'batch_size': 64}"
19
- ft_transformer,ctgan10000,seoul,optuna,0.5936576575682742,100,"{'d_token': 224, 'n_blocks': 4, 'n_heads': 8, 'attention_dropout': 0.15689079322421481, 'ffn_dropout': 0.27970845546044715, 'lr': 0.00011858823893766563, 'weight_decay': 0.02942637540327823, 'batch_size': 64}"
20
- lgb,smote,daejeon,hyperopt,0.5317,100,"{'colsample_bytree': 0.7585295616897205, 'learning_rate': 0.012807299958074884, 'max_depth': 8.0, 'min_child_weight': 2.0, 'num_leaves': 149.0, 'reg_alpha': 0.8175154308532824, 'reg_lambda': 0.7481509687757377, 'subsample': 0.8155067304500027}"
21
- resnet_like,pure,incheon,optuna,0.5717111423727251,100,"{'d_main': 224, 'd_hidden': 256, 'n_blocks': 4, 'dropout_first': 0.3502671083503836, 'dropout_second': 0.15938013319711236, 'lr': 0.0006801289543741389, 'weight_decay': 0.005292744372677132, 'batch_size': 128}"
22
- lgb,ctgan10000,daejeon,hyperopt,0.4908,100,"{'colsample_bytree': 0.7077604272501928, 'learning_rate': 0.10351387699107398, 'max_depth': 6.0, 'min_child_weight': 4.0, 'num_leaves': 51.0, 'reg_alpha': 0.06973941883143871, 'reg_lambda': 0.8477821589656351, 'subsample': 0.8664583588640111}"
23
- lgb,smotenc_ctgan20000,daegu,hyperopt,0.4455,100,"{'colsample_bytree': 0.9935566129264934, 'learning_rate': 0.011803766157843702, 'max_depth': 10.0, 'min_child_weight': 18.0, 'num_leaves': 78.0, 'reg_alpha': 0.5555179443217245, 'reg_lambda': 0.23478947295729824, 'subsample': 0.7059612447576378}"
24
- deepgbm,smotenc_ctgan20000,busan,optuna,0.40671898361820197,100,"{'d_main': 64, 'd_hidden': 256, 'n_blocks': 3, 'dropout': 0.2843714965028271, 'lr': 0.009315324608427497, 'weight_decay': 0.022174119634941862, 'batch_size': 128}"
25
- ft_transformer,smotenc_ctgan20000,daejeon,optuna,0.4794390839030278,100,"{'d_token': 64, 'n_blocks': 3, 'n_heads': 4, 'attention_dropout': 0.13096961306141697, 'ffn_dropout': 0.10368225384926379, 'lr': 0.0008212785631177437, 'weight_decay': 0.0007599672775598784, 'batch_size': 32}"
26
- lgb,ctgan10000,busan,hyperopt,0.4836,100,"{'colsample_bytree': 0.7243835590014314, 'learning_rate': 0.052472053724070156, 'max_depth': 15.0, 'min_child_weight': 9.0, 'num_leaves': 120.0, 'reg_alpha': 0.566895668532905, 'reg_lambda': 0.9659771198744264, 'subsample': 0.8425484904296862}"
27
- ft_transformer,pure,daejeon,optuna,0.4655886251588111,100,"{'d_token': 64, 'n_blocks': 4, 'n_heads': 8, 'attention_dropout': 0.24759508454949322, 'ffn_dropout': 0.2013907953941948, 'lr': 0.0003711331440914647, 'weight_decay': 0.06954769328501528, 'batch_size': 32}"
28
- xgb,pure,gwangju,hyperopt,0.5016,100,"{'colsample_bytree': 0.9713207386536029, 'gamma': 2.2482753887012703, 'learning_rate': 0.16732973947259167, 'max_depth': 8.0, 'min_child_weight': 3.0, 'reg_alpha': 0.2664406256084806, 'reg_lambda': 0.7263114796775476, 'subsample': 0.6483131273031051}"
29
- ft_transformer,pure,seoul,optuna,0.562070126103511,100,"{'d_token': 256, 'n_blocks': 2, 'n_heads': 4, 'attention_dropout': 0.2855445640312001, 'ffn_dropout': 0.20563167448836292, 'lr': 7.430025637172839e-05, 'weight_decay': 0.012136192435211931, 'batch_size': 32}"
30
- resnet_like,smote,busan,optuna,0.4473603019416436,100,"{'d_main': 128, 'd_hidden': 256, 'n_blocks': 5, 'dropout_first': 0.37677375956516684, 'dropout_second': 0.18705700465884292, 'lr': 0.005004477317296484, 'weight_decay': 0.00012190453086381686, 'batch_size': 64}"
31
- deepgbm,pure,incheon,optuna,0.5622375492647842,100,"{'d_main': 64, 'd_hidden': 256, 'n_blocks': 2, 'dropout': 0.3553750103803738, 'lr': 0.0017038392317957017, 'weight_decay': 0.04010324241876258, 'batch_size': 32}"
32
- ft_transformer,smote,gwangju,optuna,0.48424711598330084,100,"{'d_token': 96, 'n_blocks': 4, 'n_heads': 8, 'attention_dropout': 0.19167241349972552, 'ffn_dropout': 0.10372384139481815, 'lr': 0.0003028578585093515, 'weight_decay': 0.021302792376896054, 'batch_size': 32}"
33
- lgb,pure,busan,hyperopt,0.4849,100,"{'colsample_bytree': 0.9406061312055983, 'learning_rate': 0.17468151642796886, 'max_depth': 6.0, 'min_child_weight': 17.0, 'num_leaves': 138.0, 'reg_alpha': 0.4593420461637059, 'reg_lambda': 0.5948987302333338, 'subsample': 0.6557839186758097}"
34
- xgb,ctgan10000,seoul,hyperopt,0.5824,100,"{'colsample_bytree': 0.8837225390968168, 'gamma': 0.1115044781500254, 'learning_rate': 0.10805110293567466, 'max_depth': 12.0, 'min_child_weight': 4.0, 'reg_alpha': 0.02183712172236562, 'reg_lambda': 0.6932207560084631, 'subsample': 0.6767341133785678}"
35
- resnet_like,ctgan10000,gwangju,optuna,0.510302842874457,100,"{'d_main': 64, 'd_hidden': 320, 'n_blocks': 3, 'dropout_first': 0.29522157561687634, 'dropout_second': 0.15684305218104422, 'lr': 0.008180657303015811, 'weight_decay': 0.01303718192830624, 'batch_size': 128}"
36
- deepgbm,pure,busan,optuna,0.4429286988285861,100,"{'d_main': 192, 'd_hidden': 64, 'n_blocks': 3, 'dropout': 0.2275228702990485, 'lr': 0.002636923316902024, 'weight_decay': 0.05132270066782015, 'batch_size': 32}"
37
- ft_transformer,ctgan10000,busan,optuna,0.4960458104166546,100,"{'d_token': 224, 'n_blocks': 2, 'n_heads': 8, 'attention_dropout': 0.3873943566967484, 'ffn_dropout': 0.14296280926606936, 'lr': 0.0007665967810279031, 'weight_decay': 0.00878158688959246, 'batch_size': 32}"
38
- resnet_like,ctgan10000,daejeon,optuna,0.5101768615009369,100,"{'d_main': 128, 'd_hidden': 256, 'n_blocks': 5, 'dropout_first': 0.1381181811099212, 'dropout_second': 0.11702484025760711, 'lr': 0.0064016726045039805, 'weight_decay': 0.004366638608686326, 'batch_size': 256}"
39
- resnet_like,smotenc_ctgan20000,incheon,optuna,0.5707798446437679,100,"{'d_main': 224, 'd_hidden': 256, 'n_blocks': 4, 'dropout_first': 0.3626907864360457, 'dropout_second': 0.08738106548329602, 'lr': 0.005205322934309404, 'weight_decay': 0.0002577881849067971, 'batch_size': 256}"
40
- xgb,pure,daegu,hyperopt,0.4409,100,"{'colsample_bytree': 0.8800494992202731, 'gamma': 0.28651615767316957, 'learning_rate': 0.025526450870185433, 'max_depth': 3.0, 'min_child_weight': 5.0, 'reg_alpha': 0.6787273055071508, 'reg_lambda': 0.6641153401816423, 'subsample': 0.7222783789369407}"
41
- ft_transformer,smotenc_ctgan20000,daegu,optuna,0.4488825045464337,100,"{'d_token': 64, 'n_blocks': 3, 'n_heads': 4, 'attention_dropout': 0.270864990314075, 'ffn_dropout': 0.3735925199999471, 'lr': 0.0016588396308948813, 'weight_decay': 0.00014669112268617685, 'batch_size': 128}"
42
- resnet_like,ctgan10000,daegu,optuna,0.46086300130604796,100,"{'d_main': 96, 'd_hidden': 256, 'n_blocks': 3, 'dropout_first': 0.27926914874893893, 'dropout_second': 0.13114004557533837, 'lr': 0.004133395383387492, 'weight_decay': 0.05462768451276688, 'batch_size': 32}"
43
- deepgbm,smote,daegu,optuna,0.3991148650568916,100,"{'d_main': 64, 'd_hidden': 192, 'n_blocks': 5, 'dropout': 0.15327234825163508, 'lr': 0.0021257323880618864, 'weight_decay': 0.06824750948093047, 'batch_size': 32}"
44
- deepgbm,smote,seoul,optuna,0.5551122990255606,100,"{'d_main': 64, 'd_hidden': 64, 'n_blocks': 2, 'dropout': 0.2869842021080176, 'lr': 0.002077449117732186, 'weight_decay': 0.00019614736051849963, 'batch_size': 32}"
45
- lgb,smote,incheon,hyperopt,0.5986,100,"{'colsample_bytree': 0.7149911519913482, 'learning_rate': 0.14061649313221522, 'max_depth': 4.0, 'min_child_weight': 4.0, 'num_leaves': 46.0, 'reg_alpha': 0.3323739596170201, 'reg_lambda': 0.3615804769440283, 'subsample': 0.7361106038020775}"
46
- deepgbm,ctgan10000,daejeon,optuna,0.5101248146449113,100,"{'d_main': 64, 'd_hidden': 256, 'n_blocks': 2, 'dropout': 0.2845653149911174, 'lr': 0.0030479748817488737, 'weight_decay': 0.08478209494184558, 'batch_size': 32}"
47
- resnet_like,smote,seoul,optuna,0.5589425055467122,100,"{'d_main': 128, 'd_hidden': 512, 'n_blocks': 3, 'dropout_first': 0.3304233130420639, 'dropout_second': 0.14346618645388706, 'lr': 0.006514721583103225, 'weight_decay': 0.0719094067694046, 'batch_size': 64}"
48
- deepgbm,smote,incheon,optuna,0.5640599235240535,100,"{'d_main': 128, 'd_hidden': 192, 'n_blocks': 4, 'dropout': 0.3785345227219112, 'lr': 0.0018955834640803318, 'weight_decay': 0.02389399379756967, 'batch_size': 32}"
49
- xgb,smote,gwangju,hyperopt,0.53,100,"{'colsample_bytree': 0.7658195937298418, 'gamma': 1.040884657831581, 'learning_rate': 0.04553328563585195, 'max_depth': 7.0, 'min_child_weight': 12.0, 'reg_alpha': 0.8031012977426317, 'reg_lambda': 0.6205464163959697, 'subsample': 0.6524796151581305}"
50
- xgb,smotenc_ctgan20000,incheon,hyperopt,0.5653,100,"{'colsample_bytree': 0.780674346441659, 'gamma': 1.066921965325722, 'learning_rate': 0.09975688732352224, 'max_depth': 11.0, 'min_child_weight': 8.0, 'reg_alpha': 0.8401568358231439, 'reg_lambda': 0.6475753573884905, 'subsample': 0.6026764334015118}"
51
- lgb,pure,gwangju,hyperopt,0.4976,100,"{'colsample_bytree': 0.944925569294789, 'learning_rate': 0.014333972349102263, 'max_depth': 13.0, 'min_child_weight': 13.0, 'num_leaves': 70.0, 'reg_alpha': 0.31059473707139207, 'reg_lambda': 0.19125854192836045, 'subsample': 0.6106523377701687}"
52
- lgb,pure,incheon,hyperopt,0.5617,100,"{'colsample_bytree': 0.7451758674748375, 'learning_rate': 0.012136256520039064, 'max_depth': 10.0, 'min_child_weight': 9.0, 'num_leaves': 89.0, 'reg_alpha': 0.41323940989949803, 'reg_lambda': 0.9961125910856155, 'subsample': 0.7322009271446389}"
53
- deepgbm,pure,daejeon,optuna,0.45092661677493817,100,"{'d_main': 256, 'd_hidden': 64, 'n_blocks': 2, 'dropout': 0.16512768923755844, 'lr': 0.00013316728360925615, 'weight_decay': 0.0001651587899552549, 'batch_size': 32}"
54
- xgb,smote,daegu,hyperopt,0.4672,100,"{'colsample_bytree': 0.8132816721507904, 'gamma': 0.9002659162503241, 'learning_rate': 0.04046864452016672, 'max_depth': 4.0, 'min_child_weight': 17.0, 'reg_alpha': 0.4681545450085154, 'reg_lambda': 0.531313515098387, 'subsample': 0.827198506312037}"
55
- ft_transformer,ctgan10000,daegu,optuna,0.44918319157422554,100,"{'d_token': 128, 'n_blocks': 4, 'n_heads': 8, 'attention_dropout': 0.1615322006432558, 'ffn_dropout': 0.14353691142809796, 'lr': 0.00025225999310114116, 'weight_decay': 0.002948085679186959, 'batch_size': 32}"
56
- ft_transformer,smote,incheon,optuna,0.5674050423289939,100,"{'d_token': 96, 'n_blocks': 3, 'n_heads': 8, 'attention_dropout': 0.3637871224837107, 'ffn_dropout': 0.11479322703553738, 'lr': 0.0003009840584939789, 'weight_decay': 0.0003336039035587163, 'batch_size': 32}"
57
- deepgbm,ctgan10000,busan,optuna,0.475435864212341,100,"{'d_main': 128, 'd_hidden': 128, 'n_blocks': 2, 'dropout': 0.1477619274404685, 'lr': 0.00606559205480389, 'weight_decay': 0.08582651929034574, 'batch_size': 64}"
58
- resnet_like,ctgan10000,seoul,optuna,0.5889744558864985,100,"{'d_main': 160, 'd_hidden': 128, 'n_blocks': 4, 'dropout_first': 0.24169683403631037, 'dropout_second': 0.018646743579449815, 'lr': 0.003891520111503718, 'weight_decay': 0.09736563298725749, 'batch_size': 128}"
59
- ft_transformer,smotenc_ctgan20000,seoul,optuna,0.5675273679868712,100,"{'d_token': 192, 'n_blocks': 4, 'n_heads': 8, 'attention_dropout': 0.10255871141670557, 'ffn_dropout': 0.3735851591876593, 'lr': 0.00038893486056538687, 'weight_decay': 0.0020864133600508325, 'batch_size': 128}"
60
- lgb,smotenc_ctgan20000,incheon,hyperopt,0.5827,100,"{'colsample_bytree': 0.8256738533611337, 'learning_rate': 0.03224794137518454, 'max_depth': 12.0, 'min_child_weight': 6.0, 'num_leaves': 128.0, 'reg_alpha': 0.1310437036184594, 'reg_lambda': 0.6161059673451368, 'subsample': 0.8412610992284884}"
61
- deepgbm,smotenc_ctgan20000,incheon,optuna,0.5515777046032605,100,"{'d_main': 64, 'd_hidden': 256, 'n_blocks': 6, 'dropout': 0.10014845072807586, 'lr': 0.008578508208533469, 'weight_decay': 0.07361767414227417, 'batch_size': 256}"
62
- lgb,smote,busan,hyperopt,0.4798,100,"{'colsample_bytree': 0.8798108126736107, 'learning_rate': 0.018973645502331604, 'max_depth': 15.0, 'min_child_weight': 12.0, 'num_leaves': 147.0, 'reg_alpha': 0.03444822116314217, 'reg_lambda': 0.19407225134838502, 'subsample': 0.7009485341014761}"
63
- lgb,ctgan10000,daegu,hyperopt,0.4339,100,"{'colsample_bytree': 0.885595994008888, 'learning_rate': 0.010106842270671093, 'max_depth': 14.0, 'min_child_weight': 1.0, 'num_leaves': 122.0, 'reg_alpha': 0.12648188723964116, 'reg_lambda': 0.873682924904797, 'subsample': 0.7806598236159817}"
64
- lgb,ctgan10000,gwangju,hyperopt,0.4994,100,"{'colsample_bytree': 0.9974290644555891, 'learning_rate': 0.03488079849604818, 'max_depth': 15.0, 'min_child_weight': 13.0, 'num_leaves': 47.0, 'reg_alpha': 0.22968749134640387, 'reg_lambda': 0.42745540115072206, 'subsample': 0.9875948361694687}"
65
- deepgbm,smotenc_ctgan20000,daegu,optuna,0.43407422794617584,100,"{'d_main': 160, 'd_hidden': 192, 'n_blocks': 2, 'dropout': 0.2100809214334562, 'lr': 0.0045042768238180075, 'weight_decay': 0.09966580216480231, 'batch_size': 32}"
66
- deepgbm,pure,gwangju,optuna,0.4507852210916714,100,"{'d_main': 96, 'd_hidden': 256, 'n_blocks': 4, 'dropout': 0.22201246225002172, 'lr': 0.0018802322915298384, 'weight_decay': 0.04341402361540322, 'batch_size': 64}"
67
- lgb,smote,daegu,hyperopt,0.4671,100,"{'colsample_bytree': 0.999946333457191, 'learning_rate': 0.07031680296643952, 'max_depth': 4.0, 'min_child_weight': 17.0, 'num_leaves': 32.0, 'reg_alpha': 0.055815317687804816, 'reg_lambda': 0.2293760134119255, 'subsample': 0.6363907923464539}"
68
- lgb,smotenc_ctgan20000,daejeon,hyperopt,0.5056,100,"{'colsample_bytree': 0.7827606965781482, 'learning_rate': 0.013695421561409111, 'max_depth': 13.0, 'min_child_weight': 6.0, 'num_leaves': 147.0, 'reg_alpha': 0.11677800966310199, 'reg_lambda': 0.7741495746536297, 'subsample': 0.7509016581783318}"
69
- xgb,pure,incheon,hyperopt,0.566,100,"{'colsample_bytree': 0.7546508432340249, 'gamma': 1.043370422174196, 'learning_rate': 0.015864658027101238, 'max_depth': 11.0, 'min_child_weight': 6.0, 'reg_alpha': 0.7662203770275335, 'reg_lambda': 0.5754328276715727, 'subsample': 0.6332898543620008}"
70
- deepgbm,smotenc_ctgan20000,gwangju,optuna,0.4832850681063274,100,"{'d_main': 192, 'd_hidden': 128, 'n_blocks': 5, 'dropout': 0.3184031589315668, 'lr': 0.00908394353608693, 'weight_decay': 0.005195807637314873, 'batch_size': 64}"
71
- lgb,smotenc_ctgan20000,gwangju,hyperopt,0.5098,100,"{'colsample_bytree': 0.9823131365674758, 'learning_rate': 0.09913135171477433, 'max_depth': 12.0, 'min_child_weight': 9.0, 'num_leaves': 123.0, 'reg_alpha': 0.7300219334294252, 'reg_lambda': 0.1069375287034794, 'subsample': 0.6733134215038767}"
72
- deepgbm,ctgan10000,incheon,optuna,0.5644485264432356,100,"{'d_main': 64, 'd_hidden': 192, 'n_blocks': 5, 'dropout': 0.16846849111235224, 'lr': 0.007871644587352598, 'weight_decay': 0.0005399258093557023, 'batch_size': 128}"
73
- ft_transformer,pure,gwangju,optuna,0.46517686569307726,100,"{'d_token': 64, 'n_blocks': 3, 'n_heads': 4, 'attention_dropout': 0.22191677554696004, 'ffn_dropout': 0.3593673195268826, 'lr': 0.001039914773988631, 'weight_decay': 0.002578609852830777, 'batch_size': 64}"
74
- xgb,smotenc_ctgan20000,seoul,hyperopt,0.5595,100,"{'colsample_bytree': 0.8639896996820762, 'gamma': 1.3315807011964704, 'learning_rate': 0.15781883407025307, 'max_depth': 4.0, 'min_child_weight': 8.0, 'reg_alpha': 0.4792043224918665, 'reg_lambda': 0.2705699063386674, 'subsample': 0.6068794375013623}"
75
- lgb,smotenc_ctgan20000,busan,hyperopt,0.4447,100,"{'colsample_bytree': 0.6317882901026403, 'learning_rate': 0.16009804087232835, 'max_depth': 12.0, 'min_child_weight': 7.0, 'num_leaves': 79.0, 'reg_alpha': 0.7559497285087418, 'reg_lambda': 0.36818298791457293, 'subsample': 0.6759358549390172}"
76
- ft_transformer,ctgan10000,daejeon,optuna,0.5026041056392309,100,"{'d_token': 64, 'n_blocks': 5, 'n_heads': 8, 'attention_dropout': 0.39639878146052787, 'ffn_dropout': 0.16243660840447227, 'lr': 0.0005258652715359098, 'weight_decay': 0.06319928258911829, 'batch_size': 128}"
77
- lgb,pure,seoul,hyperopt,0.5561,100,"{'colsample_bytree': 0.9656226027993231, 'learning_rate': 0.19802455575535455, 'max_depth': 9.0, 'min_child_weight': 20.0, 'num_leaves': 40.0, 'reg_alpha': 0.9688960116885452, 'reg_lambda': 0.8497573873429717, 'subsample': 0.9553638492982583}"
78
- ft_transformer,pure,incheon,optuna,0.5673234518633549,100,"{'d_token': 96, 'n_blocks': 4, 'n_heads': 4, 'attention_dropout': 0.12006219567079585, 'ffn_dropout': 0.19152888035195653, 'lr': 0.0011919865831209798, 'weight_decay': 0.00010616617862067153, 'batch_size': 128}"
79
- xgb,smote,daejeon,hyperopt,0.5371,100,"{'colsample_bytree': 0.733236256331133, 'gamma': 0.7990977235867733, 'learning_rate': 0.17558281930946487, 'max_depth': 9.0, 'min_child_weight': 11.0, 'reg_alpha': 0.1596833778659402, 'reg_lambda': 0.9170555745286906, 'subsample': 0.6403574066792026}"
80
- xgb,pure,seoul,hyperopt,0.5722,100,"{'colsample_bytree': 0.9998832589057977, 'gamma': 1.1284338309478381, 'learning_rate': 0.07547053757720854, 'max_depth': 11.0, 'min_child_weight': 3.0, 'reg_alpha': 0.005163550196038334, 'reg_lambda': 0.7838372117209551, 'subsample': 0.6965117137201219}"
81
- deepgbm,smote,daejeon,optuna,0.4669846754411795,100,"{'d_main': 128, 'd_hidden': 192, 'n_blocks': 2, 'dropout': 0.17471030464369763, 'lr': 0.0034006630423833845, 'weight_decay': 0.008657423885357477, 'batch_size': 32}"
82
- lgb,ctgan10000,seoul,hyperopt,0.5671,100,"{'colsample_bytree': 0.661683034534645, 'learning_rate': 0.02117850018558707, 'max_depth': 15.0, 'min_child_weight': 16.0, 'num_leaves': 83.0, 'reg_alpha': 0.6438462886327296, 'reg_lambda': 0.8929153518940249, 'subsample': 0.9547922722157274}"
83
- resnet_like,smotenc_ctgan20000,daegu,optuna,0.43482923124906,100,"{'d_main': 192, 'd_hidden': 128, 'n_blocks': 3, 'dropout_first': 0.38064877864842067, 'dropout_second': 0.011258432283909839, 'lr': 0.003421315789279206, 'weight_decay': 0.0004979175699002674, 'batch_size': 64}"
84
- ft_transformer,smote,seoul,optuna,0.5788930444782093,100,"{'d_token': 96, 'n_blocks': 5, 'n_heads': 8, 'attention_dropout': 0.3074211243640513, 'ffn_dropout': 0.27614330827417055, 'lr': 0.0008013502604362781, 'weight_decay': 0.07731586815781724, 'batch_size': 32}"
85
- xgb,smote,incheon,hyperopt,0.6,100,"{'colsample_bytree': 0.8863531635625073, 'gamma': 1.4432252696586687, 'learning_rate': 0.14431831840673584, 'max_depth': 4.0, 'min_child_weight': 4.0, 'reg_alpha': 0.7656890601027424, 'reg_lambda': 0.5796745106013773, 'subsample': 0.8862819830666011}"
86
- ft_transformer,smotenc_ctgan20000,gwangju,optuna,0.49840606228991896,100,"{'d_token': 160, 'n_blocks': 3, 'n_heads': 8, 'attention_dropout': 0.13369638195384545, 'ffn_dropout': 0.25693944180246137, 'lr': 0.00046073450640759476, 'weight_decay': 0.0005511169366746759, 'batch_size': 32}"
87
- lgb,ctgan10000,incheon,hyperopt,0.5592,100,"{'colsample_bytree': 0.7615743363801121, 'learning_rate': 0.032013705340192794, 'max_depth': 12.0, 'min_child_weight': 4.0, 'num_leaves': 135.0, 'reg_alpha': 0.07355917150019742, 'reg_lambda': 0.7693270890686972, 'subsample': 0.8491133431153928}"
88
- xgb,ctgan10000,incheon,hyperopt,0.5752,100,"{'colsample_bytree': 0.9876604099689714, 'gamma': 2.7259563165720655, 'learning_rate': 0.014353110732979967, 'max_depth': 5.0, 'min_child_weight': 8.0, 'reg_alpha': 0.43412935826888077, 'reg_lambda': 0.45790677460553664, 'subsample': 0.6390967315026312}"
89
- lgb,smote,gwangju,hyperopt,0.5297,100,"{'colsample_bytree': 0.9919060649789312, 'learning_rate': 0.054631157314326724, 'max_depth': 15.0, 'min_child_weight': 3.0, 'num_leaves': 47.0, 'reg_alpha': 0.9190252546800255, 'reg_lambda': 0.8800706832709921, 'subsample': 0.7859941375783913}"
90
- deepgbm,smote,busan,optuna,0.44276076380851953,100,"{'d_main': 64, 'd_hidden': 192, 'n_blocks': 3, 'dropout': 0.3745726718399158, 'lr': 0.005338808759265471, 'weight_decay': 0.06558728678415406, 'batch_size': 32}"
91
- xgb,smotenc_ctgan20000,gwangju,hyperopt,0.5012,100,"{'colsample_bytree': 0.9791354222802998, 'gamma': 1.1338312824344885, 'learning_rate': 0.06507978835501058, 'max_depth': 7.0, 'min_child_weight': 20.0, 'reg_alpha': 0.4224203070076331, 'reg_lambda': 0.548023995725087, 'subsample': 0.6318874518405971}"
92
- resnet_like,pure,daejeon,optuna,0.4661049144479625,100,"{'d_main': 128, 'd_hidden': 128, 'n_blocks': 5, 'dropout_first': 0.17640987113900017, 'dropout_second': 0.07455114486955416, 'lr': 0.0014433413138854976, 'weight_decay': 0.002550708908998169, 'batch_size': 32}"
93
- deepgbm,pure,seoul,optuna,0.5441091848294762,100,"{'d_main': 96, 'd_hidden': 256, 'n_blocks': 2, 'dropout': 0.38755576686685356, 'lr': 0.001481020211477321, 'weight_decay': 0.0036076992783665223, 'batch_size': 32}"
94
- deepgbm,ctgan10000,daegu,optuna,0.4390250453058284,100,"{'d_main': 64, 'd_hidden': 192, 'n_blocks': 2, 'dropout': 0.29112938728448373, 'lr': 0.002745246324742509, 'weight_decay': 0.07823286969698617, 'batch_size': 32}"
95
- ft_transformer,smotenc_ctgan20000,incheon,optuna,0.5658689261675397,100,"{'d_token': 64, 'n_blocks': 6, 'n_heads': 4, 'attention_dropout': 0.3014135618352386, 'ffn_dropout': 0.2377846194971104, 'lr': 0.0023493513093616647, 'weight_decay': 0.0983836135411264, 'batch_size': 256}"
96
- xgb,smotenc_ctgan20000,busan,hyperopt,0.4764,100,"{'colsample_bytree': 0.6617270669071449, 'gamma': 1.8648711247698304, 'learning_rate': 0.116012512145597, 'max_depth': 5.0, 'min_child_weight': 8.0, 'reg_alpha': 0.0033591561849573015, 'reg_lambda': 0.6137037380779208, 'subsample': 0.6181654880316922}"
97
- ft_transformer,smote,daejeon,optuna,0.482035457043179,100,"{'d_token': 64, 'n_blocks': 3, 'n_heads': 8, 'attention_dropout': 0.3951058265943127, 'ffn_dropout': 0.15816355519966163, 'lr': 0.0011481409855838528, 'weight_decay': 0.00011134016766733501, 'batch_size': 64}"
98
- resnet_like,smote,incheon,optuna,0.5677136089270878,100,"{'d_main': 192, 'd_hidden': 256, 'n_blocks': 3, 'dropout_first': 0.29114464826990016, 'dropout_second': 0.1740467381980997, 'lr': 0.0013843537735809836, 'weight_decay': 0.032020003146016864, 'batch_size': 128}"
99
- xgb,smote,busan,hyperopt,0.4773,100,"{'colsample_bytree': 0.7955102167770075, 'gamma': 0.16947237102826285, 'learning_rate': 0.04201247161970075, 'max_depth': 11.0, 'min_child_weight': 4.0, 'reg_alpha': 0.9136392250501164, 'reg_lambda': 0.4323511178052387, 'subsample': 0.6554957061124282}"
100
- xgb,pure,busan,hyperopt,0.4949,100,"{'colsample_bytree': 0.8651175745135303, 'gamma': 2.0220518303820976, 'learning_rate': 0.04196437449161767, 'max_depth': 7.0, 'min_child_weight': 17.0, 'reg_alpha': 0.9213159636887744, 'reg_lambda': 0.9407811453878014, 'subsample': 0.7200034080497129}"
101
- resnet_like,smotenc_ctgan20000,seoul,optuna,0.5571199836340152,100,"{'d_main': 192, 'd_hidden': 64, 'n_blocks': 5, 'dropout_first': 0.2550924108208247, 'dropout_second': 0.1294554884955162, 'lr': 0.0046329086177801316, 'weight_decay': 0.012820236692231602, 'batch_size': 64}"
102
- deepgbm,ctgan10000,seoul,optuna,0.5652632820174084,100,"{'d_main': 160, 'd_hidden': 64, 'n_blocks': 3, 'dropout': 0.2113225708109025, 'lr': 0.0038029027900849553, 'weight_decay': 0.0005824342055366228, 'batch_size': 32}"
103
- xgb,smote,seoul,hyperopt,0.5914,100,"{'colsample_bytree': 0.9204650350059359, 'gamma': 0.006794691671452219, 'learning_rate': 0.08811009219503893, 'max_depth': 12.0, 'min_child_weight': 3.0, 'reg_alpha': 0.6690476963880017, 'reg_lambda': 0.5388102924254845, 'subsample': 0.8320339664880525}"
104
- resnet_like,ctgan10000,busan,optuna,0.49363300915248276,100,"{'d_main': 128, 'd_hidden': 512, 'n_blocks': 2, 'dropout_first': 0.3784300740258752, 'dropout_second': 0.026029354045211155, 'lr': 0.008483242466300268, 'weight_decay': 0.00016367394584020504, 'batch_size': 128}"
105
- resnet_like,pure,gwangju,optuna,0.48295928519708414,100,"{'d_main': 224, 'd_hidden': 448, 'n_blocks': 5, 'dropout_first': 0.19145077532270985, 'dropout_second': 0.17709489175426982, 'lr': 0.0013336041583887023, 'weight_decay': 0.012922488005108791, 'batch_size': 32}"
106
- lgb,pure,daegu,hyperopt,0.405,100,"{'colsample_bytree': 0.7032353545561223, 'learning_rate': 0.027019407889171166, 'max_depth': 3.0, 'min_child_weight': 4.0, 'num_leaves': 134.0, 'reg_alpha': 0.3506952189889989, 'reg_lambda': 0.46506999012541095, 'subsample': 0.9411873767438697}"
107
- deepgbm,smotenc_ctgan20000,seoul,optuna,0.5657974901513136,100,"{'d_main': 128, 'd_hidden': 64, 'n_blocks': 5, 'dropout': 0.1024240157205574, 'lr': 0.009019177625524915, 'weight_decay': 0.08874117499066106, 'batch_size': 256}"
108
- resnet_like,ctgan10000,incheon,optuna,0.5876200434398301,100,"{'d_main': 160, 'd_hidden': 192, 'n_blocks': 3, 'dropout_first': 0.213366405042877, 'dropout_second': 0.0616930432275245, 'lr': 0.005092968501532562, 'weight_decay': 0.06153947659623341, 'batch_size': 256}"
109
- xgb,smotenc_ctgan20000,daejeon,hyperopt,0.4957,100,"{'colsample_bytree': 0.839455487096683, 'gamma': 0.6837674570637463, 'learning_rate': 0.059254118154918205, 'max_depth': 12.0, 'min_child_weight': 16.0, 'reg_alpha': 0.741156478605324, 'reg_lambda': 0.21565422560180894, 'subsample': 0.6536276314951073}"
110
- xgb,pure,daejeon,hyperopt,0.5098,100,"{'colsample_bytree': 0.7188993723348515, 'gamma': 0.8312229248711611, 'learning_rate': 0.13636264607406778, 'max_depth': 4.0, 'min_child_weight': 4.0, 'reg_alpha': 0.8193564485724522, 'reg_lambda': 0.796595038878536, 'subsample': 0.6408013744463261}"
111
- xgb,ctgan10000,busan,hyperopt,0.4881,100,"{'colsample_bytree': 0.6875780545091519, 'gamma': 2.658982028747467, 'learning_rate': 0.11595019019771602, 'max_depth': 12.0, 'min_child_weight': 18.0, 'reg_alpha': 0.9071038284958559, 'reg_lambda': 0.2880553949579837, 'subsample': 0.7100915343806586}"
112
- lgb,smotenc_ctgan20000,seoul,hyperopt,0.5622,100,"{'colsample_bytree': 0.6345219883667235, 'learning_rate': 0.010708867004743688, 'max_depth': 14.0, 'min_child_weight': 14.0, 'num_leaves': 111.0, 'reg_alpha': 0.12632598375404414, 'reg_lambda': 0.632120993668035, 'subsample': 0.6705295797223814}"
113
- lgb,pure,daejeon,hyperopt,0.4963,100,"{'colsample_bytree': 0.8561310986458904, 'learning_rate': 0.04190098150317689, 'max_depth': 13.0, 'min_child_weight': 10.0, 'num_leaves': 20.0, 'reg_alpha': 0.8699443310189154, 'reg_lambda': 0.6437824460768218, 'subsample': 0.7878286207681128}"
114
- resnet_like,pure,busan,optuna,0.4542970487265281,100,"{'d_main': 160, 'd_hidden': 128, 'n_blocks': 3, 'dropout_first': 0.3972607776862854, 'dropout_second': 0.13044016240028491, 'lr': 0.009478600883696408, 'weight_decay': 0.009976371611112884, 'batch_size': 64}"
115
- resnet_like,smote,daegu,optuna,0.39559298655454533,100,"{'d_main': 96, 'd_hidden': 64, 'n_blocks': 4, 'dropout_first': 0.3420738859809193, 'dropout_second': 0.1494827111234321, 'lr': 0.008042182880401372, 'weight_decay': 0.00036799147801704795, 'batch_size': 32}"
116
- deepgbm,smotenc_ctgan20000,daejeon,optuna,0.4848928597361463,100,"{'d_main': 96, 'd_hidden': 64, 'n_blocks': 6, 'dropout': 0.2751230389403302, 'lr': 0.007765569142416956, 'weight_decay': 0.019237770834817113, 'batch_size': 128}"
117
- resnet_like,smotenc_ctgan20000,daejeon,optuna,0.48223852799636385,100,"{'d_main': 256, 'd_hidden': 128, 'n_blocks': 2, 'dropout_first': 0.1346841434587851, 'dropout_second': 0.09633438692418629, 'lr': 0.008312402405575084, 'weight_decay': 0.00017010818801516423, 'batch_size': 256}"
118
- deepgbm,pure,daegu,optuna,0.3570454117885781,100,"{'d_main': 160, 'd_hidden': 64, 'n_blocks': 4, 'dropout': 0.39867335991930597, 'lr': 0.002428396450034936, 'weight_decay': 0.05750366129158036, 'batch_size': 32}"
119
- resnet_like,pure,daegu,optuna,0.4075222542316909,100,"{'d_main': 192, 'd_hidden': 64, 'n_blocks': 3, 'dropout_first': 0.10590946492415504, 'dropout_second': 0.10708545709499212, 'lr': 0.0012544175963123808, 'weight_decay': 0.08314171375285831, 'batch_size': 32}"
120
- deepgbm,ctgan10000,gwangju,optuna,0.5204031176113428,100,"{'d_main': 128, 'd_hidden': 192, 'n_blocks': 6, 'dropout': 0.3938212564993552, 'lr': 0.007164979269975063, 'weight_decay': 0.0923681134285374, 'batch_size': 64}"
121
- ft_transformer,ctgan10000,incheon,optuna,0.5643832267965512,100,"{'d_token': 128, 'n_blocks': 6, 'n_heads': 4, 'attention_dropout': 0.1369145590780379, 'ffn_dropout': 0.16996780951611673, 'lr': 0.0003662598943307626, 'weight_decay': 0.000729119254134465, 'batch_size': 256}"
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c57d5ca97fe773c9252fdb884b97c032769ac7032cefdba4577e88553ed2e743
3
+ size 32383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Analysis_code/7.ensemble/analysis_of_shap.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Analysis_code/7.ensemble/ensemble.ipynb CHANGED
@@ -1,789 +1,3 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "a19bb535",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "# utils.py는 같은 디렉토리에 있으므로 직접 import 가능\n",
11
- "from utils import predict_test_proba, calculate_csi\n",
12
- "import numpy as np\n",
13
- "import pandas as pd"
14
- ]
15
- },
16
- {
17
- "cell_type": "code",
18
- "execution_count": 2,
19
- "id": "1048a139",
20
- "metadata": {},
21
- "outputs": [],
22
- "source": [
23
- "df= pd.read_csv(\"../6.optima_models_analysis/best_samples_best_datasample_per_model_per_region.csv\")\n",
24
- "df= df.loc[df.groupby(['region','optimization_library'])['best_csi'].idxmax(),:]\n",
25
- "df.sort_values(by=['region','best_csi'], ascending=False, inplace=True)"
26
- ]
27
- },
28
- {
29
- "cell_type": "code",
30
- "execution_count": 3,
31
- "id": "67d96046",
32
- "metadata": {},
33
- "outputs": [
34
- {
35
- "data": {
36
- "text/html": [
37
- "<div>\n",
38
- "<style scoped>\n",
39
- " .dataframe tbody tr th:only-of-type {\n",
40
- " vertical-align: middle;\n",
41
- " }\n",
42
- "\n",
43
- " .dataframe tbody tr th {\n",
44
- " vertical-align: top;\n",
45
- " }\n",
46
- "\n",
47
- " .dataframe thead th {\n",
48
- " text-align: right;\n",
49
- " }\n",
50
- "</style>\n",
51
- "<table border=\"1\" class=\"dataframe\">\n",
52
- " <thead>\n",
53
- " <tr style=\"text-align: right;\">\n",
54
- " <th></th>\n",
55
- " <th>model</th>\n",
56
- " <th>data_sample</th>\n",
57
- " <th>region</th>\n",
58
- " <th>optimization_library</th>\n",
59
- " <th>best_csi</th>\n",
60
- " <th>n_trials</th>\n",
61
- " <th>best_params</th>\n",
62
- " </tr>\n",
63
- " </thead>\n",
64
- " <tbody>\n",
65
- " <tr>\n",
66
- " <th>0</th>\n",
67
- " <td>ft_transformer</td>\n",
68
- " <td>ctgan10000</td>\n",
69
- " <td>seoul</td>\n",
70
- " <td>optuna</td>\n",
71
- " <td>0.593658</td>\n",
72
- " <td>100</td>\n",
73
- " <td>{'d_token': 224, 'n_blocks': 4, 'n_heads': 8, ...</td>\n",
74
- " </tr>\n",
75
- " <tr>\n",
76
- " <th>1</th>\n",
77
- " <td>xgb</td>\n",
78
- " <td>smote</td>\n",
79
- " <td>seoul</td>\n",
80
- " <td>hyperopt</td>\n",
81
- " <td>0.591400</td>\n",
82
- " <td>100</td>\n",
83
- " <td>{'colsample_bytree': 0.9204650350059359, 'gamm...</td>\n",
84
- " </tr>\n",
85
- " <tr>\n",
86
- " <th>5</th>\n",
87
- " <td>xgb</td>\n",
88
- " <td>smote</td>\n",
89
- " <td>incheon</td>\n",
90
- " <td>hyperopt</td>\n",
91
- " <td>0.600000</td>\n",
92
- " <td>100</td>\n",
93
- " <td>{'colsample_bytree': 0.8863531635625073, 'gamm...</td>\n",
94
- " </tr>\n",
95
- " <tr>\n",
96
- " <th>7</th>\n",
97
- " <td>resnet_like</td>\n",
98
- " <td>ctgan10000</td>\n",
99
- " <td>incheon</td>\n",
100
- " <td>optuna</td>\n",
101
- " <td>0.587620</td>\n",
102
- " <td>100</td>\n",
103
- " <td>{'d_main': 160, 'd_hidden': 192, 'n_blocks': 3...</td>\n",
104
- " </tr>\n",
105
- " <tr>\n",
106
- " <th>10</th>\n",
107
- " <td>xgb</td>\n",
108
- " <td>smote</td>\n",
109
- " <td>gwangju</td>\n",
110
- " <td>hyperopt</td>\n",
111
- " <td>0.530000</td>\n",
112
- " <td>100</td>\n",
113
- " <td>{'colsample_bytree': 0.7658195937298418, 'gamm...</td>\n",
114
- " </tr>\n",
115
- " <tr>\n",
116
- " <th>12</th>\n",
117
- " <td>deepgbm</td>\n",
118
- " <td>ctgan10000</td>\n",
119
- " <td>gwangju</td>\n",
120
- " <td>optuna</td>\n",
121
- " <td>0.520403</td>\n",
122
- " <td>100</td>\n",
123
- " <td>{'d_main': 128, 'd_hidden': 192, 'n_blocks': 6...</td>\n",
124
- " </tr>\n",
125
- " <tr>\n",
126
- " <th>15</th>\n",
127
- " <td>xgb</td>\n",
128
- " <td>smote</td>\n",
129
- " <td>daejeon</td>\n",
130
- " <td>hyperopt</td>\n",
131
- " <td>0.537100</td>\n",
132
- " <td>100</td>\n",
133
- " <td>{'colsample_bytree': 0.733236256331133, 'gamma...</td>\n",
134
- " </tr>\n",
135
- " <tr>\n",
136
- " <th>17</th>\n",
137
- " <td>resnet_like</td>\n",
138
- " <td>ctgan10000</td>\n",
139
- " <td>daejeon</td>\n",
140
- " <td>optuna</td>\n",
141
- " <td>0.510177</td>\n",
142
- " <td>100</td>\n",
143
- " <td>{'d_main': 128, 'd_hidden': 256, 'n_blocks': 5...</td>\n",
144
- " </tr>\n",
145
- " <tr>\n",
146
- " <th>20</th>\n",
147
- " <td>xgb</td>\n",
148
- " <td>smote</td>\n",
149
- " <td>daegu</td>\n",
150
- " <td>hyperopt</td>\n",
151
- " <td>0.467200</td>\n",
152
- " <td>100</td>\n",
153
- " <td>{'colsample_bytree': 0.8132816721507904, 'gamm...</td>\n",
154
- " </tr>\n",
155
- " <tr>\n",
156
- " <th>22</th>\n",
157
- " <td>resnet_like</td>\n",
158
- " <td>ctgan10000</td>\n",
159
- " <td>daegu</td>\n",
160
- " <td>optuna</td>\n",
161
- " <td>0.460863</td>\n",
162
- " <td>100</td>\n",
163
- " <td>{'d_main': 96, 'd_hidden': 256, 'n_blocks': 3,...</td>\n",
164
- " </tr>\n",
165
- " <tr>\n",
166
- " <th>25</th>\n",
167
- " <td>ft_transformer</td>\n",
168
- " <td>ctgan10000</td>\n",
169
- " <td>busan</td>\n",
170
- " <td>optuna</td>\n",
171
- " <td>0.496046</td>\n",
172
- " <td>100</td>\n",
173
- " <td>{'d_token': 224, 'n_blocks': 2, 'n_heads': 8, ...</td>\n",
174
- " </tr>\n",
175
- " <tr>\n",
176
- " <th>26</th>\n",
177
- " <td>xgb</td>\n",
178
- " <td>pure</td>\n",
179
- " <td>busan</td>\n",
180
- " <td>hyperopt</td>\n",
181
- " <td>0.494900</td>\n",
182
- " <td>100</td>\n",
183
- " <td>{'colsample_bytree': 0.8651175745135303, 'gamm...</td>\n",
184
- " </tr>\n",
185
- " </tbody>\n",
186
- "</table>\n",
187
- "</div>"
188
- ],
189
- "text/plain": [
190
- " model data_sample region optimization_library best_csi \\\n",
191
- "0 ft_transformer ctgan10000 seoul optuna 0.593658 \n",
192
- "1 xgb smote seoul hyperopt 0.591400 \n",
193
- "5 xgb smote incheon hyperopt 0.600000 \n",
194
- "7 resnet_like ctgan10000 incheon optuna 0.587620 \n",
195
- "10 xgb smote gwangju hyperopt 0.530000 \n",
196
- "12 deepgbm ctgan10000 gwangju optuna 0.520403 \n",
197
- "15 xgb smote daejeon hyperopt 0.537100 \n",
198
- "17 resnet_like ctgan10000 daejeon optuna 0.510177 \n",
199
- "20 xgb smote daegu hyperopt 0.467200 \n",
200
- "22 resnet_like ctgan10000 daegu optuna 0.460863 \n",
201
- "25 ft_transformer ctgan10000 busan optuna 0.496046 \n",
202
- "26 xgb pure busan hyperopt 0.494900 \n",
203
- "\n",
204
- " n_trials best_params \n",
205
- "0 100 {'d_token': 224, 'n_blocks': 4, 'n_heads': 8, ... \n",
206
- "1 100 {'colsample_bytree': 0.9204650350059359, 'gamm... \n",
207
- "5 100 {'colsample_bytree': 0.8863531635625073, 'gamm... \n",
208
- "7 100 {'d_main': 160, 'd_hidden': 192, 'n_blocks': 3... \n",
209
- "10 100 {'colsample_bytree': 0.7658195937298418, 'gamm... \n",
210
- "12 100 {'d_main': 128, 'd_hidden': 192, 'n_blocks': 6... \n",
211
- "15 100 {'colsample_bytree': 0.733236256331133, 'gamma... \n",
212
- "17 100 {'d_main': 128, 'd_hidden': 256, 'n_blocks': 5... \n",
213
- "20 100 {'colsample_bytree': 0.8132816721507904, 'gamm... \n",
214
- "22 100 {'d_main': 96, 'd_hidden': 256, 'n_blocks': 3,... \n",
215
- "25 100 {'d_token': 224, 'n_blocks': 2, 'n_heads': 8, ... \n",
216
- "26 100 {'colsample_bytree': 0.8651175745135303, 'gamm... "
217
- ]
218
- },
219
- "execution_count": 3,
220
- "metadata": {},
221
- "output_type": "execute_result"
222
- }
223
- ],
224
- "source": [
225
- "df"
226
- ]
227
- },
228
- {
229
- "cell_type": "markdown",
230
- "id": "791fe38f",
231
- "metadata": {},
232
- "source": [
233
- "## 1. Seoul\n"
234
- ]
235
- },
236
- {
237
- "cell_type": "code",
238
- "execution_count": 4,
239
- "id": "998634af",
240
- "metadata": {},
241
- "outputs": [
242
- {
243
- "data": {
244
- "text/plain": [
245
- "0.3371710526315512"
246
- ]
247
- },
248
- "execution_count": 4,
249
- "metadata": {},
250
- "output_type": "execute_result"
251
- }
252
- ],
253
- "source": [
254
- "probs, y_test = predict_test_proba(\n",
255
- " model_name='ft_transformer',\n",
256
- " region='seoul',\n",
257
- " data_sample='ctgan10000',\n",
258
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
259
- " n_folds=3\n",
260
- ")\n",
261
- "probs_1 = np.mean(probs, axis=0)\n",
262
- "calculate_csi(np.argmax(probs_1, axis=1), y_test)"
263
- ]
264
- },
265
- {
266
- "cell_type": "code",
267
- "execution_count": 5,
268
- "id": "a4f94ce4",
269
- "metadata": {},
270
- "outputs": [
271
- {
272
- "data": {
273
- "text/plain": [
274
- "0.3431294678315851"
275
- ]
276
- },
277
- "execution_count": 5,
278
- "metadata": {},
279
- "output_type": "execute_result"
280
- }
281
- ],
282
- "source": [
283
- "probs, y_test = predict_test_proba(\n",
284
- " model_name='xgb',\n",
285
- " region='seoul',\n",
286
- " data_sample='smote',\n",
287
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
288
- " n_folds=3\n",
289
- ")\n",
290
- "probs_2= np.mean(probs, axis=0)\n",
291
- "calculate_csi(np.argmax(probs_2, axis=1), y_test)"
292
- ]
293
- },
294
- {
295
- "cell_type": "code",
296
- "execution_count": 6,
297
- "id": "69421641",
298
- "metadata": {},
299
- "outputs": [
300
- {
301
- "data": {
302
- "text/plain": [
303
- "0.35207823960877327"
304
- ]
305
- },
306
- "execution_count": 6,
307
- "metadata": {},
308
- "output_type": "execute_result"
309
- }
310
- ],
311
- "source": [
312
- "final_pred = np.array([probs_1, probs_2])\n",
313
- "final_pred = np.mean(final_pred, axis=0)\n",
314
- "final_pred = np.argmax(final_pred, axis=1)\n",
315
- "calculate_csi(final_pred, y_test)"
316
- ]
317
- },
318
- {
319
- "cell_type": "markdown",
320
- "id": "586f2131",
321
- "metadata": {},
322
- "source": [
323
- "## 2. Incheon"
324
- ]
325
- },
326
- {
327
- "cell_type": "code",
328
- "execution_count": 7,
329
- "id": "8518d772",
330
- "metadata": {},
331
- "outputs": [
332
- {
333
- "data": {
334
- "text/plain": [
335
- "0.5848329048842812"
336
- ]
337
- },
338
- "execution_count": 7,
339
- "metadata": {},
340
- "output_type": "execute_result"
341
- }
342
- ],
343
- "source": [
344
- "probs, y_test = predict_test_proba(\n",
345
- " model_name='xgb',\n",
346
- " region='incheon',\n",
347
- " data_sample='smote',\n",
348
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
349
- " n_folds=3\n",
350
- ")\n",
351
- "probs_1 = np.mean(probs, axis=0)\n",
352
- "calculate_csi(np.argmax(probs_1, axis=1), y_test)\n"
353
- ]
354
- },
355
- {
356
- "cell_type": "code",
357
- "execution_count": 8,
358
- "metadata": {},
359
- "outputs": [
360
- {
361
- "data": {
362
- "text/plain": [
363
- "0.5070785070784745"
364
- ]
365
- },
366
- "execution_count": 8,
367
- "metadata": {},
368
- "output_type": "execute_result"
369
- }
370
- ],
371
- "source": [
372
- "probs, y_test = predict_test_proba(\n",
373
- " model_name='resnet_like',\n",
374
- " region='incheon',\n",
375
- " data_sample='ctgan10000',\n",
376
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
377
- " n_folds=3\n",
378
- ")\n",
379
- "probs_2= np.mean(probs, axis=0)\n",
380
- "calculate_csi(np.argmax(probs_2, axis=1), y_test)"
381
- ]
382
- },
383
- {
384
- "cell_type": "code",
385
- "execution_count": 9,
386
- "id": "1e156a8b",
387
- "metadata": {},
388
- "outputs": [
389
- {
390
- "data": {
391
- "text/plain": [
392
- "0.56152849740929"
393
- ]
394
- },
395
- "execution_count": 9,
396
- "metadata": {},
397
- "output_type": "execute_result"
398
- }
399
- ],
400
- "source": [
401
- "final_pred = np.array([probs_1, probs_2])\n",
402
- "final_pred = np.mean(final_pred, axis=0)\n",
403
- "final_pred = np.argmax(final_pred, axis=1)\n",
404
- "calculate_csi(final_pred, y_test)"
405
- ]
406
- },
407
- {
408
- "cell_type": "markdown",
409
- "id": "41a294ed",
410
- "metadata": {},
411
- "source": [
412
- "## 3. Gwangju"
413
- ]
414
- },
415
- {
416
- "cell_type": "code",
417
- "execution_count": 10,
418
- "id": "e3d931c2",
419
- "metadata": {},
420
- "outputs": [
421
- {
422
- "data": {
423
- "text/plain": [
424
- "0.49895397489534526"
425
- ]
426
- },
427
- "execution_count": 10,
428
- "metadata": {},
429
- "output_type": "execute_result"
430
- }
431
- ],
432
- "source": [
433
- "probs, y_test = predict_test_proba(\n",
434
- " model_name='xgb',\n",
435
- " region='gwangju',\n",
436
- " data_sample='smote',\n",
437
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
438
- " n_folds=3\n",
439
- ")\n",
440
- "probs_1 = np.mean(probs, axis=0)\n",
441
- "calculate_csi(np.argmax(probs_1, axis=1), y_test)\n"
442
- ]
443
- },
444
- {
445
- "cell_type": "code",
446
- "execution_count": 11,
447
- "id": "35e6a123",
448
- "metadata": {},
449
- "outputs": [
450
- {
451
- "data": {
452
- "text/plain": [
453
- "0.4685314685314217"
454
- ]
455
- },
456
- "execution_count": 11,
457
- "metadata": {},
458
- "output_type": "execute_result"
459
- }
460
- ],
461
- "source": [
462
- "probs, y_test = predict_test_proba(\n",
463
- " model_name='deepgbm',\n",
464
- " region='gwangju',\n",
465
- " data_sample='ctgan10000',\n",
466
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
467
- " n_folds=3\n",
468
- ")\n",
469
- "probs_2= np.mean(probs, axis=0)\n",
470
- "calculate_csi(np.argmax(probs_2, axis=1), y_test)"
471
- ]
472
- },
473
- {
474
- "cell_type": "code",
475
- "execution_count": 12,
476
- "id": "281bec8d",
477
- "metadata": {},
478
- "outputs": [
479
- {
480
- "data": {
481
- "text/plain": [
482
- "0.5052192066805318"
483
- ]
484
- },
485
- "execution_count": 12,
486
- "metadata": {},
487
- "output_type": "execute_result"
488
- }
489
- ],
490
- "source": [
491
- "final_pred = np.array([probs_1, probs_2])\n",
492
- "final_pred = np.mean(final_pred, axis=0)\n",
493
- "final_pred = np.argmax(final_pred, axis=1)\n",
494
- "calculate_csi(final_pred, y_test)"
495
- ]
496
- },
497
- {
498
- "cell_type": "markdown",
499
- "id": "48178b79",
500
- "metadata": {},
501
- "source": [
502
- "## 4. Daejeon"
503
- ]
504
- },
505
- {
506
- "cell_type": "code",
507
- "execution_count": 13,
508
- "id": "6531212c",
509
- "metadata": {},
510
- "outputs": [
511
- {
512
- "data": {
513
- "text/plain": [
514
- "0.394344069128013"
515
- ]
516
- },
517
- "execution_count": 13,
518
- "metadata": {},
519
- "output_type": "execute_result"
520
- }
521
- ],
522
- "source": [
523
- "probs, y_test = predict_test_proba(\n",
524
- " model_name='xgb',\n",
525
- " region='daejeon',\n",
526
- " data_sample='smote',\n",
527
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
528
- " n_folds=3\n",
529
- ")\n",
530
- "probs_1 = np.mean(probs, axis=0)\n",
531
- "calculate_csi(np.argmax(probs_1, axis=1), y_test)\n"
532
- ]
533
- },
534
- {
535
- "cell_type": "code",
536
- "execution_count": 14,
537
- "id": "c7cdb38b",
538
- "metadata": {},
539
- "outputs": [
540
- {
541
- "data": {
542
- "text/plain": [
543
- "0.37882352941173497"
544
- ]
545
- },
546
- "execution_count": 14,
547
- "metadata": {},
548
- "output_type": "execute_result"
549
- }
550
- ],
551
- "source": [
552
- "probs, y_test = predict_test_proba(\n",
553
- " model_name='resnet_like',\n",
554
- " region='daejeon',\n",
555
- " data_sample='ctgan10000',\n",
556
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
557
- " n_folds=3\n",
558
- ")\n",
559
- "probs_2= np.mean(probs, axis=0)\n",
560
- "calculate_csi(np.argmax(probs_2, axis=1), y_test)"
561
- ]
562
- },
563
- {
564
- "cell_type": "code",
565
- "execution_count": 15,
566
- "id": "be501df7",
567
- "metadata": {},
568
- "outputs": [
569
- {
570
- "data": {
571
- "text/plain": [
572
- "0.40776699029122915"
573
- ]
574
- },
575
- "execution_count": 15,
576
- "metadata": {},
577
- "output_type": "execute_result"
578
- }
579
- ],
580
- "source": [
581
- "final_pred = np.array([probs_1, probs_2])\n",
582
- "final_pred = np.mean(final_pred, axis=0)\n",
583
- "final_pred = np.argmax(final_pred, axis=1)\n",
584
- "calculate_csi(final_pred, y_test)"
585
- ]
586
- },
587
- {
588
- "cell_type": "markdown",
589
- "id": "e2ccc35e",
590
- "metadata": {},
591
- "source": [
592
- "## 5. Daegu"
593
- ]
594
- },
595
- {
596
- "cell_type": "code",
597
- "execution_count": 16,
598
- "id": "1f971cb1",
599
- "metadata": {},
600
- "outputs": [
601
- {
602
- "data": {
603
- "text/plain": [
604
- "0.2303523035229728"
605
- ]
606
- },
607
- "execution_count": 16,
608
- "metadata": {},
609
- "output_type": "execute_result"
610
- }
611
- ],
612
- "source": [
613
- "probs, y_test = predict_test_proba(\n",
614
- " model_name='xgb',\n",
615
- " region='daegu',\n",
616
- " data_sample='smote',\n",
617
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
618
- " n_folds=3\n",
619
- ")\n",
620
- "probs_1 = np.mean(probs, axis=0)\n",
621
- "calculate_csi(np.argmax(probs_1, axis=1), y_test)\n"
622
- ]
623
- },
624
- {
625
- "cell_type": "code",
626
- "execution_count": 17,
627
- "id": "da36de77",
628
- "metadata": {},
629
- "outputs": [
630
- {
631
- "data": {
632
- "text/plain": [
633
- "0.27622377622367966"
634
- ]
635
- },
636
- "execution_count": 17,
637
- "metadata": {},
638
- "output_type": "execute_result"
639
- }
640
- ],
641
- "source": [
642
- "probs, y_test = predict_test_proba(\n",
643
- " model_name='resnet_like',\n",
644
- " region='daegu',\n",
645
- " data_sample='ctgan10000',\n",
646
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
647
- " n_folds=3\n",
648
- ")\n",
649
- "probs_2= np.mean(probs, axis=0)\n",
650
- "calculate_csi(np.argmax(probs_2, axis=1), y_test)"
651
- ]
652
- },
653
- {
654
- "cell_type": "code",
655
- "execution_count": 18,
656
- "id": "500f73e4",
657
- "metadata": {},
658
- "outputs": [
659
- {
660
- "data": {
661
- "text/plain": [
662
- "0.2738853503183841"
663
- ]
664
- },
665
- "execution_count": 18,
666
- "metadata": {},
667
- "output_type": "execute_result"
668
- }
669
- ],
670
- "source": [
671
- "final_pred = np.array([probs_1, probs_2])\n",
672
- "final_pred = np.mean(final_pred, axis=0)\n",
673
- "final_pred = np.argmax(final_pred, axis=1)\n",
674
- "calculate_csi(final_pred, y_test)"
675
- ]
676
- },
677
- {
678
- "cell_type": "markdown",
679
- "id": "8b22a8da",
680
- "metadata": {},
681
- "source": [
682
- "## 6. Busan"
683
- ]
684
- },
685
- {
686
- "cell_type": "code",
687
- "execution_count": 19,
688
- "id": "74bfd797",
689
- "metadata": {},
690
- "outputs": [
691
- {
692
- "data": {
693
- "text/plain": [
694
- "0.441005802707845"
695
- ]
696
- },
697
- "execution_count": 19,
698
- "metadata": {},
699
- "output_type": "execute_result"
700
- }
701
- ],
702
- "source": [
703
- "probs, y_test = predict_test_proba(\n",
704
- " model_name='ft_transformer',\n",
705
- " region='busan',\n",
706
- " data_sample='ctgan10000',\n",
707
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
708
- " n_folds=3\n",
709
- ")\n",
710
- "probs_1 = np.mean(probs, axis=0)\n",
711
- "calculate_csi(np.argmax(probs_1, axis=1), y_test)\n"
712
- ]
713
- },
714
- {
715
- "cell_type": "code",
716
- "execution_count": 20,
717
- "id": "66fc12e7",
718
- "metadata": {},
719
- "outputs": [
720
- {
721
- "data": {
722
- "text/plain": [
723
- "0.49767441860453543"
724
- ]
725
- },
726
- "execution_count": 20,
727
- "metadata": {},
728
- "output_type": "execute_result"
729
- }
730
- ],
731
- "source": [
732
- "probs, y_test = predict_test_proba(\n",
733
- " model_name='xgb',\n",
734
- " region='busan',\n",
735
- " data_sample='pure',\n",
736
- " device='cuda', # Tree-based 모델은 device 파라미터가 사용되지 않음\n",
737
- " n_folds=3\n",
738
- ")\n",
739
- "probs_2= np.mean(probs, axis=0)\n",
740
- "calculate_csi(np.argmax(probs_2, axis=1), y_test)"
741
- ]
742
- },
743
- {
744
- "cell_type": "code",
745
- "execution_count": 21,
746
- "id": "78b1b5f8",
747
- "metadata": {},
748
- "outputs": [
749
- {
750
- "data": {
751
- "text/plain": [
752
- "0.4711934156377631"
753
- ]
754
- },
755
- "execution_count": 21,
756
- "metadata": {},
757
- "output_type": "execute_result"
758
- }
759
- ],
760
- "source": [
761
- "final_pred = np.array([probs_1, probs_2])\n",
762
- "final_pred = np.mean(final_pred, axis=0)\n",
763
- "final_pred = np.argmax(final_pred, axis=1)\n",
764
- "calculate_csi(final_pred, y_test)"
765
- ]
766
- }
767
- ],
768
- "metadata": {
769
- "kernelspec": {
770
- "display_name": "py39",
771
- "language": "python",
772
- "name": "python3"
773
- },
774
- "language_info": {
775
- "codemirror_mode": {
776
- "name": "ipython",
777
- "version": 3
778
- },
779
- "file_extension": ".py",
780
- "mimetype": "text/x-python",
781
- "name": "python",
782
- "nbconvert_exporter": "python",
783
- "pygments_lexer": "ipython3",
784
- "version": "3.9.18"
785
- }
786
- },
787
- "nbformat": 4,
788
- "nbformat_minor": 5
789
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b948db39e385865eccedfded6410f2d4719ad87d5a95d857b62ba34f1995065b
3
+ size 20952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Analysis_code/find_reason/trend_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff