markusbw committed on
Commit
c9f7289
·
1 Parent(s): eb2e35f

duplicate handling

Browse files
Files changed (1) hide show
  1. Markus_data_ting.ipynb +516 -0
Markus_data_ting.ipynb CHANGED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 10,
6
+ "id": "4f90bfb1",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pyarrow.parquet as pq\n",
11
+ "import pandas as pd\n",
12
+ "import random\n",
13
+ "import gc"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": null,
19
+ "id": "fedd7106",
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "# Open the Parquet file\n",
24
+ "parquet_file = pq.ParquetFile(r'C:\\Users\\marku\\Desktop\\4år\\AML\\AppliedML2025\\Final project\\antarctica ml\\AppML_2025\\tabular_train_dataset\\bedmap_train2_30m.parquet')\n",
25
+ "\n",
26
+ "# Get number of row groups\n",
27
+ "num_row_groups = parquet_file.num_row_groups\n",
28
+ "\n",
29
+ "# Select 10% of row groups randomly\n",
30
+ "sample_size = max(1, int(num_row_groups * 0.1))\n",
31
+ "selected_groups = random.sample(range(num_row_groups), sample_size)\n",
32
+ "\n",
33
+ "# Read only the selected row groups, excluding specific columns\n",
34
+ "dfs = []\n",
35
+ "for i in selected_groups:\n",
36
+ " table = parquet_file.read_row_group(i)\n",
37
+ " df = table.to_pandas()\n",
38
+ " df = df.drop(columns=['LON', 'LAT', 'geometry'], errors='ignore') # Drop unwanted columns if present\n",
39
+ " dfs.append(df)\n",
40
+ "\n",
41
+ "# Combine into one DataFrame\n",
42
+ "data = pd.concat(dfs, ignore_index=True)\n"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 5,
48
+ "id": "105c726d",
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "data.sort_values(by=['EAST', 'NORTH'], inplace=True)"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 6,
58
+ "id": "e72f55d9",
59
+ "metadata": {},
60
+ "outputs": [
61
+ {
62
+ "name": "stderr",
63
+ "output_type": "stream",
64
+ "text": [
65
+ "C:\\Users\\marku\\AppData\\Local\\Temp\\ipykernel_1488\\2804182976.py:2: SettingWithCopyWarning: \n",
66
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
67
+ "\n",
68
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
69
+ " duplicates.sort_values(by=['EAST', 'NORTH'], inplace=True)\n"
70
+ ]
71
+ }
72
+ ],
73
+ "source": [
74
+ "duplicates = data[data.duplicated(subset=['EAST', 'NORTH'], keep=False)]\n",
75
+ "duplicates.sort_values(by=['EAST', 'NORTH'], inplace=True)"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 7,
81
+ "id": "98f5dbf6",
82
+ "metadata": {},
83
+ "outputs": [
84
+ {
85
+ "name": "stdout",
86
+ "output_type": "stream",
87
+ "text": [
88
+ "no. duplicates: 20749.\n",
89
+ "Unique (EAST, NORTH) pairs that have duplicates: 8369\n"
90
+ ]
91
+ }
92
+ ],
93
+ "source": [
94
+ "print(f\"no. duplicates: {len(duplicates)}.\")\n",
95
+ "\n",
96
+ "num_duped_coord = duplicates[['EAST', 'NORTH']].drop_duplicates().shape[0]\n",
97
+ "print(\"Unique (EAST, NORTH) pairs that have duplicates:\", num_duped_coord)"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 8,
103
+ "id": "7e1a206f",
104
+ "metadata": {},
105
+ "outputs": [
106
+ {
107
+ "name": "stdout",
108
+ "output_type": "stream",
109
+ "text": [
110
+ " EAST NORTH THICK_range THICK_mean THICK_median \\\n",
111
+ "0 -2.308745e+06 1.143082e+06 16.590 343.410000 343.915 \n",
112
+ "1 -2.141053e+06 1.032035e+06 0.000 460.420000 460.420 \n",
113
+ "2 -1.944284e+06 9.048456e+05 0.000 1290.000000 1290.000 \n",
114
+ "3 -1.907729e+06 8.049775e+05 0.000 716.000000 716.000 \n",
115
+ "4 -1.902475e+06 9.188595e+05 5.060 913.010000 913.010 \n",
116
+ "5 -1.900838e+06 8.852626e+05 0.000 407.000000 407.000 \n",
117
+ "6 -1.875244e+06 8.974816e+05 0.000 1260.000000 1260.000 \n",
118
+ "7 -1.800857e+06 8.888363e+05 0.000 770.000000 770.000 \n",
119
+ "8 -1.766391e+06 7.534504e+05 0.000 763.000000 763.000 \n",
120
+ "9 -1.708213e+06 8.119492e+05 0.000 1267.000000 1267.000 \n",
121
+ "10 -1.698118e+06 7.934723e+05 0.000 612.000000 612.000 \n",
122
+ "11 -1.695179e+06 6.702288e+05 7.928 762.671000 762.671 \n",
123
+ "12 -1.689698e+06 -1.828034e+05 4.350 826.545000 826.545 \n",
124
+ "13 -1.688817e+06 -1.826473e+05 1.740 858.260000 858.260 \n",
125
+ "14 -1.677457e+06 -1.213272e+05 0.350 1819.685000 1819.685 \n",
126
+ "15 -1.672095e+06 7.657638e+05 0.000 249.000000 249.000 \n",
127
+ "16 -1.641989e+06 5.976326e+05 2.990 1706.393333 1706.630 \n",
128
+ "17 -1.628775e+06 4.224858e+05 0.000 1300.000000 1300.000 \n",
129
+ "18 -1.627830e+06 4.993682e+05 0.000 943.000000 943.000 \n",
130
+ "19 -1.622924e+06 -2.434122e+05 4.600 502.560000 502.560 \n",
131
+ "20 -1.622312e+06 -2.459353e+05 0.220 690.100000 690.100 \n",
132
+ "21 -1.622251e+06 -2.462038e+05 1.570 704.635000 704.635 \n",
133
+ "22 -1.621120e+06 -2.512063e+05 4.950 710.475000 710.475 \n",
134
+ "23 -1.620706e+06 -2.522192e+05 3.240 716.200000 716.200 \n",
135
+ "24 -1.618462e+06 -2.385436e+05 1.170 663.165000 663.165 \n",
136
+ "\n",
137
+ " THICK_range_ratio \n",
138
+ "0 0.048310 \n",
139
+ "1 0.000000 \n",
140
+ "2 0.000000 \n",
141
+ "3 0.000000 \n",
142
+ "4 0.005542 \n",
143
+ "5 0.000000 \n",
144
+ "6 0.000000 \n",
145
+ "7 0.000000 \n",
146
+ "8 0.000000 \n",
147
+ "9 0.000000 \n",
148
+ "10 0.000000 \n",
149
+ "11 0.010395 \n",
150
+ "12 0.005263 \n",
151
+ "13 0.002027 \n",
152
+ "14 0.000192 \n",
153
+ "15 0.000000 \n",
154
+ "16 0.001752 \n",
155
+ "17 0.000000 \n",
156
+ "18 0.000000 \n",
157
+ "19 0.009153 \n",
158
+ "20 0.000319 \n",
159
+ "21 0.002228 \n",
160
+ "22 0.006967 \n",
161
+ "23 0.004524 \n",
162
+ "24 0.001764 \n"
163
+ ]
164
+ }
165
+ ],
166
+ "source": [
167
+ "summary = (\n",
168
+ " duplicates.groupby(['EAST', 'NORTH']).agg(\n",
169
+ " THICK_range=('THICK', lambda x: x.max() - x.min()),\n",
170
+ " THICK_mean=('THICK', 'mean'),\n",
171
+ " THICK_median=('THICK', 'median')\n",
172
+ " ).reset_index()\n",
173
+ ")\n",
174
+ "summary['THICK_range_ratio'] = summary['THICK_range'] / summary['THICK_mean']\n",
175
+ "\n",
176
+ "print(summary.head(25))\n"
177
+ ]
178
+ },
179
+ {
180
+ "cell_type": "code",
181
+ "execution_count": 9,
182
+ "id": "2b7c1579",
183
+ "metadata": {},
184
+ "outputs": [
185
+ {
186
+ "name": "stdout",
187
+ "output_type": "stream",
188
+ "text": [
189
+ "0.0030684829606210935\n",
190
+ "0.017637902014526745\n",
191
+ "8369\n",
192
+ "2317\n",
193
+ "1259\n"
194
+ ]
195
+ }
196
+ ],
197
+ "source": [
198
+ "print(summary['THICK_range_ratio'].median())\n",
199
+ "print(summary['THICK_range_ratio'].mean())\n",
200
+ "print(len(summary))\n",
201
+ "print(len(summary[summary['THICK_range_ratio'] > 0.01]))\n",
202
+ "print(len(summary[(summary['THICK_range_ratio'] > 0.025)]))"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 17,
208
+ "id": "d4ea1f81",
209
+ "metadata": {},
210
+ "outputs": [],
211
+ "source": [
212
+ "# Keep only duplicated (EAST, NORTH) coordinates whose THICK_range_ratio is <= 0.025\n",
213
+ "summary = summary[summary['THICK_range_ratio'] <= 0.025]"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": 19,
219
+ "id": "5b63659d",
220
+ "metadata": {},
221
+ "outputs": [],
222
+ "source": [
223
+ "# Step 1: Merge data with summary to bring in THICK_median\n",
224
+ "merged = data.merge(summary, on=['EAST', 'NORTH'], how='left')\n",
225
+ "\n",
226
+ "# Step 2: Keep either:\n",
227
+ "# - rows not in summary (i.e., THICK_median is NaN)\n",
228
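+ "# - or rows where THICK == THICK_median (exact match; for an even-sized group the median is the average of the two middle values, so possibly no row matches)\n",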
229
+ "result = merged[\n",
230
+ " merged['THICK_median'].isna() |\n",
231
+ " (merged['THICK'] == merged['THICK_median'])\n",
232
+ "]\n",
233
+ "del merged\n",
234
+ "gc.collect()\n",
235
+ "\n",
236
+ "\n",
237
+ "\n",
238
+ "# Optional: Drop the summary columns (THICK_median, THICK_range, THICK_mean, THICK_range_ratio) if not needed\n",
239
+ "result = result.drop(columns=['THICK_median', 'THICK_range', 'THICK_mean', 'THICK_range_ratio'], errors='ignore')\n"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 20,
245
+ "id": "c45e6e83",
246
+ "metadata": {},
247
+ "outputs": [
248
+ {
249
+ "data": {
250
+ "text/html": [
251
+ "<div>\n",
252
+ "<style scoped>\n",
253
+ " .dataframe tbody tr th:only-of-type {\n",
254
+ " vertical-align: middle;\n",
255
+ " }\n",
256
+ "\n",
257
+ " .dataframe tbody tr th {\n",
258
+ " vertical-align: top;\n",
259
+ " }\n",
260
+ "\n",
261
+ " .dataframe thead th {\n",
262
+ " text-align: right;\n",
263
+ " }\n",
264
+ "</style>\n",
265
+ "<table border=\"1\" class=\"dataframe\">\n",
266
+ " <thead>\n",
267
+ " <tr style=\"text-align: right;\">\n",
268
+ " <th></th>\n",
269
+ " <th>THICK</th>\n",
270
+ " <th>EAST</th>\n",
271
+ " <th>NORTH</th>\n",
272
+ " <th>vx</th>\n",
273
+ " <th>vy</th>\n",
274
+ " <th>v</th>\n",
275
+ " <th>ith_bm</th>\n",
276
+ " <th>smb</th>\n",
277
+ " <th>z</th>\n",
278
+ " <th>s</th>\n",
279
+ " <th>temp</th>\n",
280
+ " </tr>\n",
281
+ " </thead>\n",
282
+ " <tbody>\n",
283
+ " <tr>\n",
284
+ " <th>0</th>\n",
285
+ " <td>50.82</td>\n",
286
+ " <td>-2.498779e+06</td>\n",
287
+ " <td>1.417597e+06</td>\n",
288
+ " <td>-1133.923586</td>\n",
289
+ " <td>538.882191</td>\n",
290
+ " <td>1255.458766</td>\n",
291
+ " <td>85.334967</td>\n",
292
+ " <td>1607.108764</td>\n",
293
+ " <td>278.650876</td>\n",
294
+ " <td>0.049565</td>\n",
295
+ " <td>266.860876</td>\n",
296
+ " </tr>\n",
297
+ " <tr>\n",
298
+ " <th>1</th>\n",
299
+ " <td>53.63</td>\n",
300
+ " <td>-2.498462e+06</td>\n",
301
+ " <td>1.417028e+06</td>\n",
302
+ " <td>-1104.194542</td>\n",
303
+ " <td>518.784315</td>\n",
304
+ " <td>1219.992931</td>\n",
305
+ " <td>69.791700</td>\n",
306
+ " <td>1669.475359</td>\n",
307
+ " <td>305.152998</td>\n",
308
+ " <td>0.055753</td>\n",
309
+ " <td>266.809909</td>\n",
310
+ " </tr>\n",
311
+ " <tr>\n",
312
+ " <th>2</th>\n",
313
+ " <td>23.24</td>\n",
314
+ " <td>-2.497579e+06</td>\n",
315
+ " <td>1.415438e+06</td>\n",
316
+ " <td>-797.720551</td>\n",
317
+ " <td>252.445367</td>\n",
318
+ " <td>836.711863</td>\n",
319
+ " <td>44.747269</td>\n",
320
+ " <td>1834.636673</td>\n",
321
+ " <td>365.113957</td>\n",
322
+ " <td>0.020203</td>\n",
323
+ " <td>266.596713</td>\n",
324
+ " </tr>\n",
325
+ " <tr>\n",
326
+ " <th>3</th>\n",
327
+ " <td>21.80</td>\n",
328
+ " <td>-2.495786e+06</td>\n",
329
+ " <td>1.412343e+06</td>\n",
330
+ " <td>-65.286477</td>\n",
331
+ " <td>146.920962</td>\n",
332
+ " <td>160.773421</td>\n",
333
+ " <td>19.385211</td>\n",
334
+ " <td>2213.076648</td>\n",
335
+ " <td>499.961841</td>\n",
336
+ " <td>0.046409</td>\n",
337
+ " <td>265.990232</td>\n",
338
+ " </tr>\n",
339
+ " <tr>\n",
340
+ " <th>4</th>\n",
341
+ " <td>25.30</td>\n",
342
+ " <td>-2.495772e+06</td>\n",
343
+ " <td>1.412320e+06</td>\n",
344
+ " <td>-63.979106</td>\n",
345
+ " <td>147.167889</td>\n",
346
+ " <td>160.473404</td>\n",
347
+ " <td>19.363090</td>\n",
348
+ " <td>2215.949898</td>\n",
349
+ " <td>501.047693</td>\n",
350
+ " <td>0.045488</td>\n",
351
+ " <td>265.986040</td>\n",
352
+ " </tr>\n",
353
+ " <tr>\n",
354
+ " <th>...</th>\n",
355
+ " <td>...</td>\n",
356
+ " <td>...</td>\n",
357
+ " <td>...</td>\n",
358
+ " <td>...</td>\n",
359
+ " <td>...</td>\n",
360
+ " <td>...</td>\n",
361
+ " <td>...</td>\n",
362
+ " <td>...</td>\n",
363
+ " <td>...</td>\n",
364
+ " <td>...</td>\n",
365
+ " <td>...</td>\n",
366
+ " </tr>\n",
367
+ " <tr>\n",
368
+ " <th>2097147</th>\n",
369
+ " <td>2000.64</td>\n",
370
+ " <td>2.654522e+06</td>\n",
371
+ " <td>-4.884883e+05</td>\n",
372
+ " <td>0.449748</td>\n",
373
+ " <td>15.509423</td>\n",
374
+ " <td>15.515943</td>\n",
375
+ " <td>147.532692</td>\n",
376
+ " <td>765.603721</td>\n",
377
+ " <td>182.417924</td>\n",
378
+ " <td>0.040650</td>\n",
379
+ " <td>261.717844</td>\n",
380
+ " </tr>\n",
381
+ " <tr>\n",
382
+ " <th>2097148</th>\n",
383
+ " <td>1955.43</td>\n",
384
+ " <td>2.654789e+06</td>\n",
385
+ " <td>-4.886187e+05</td>\n",
386
+ " <td>0.618483</td>\n",
387
+ " <td>15.115018</td>\n",
388
+ " <td>15.127666</td>\n",
389
+ " <td>155.331007</td>\n",
390
+ " <td>763.821258</td>\n",
391
+ " <td>181.186072</td>\n",
392
+ " <td>0.040658</td>\n",
393
+ " <td>261.736247</td>\n",
394
+ " </tr>\n",
395
+ " <tr>\n",
396
+ " <th>2097149</th>\n",
397
+ " <td>1973.15</td>\n",
398
+ " <td>2.654843e+06</td>\n",
399
+ " <td>-4.886439e+05</td>\n",
400
+ " <td>0.630447</td>\n",
401
+ " <td>15.065385</td>\n",
402
+ " <td>15.078570</td>\n",
403
+ " <td>156.902040</td>\n",
404
+ " <td>763.393967</td>\n",
405
+ " <td>180.894478</td>\n",
406
+ " <td>0.040675</td>\n",
407
+ " <td>261.740045</td>\n",
408
+ " </tr>\n",
409
+ " <tr>\n",
410
+ " <th>2097150</th>\n",
411
+ " <td>1905.27</td>\n",
412
+ " <td>2.655007e+06</td>\n",
413
+ " <td>-4.887185e+05</td>\n",
414
+ " <td>0.433723</td>\n",
415
+ " <td>14.901482</td>\n",
416
+ " <td>14.907793</td>\n",
417
+ " <td>161.674849</td>\n",
418
+ " <td>762.025022</td>\n",
419
+ " <td>179.962857</td>\n",
420
+ " <td>0.040781</td>\n",
421
+ " <td>261.751564</td>\n",
422
+ " </tr>\n",
423
+ " <tr>\n",
424
+ " <th>2097151</th>\n",
425
+ " <td>1893.28</td>\n",
426
+ " <td>2.655062e+06</td>\n",
427
+ " <td>-4.887430e+05</td>\n",
428
+ " <td>0.326494</td>\n",
429
+ " <td>14.848336</td>\n",
430
+ " <td>14.851925</td>\n",
431
+ " <td>163.469380</td>\n",
432
+ " <td>761.494619</td>\n",
433
+ " <td>179.626606</td>\n",
434
+ " <td>0.040890</td>\n",
435
+ " <td>261.755432</td>\n",
436
+ " </tr>\n",
437
+ " </tbody>\n",
438
+ "</table>\n",
439
+ "<p>2083679 rows × 11 columns</p>\n",
440
+ "</div>"
441
+ ],
442
+ "text/plain": [
443
+ " THICK EAST NORTH vx vy \\\n",
444
+ "0 50.82 -2.498779e+06 1.417597e+06 -1133.923586 538.882191 \n",
445
+ "1 53.63 -2.498462e+06 1.417028e+06 -1104.194542 518.784315 \n",
446
+ "2 23.24 -2.497579e+06 1.415438e+06 -797.720551 252.445367 \n",
447
+ "3 21.80 -2.495786e+06 1.412343e+06 -65.286477 146.920962 \n",
448
+ "4 25.30 -2.495772e+06 1.412320e+06 -63.979106 147.167889 \n",
449
+ "... ... ... ... ... ... \n",
450
+ "2097147 2000.64 2.654522e+06 -4.884883e+05 0.449748 15.509423 \n",
451
+ "2097148 1955.43 2.654789e+06 -4.886187e+05 0.618483 15.115018 \n",
452
+ "2097149 1973.15 2.654843e+06 -4.886439e+05 0.630447 15.065385 \n",
453
+ "2097150 1905.27 2.655007e+06 -4.887185e+05 0.433723 14.901482 \n",
454
+ "2097151 1893.28 2.655062e+06 -4.887430e+05 0.326494 14.848336 \n",
455
+ "\n",
456
+ " v ith_bm smb z s \\\n",
457
+ "0 1255.458766 85.334967 1607.108764 278.650876 0.049565 \n",
458
+ "1 1219.992931 69.791700 1669.475359 305.152998 0.055753 \n",
459
+ "2 836.711863 44.747269 1834.636673 365.113957 0.020203 \n",
460
+ "3 160.773421 19.385211 2213.076648 499.961841 0.046409 \n",
461
+ "4 160.473404 19.363090 2215.949898 501.047693 0.045488 \n",
462
+ "... ... ... ... ... ... \n",
463
+ "2097147 15.515943 147.532692 765.603721 182.417924 0.040650 \n",
464
+ "2097148 15.127666 155.331007 763.821258 181.186072 0.040658 \n",
465
+ "2097149 15.078570 156.902040 763.393967 180.894478 0.040675 \n",
466
+ "2097150 14.907793 161.674849 762.025022 179.962857 0.040781 \n",
467
+ "2097151 14.851925 163.469380 761.494619 179.626606 0.040890 \n",
468
+ "\n",
469
+ " temp \n",
470
+ "0 266.860876 \n",
471
+ "1 266.809909 \n",
472
+ "2 266.596713 \n",
473
+ "3 265.990232 \n",
474
+ "4 265.986040 \n",
475
+ "... ... \n",
476
+ "2097147 261.717844 \n",
477
+ "2097148 261.736247 \n",
478
+ "2097149 261.740045 \n",
479
+ "2097150 261.751564 \n",
480
+ "2097151 261.755432 \n",
481
+ "\n",
482
+ "[2083679 rows x 11 columns]"
483
+ ]
484
+ },
485
+ "execution_count": 20,
486
+ "metadata": {},
487
+ "output_type": "execute_result"
488
+ }
489
+ ],
490
+ "source": [
491
+ "result"
492
+ ]
493
+ }
494
+ ],
495
+ "metadata": {
496
+ "kernelspec": {
497
+ "display_name": "appml25",
498
+ "language": "python",
499
+ "name": "python3"
500
+ },
501
+ "language_info": {
502
+ "codemirror_mode": {
503
+ "name": "ipython",
504
+ "version": 3
505
+ },
506
+ "file_extension": ".py",
507
+ "mimetype": "text/x-python",
508
+ "name": "python",
509
+ "nbconvert_exporter": "python",
510
+ "pygments_lexer": "ipython3",
511
+ "version": "3.12.9"
512
+ }
513
+ },
514
+ "nbformat": 4,
515
+ "nbformat_minor": 5
516
+ }