duplicate handling
Browse files- Markus_data_ting.ipynb +516 -0
Markus_data_ting.ipynb
CHANGED
|
@@ -0,0 +1,516 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 10,
|
| 6 |
+
"id": "4f90bfb1",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import pyarrow.parquet as pq\n",
|
| 11 |
+
"import pandas as pd\n",
|
| 12 |
+
"import random\n",
|
| 13 |
+
"import gc"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"cell_type": "code",
|
| 18 |
+
"execution_count": null,
|
| 19 |
+
"id": "fedd7106",
|
| 20 |
+
"metadata": {},
|
| 21 |
+
"outputs": [],
|
| 22 |
+
"source": [
|
| 23 |
+
"# Open the Parquet file\n",
|
| 24 |
+
"parquet_file = pq.ParquetFile(r'C:\\Users\\marku\\Desktop\\4år\\AML\\AppliedML2025\\Final project\\antarctica ml\\AppML_2025\\tabular_train_dataset\\bedmap_train2_30m.parquet')\n",
|
| 25 |
+
"\n",
|
| 26 |
+
"# Get number of row groups\n",
|
| 27 |
+
"num_row_groups = parquet_file.num_row_groups\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"# Select 10% of row groups randomly\n",
|
| 30 |
+
"sample_size = max(1, int(num_row_groups * 0.1))\n",
|
| 31 |
+
"selected_groups = random.sample(range(num_row_groups), sample_size)\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"# Read only the selected row groups, excluding specific columns\n",
|
| 34 |
+
"dfs = []\n",
|
| 35 |
+
"for i in selected_groups:\n",
|
| 36 |
+
" table = parquet_file.read_row_group(i)\n",
|
| 37 |
+
" df = table.to_pandas()\n",
|
| 38 |
+
" df = df.drop(columns=['LON', 'LAT', 'geometry'], errors='ignore') # Drop unwanted columns if present\n",
|
| 39 |
+
" dfs.append(df)\n",
|
| 40 |
+
"\n",
|
| 41 |
+
"# Combine into one DataFrame\n",
|
| 42 |
+
"data = pd.concat(dfs, ignore_index=True)\n"
|
| 43 |
+
]
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"cell_type": "code",
|
| 47 |
+
"execution_count": 5,
|
| 48 |
+
"id": "105c726d",
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"data.sort_values(by=['EAST', 'NORTH'], inplace=True)"
|
| 53 |
+
]
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"cell_type": "code",
|
| 57 |
+
"execution_count": 6,
|
| 58 |
+
"id": "e72f55d9",
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [
|
| 61 |
+
{
|
| 62 |
+
"name": "stderr",
|
| 63 |
+
"output_type": "stream",
|
| 64 |
+
"text": [
|
| 65 |
+
"C:\\Users\\marku\\AppData\\Local\\Temp\\ipykernel_1488\\2804182976.py:2: SettingWithCopyWarning: \n",
|
| 66 |
+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
| 69 |
+
" duplicates.sort_values(by=['EAST', 'NORTH'], inplace=True)\n"
|
| 70 |
+
]
|
| 71 |
+
}
|
| 72 |
+
],
|
| 73 |
+
"source": [
|
| 74 |
+
"duplicates = data[data.duplicated(subset=['EAST', 'NORTH'], keep=False)]\n",
|
| 75 |
+
"duplicates.sort_values(by=['EAST', 'NORTH'], inplace=True)"
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"cell_type": "code",
|
| 80 |
+
"execution_count": 7,
|
| 81 |
+
"id": "98f5dbf6",
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [
|
| 84 |
+
{
|
| 85 |
+
"name": "stdout",
|
| 86 |
+
"output_type": "stream",
|
| 87 |
+
"text": [
|
| 88 |
+
"no. duplicates: 20749.\n",
|
| 89 |
+
"Unique (EAST, NORTH) pairs that have duplicates: 8369\n"
|
| 90 |
+
]
|
| 91 |
+
}
|
| 92 |
+
],
|
| 93 |
+
"source": [
|
| 94 |
+
"print(f\"no. duplicates: {len(duplicates)}.\")\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"num_duped_coord = duplicates[['EAST', 'NORTH']].drop_duplicates().shape[0]\n",
|
| 97 |
+
"print(\"Unique (EAST, NORTH) pairs that have duplicates:\", num_duped_coord)"
|
| 98 |
+
]
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"cell_type": "code",
|
| 102 |
+
"execution_count": 8,
|
| 103 |
+
"id": "7e1a206f",
|
| 104 |
+
"metadata": {},
|
| 105 |
+
"outputs": [
|
| 106 |
+
{
|
| 107 |
+
"name": "stdout",
|
| 108 |
+
"output_type": "stream",
|
| 109 |
+
"text": [
|
| 110 |
+
" EAST NORTH THICK_range THICK_mean THICK_median \\\n",
|
| 111 |
+
"0 -2.308745e+06 1.143082e+06 16.590 343.410000 343.915 \n",
|
| 112 |
+
"1 -2.141053e+06 1.032035e+06 0.000 460.420000 460.420 \n",
|
| 113 |
+
"2 -1.944284e+06 9.048456e+05 0.000 1290.000000 1290.000 \n",
|
| 114 |
+
"3 -1.907729e+06 8.049775e+05 0.000 716.000000 716.000 \n",
|
| 115 |
+
"4 -1.902475e+06 9.188595e+05 5.060 913.010000 913.010 \n",
|
| 116 |
+
"5 -1.900838e+06 8.852626e+05 0.000 407.000000 407.000 \n",
|
| 117 |
+
"6 -1.875244e+06 8.974816e+05 0.000 1260.000000 1260.000 \n",
|
| 118 |
+
"7 -1.800857e+06 8.888363e+05 0.000 770.000000 770.000 \n",
|
| 119 |
+
"8 -1.766391e+06 7.534504e+05 0.000 763.000000 763.000 \n",
|
| 120 |
+
"9 -1.708213e+06 8.119492e+05 0.000 1267.000000 1267.000 \n",
|
| 121 |
+
"10 -1.698118e+06 7.934723e+05 0.000 612.000000 612.000 \n",
|
| 122 |
+
"11 -1.695179e+06 6.702288e+05 7.928 762.671000 762.671 \n",
|
| 123 |
+
"12 -1.689698e+06 -1.828034e+05 4.350 826.545000 826.545 \n",
|
| 124 |
+
"13 -1.688817e+06 -1.826473e+05 1.740 858.260000 858.260 \n",
|
| 125 |
+
"14 -1.677457e+06 -1.213272e+05 0.350 1819.685000 1819.685 \n",
|
| 126 |
+
"15 -1.672095e+06 7.657638e+05 0.000 249.000000 249.000 \n",
|
| 127 |
+
"16 -1.641989e+06 5.976326e+05 2.990 1706.393333 1706.630 \n",
|
| 128 |
+
"17 -1.628775e+06 4.224858e+05 0.000 1300.000000 1300.000 \n",
|
| 129 |
+
"18 -1.627830e+06 4.993682e+05 0.000 943.000000 943.000 \n",
|
| 130 |
+
"19 -1.622924e+06 -2.434122e+05 4.600 502.560000 502.560 \n",
|
| 131 |
+
"20 -1.622312e+06 -2.459353e+05 0.220 690.100000 690.100 \n",
|
| 132 |
+
"21 -1.622251e+06 -2.462038e+05 1.570 704.635000 704.635 \n",
|
| 133 |
+
"22 -1.621120e+06 -2.512063e+05 4.950 710.475000 710.475 \n",
|
| 134 |
+
"23 -1.620706e+06 -2.522192e+05 3.240 716.200000 716.200 \n",
|
| 135 |
+
"24 -1.618462e+06 -2.385436e+05 1.170 663.165000 663.165 \n",
|
| 136 |
+
"\n",
|
| 137 |
+
" THICK_range_ratio \n",
|
| 138 |
+
"0 0.048310 \n",
|
| 139 |
+
"1 0.000000 \n",
|
| 140 |
+
"2 0.000000 \n",
|
| 141 |
+
"3 0.000000 \n",
|
| 142 |
+
"4 0.005542 \n",
|
| 143 |
+
"5 0.000000 \n",
|
| 144 |
+
"6 0.000000 \n",
|
| 145 |
+
"7 0.000000 \n",
|
| 146 |
+
"8 0.000000 \n",
|
| 147 |
+
"9 0.000000 \n",
|
| 148 |
+
"10 0.000000 \n",
|
| 149 |
+
"11 0.010395 \n",
|
| 150 |
+
"12 0.005263 \n",
|
| 151 |
+
"13 0.002027 \n",
|
| 152 |
+
"14 0.000192 \n",
|
| 153 |
+
"15 0.000000 \n",
|
| 154 |
+
"16 0.001752 \n",
|
| 155 |
+
"17 0.000000 \n",
|
| 156 |
+
"18 0.000000 \n",
|
| 157 |
+
"19 0.009153 \n",
|
| 158 |
+
"20 0.000319 \n",
|
| 159 |
+
"21 0.002228 \n",
|
| 160 |
+
"22 0.006967 \n",
|
| 161 |
+
"23 0.004524 \n",
|
| 162 |
+
"24 0.001764 \n"
|
| 163 |
+
]
|
| 164 |
+
}
|
| 165 |
+
],
|
| 166 |
+
"source": [
|
| 167 |
+
"summary = (\n",
|
| 168 |
+
" duplicates.groupby(['EAST', 'NORTH']).agg(\n",
|
| 169 |
+
" THICK_range=('THICK', lambda x: x.max() - x.min()),\n",
|
| 170 |
+
" THICK_mean=('THICK', 'mean'),\n",
|
| 171 |
+
" THICK_median=('THICK', 'median')\n",
|
| 172 |
+
" ).reset_index()\n",
|
| 173 |
+
")\n",
|
| 174 |
+
"summary['THICK_range_ratio'] = summary['THICK_range'] / summary['THICK_mean']\n",
|
| 175 |
+
"\n",
|
| 176 |
+
"print(summary.head(25))\n"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"cell_type": "code",
|
| 181 |
+
"execution_count": 9,
|
| 182 |
+
"id": "2b7c1579",
|
| 183 |
+
"metadata": {},
|
| 184 |
+
"outputs": [
|
| 185 |
+
{
|
| 186 |
+
"name": "stdout",
|
| 187 |
+
"output_type": "stream",
|
| 188 |
+
"text": [
|
| 189 |
+
"0.0030684829606210935\n",
|
| 190 |
+
"0.017637902014526745\n",
|
| 191 |
+
"8369\n",
|
| 192 |
+
"2317\n",
|
| 193 |
+
"1259\n"
|
| 194 |
+
]
|
| 195 |
+
}
|
| 196 |
+
],
|
| 197 |
+
"source": [
|
| 198 |
+
"print(summary['THICK_range_ratio'].median())\n",
|
| 199 |
+
"print(summary['THICK_range_ratio'].mean())\n",
|
| 200 |
+
"print(len(summary))\n",
|
| 201 |
+
"print(len(summary[summary['THICK_range_ratio'] > 0.01]))\n",
|
| 202 |
+
"print(len(summary[(summary['THICK_range_ratio'] > 0.025)]))"
|
| 203 |
+
]
|
| 204 |
+
},
|
| 205 |
+
{
|
| 206 |
+
"cell_type": "code",
|
| 207 |
+
"execution_count": 17,
|
| 208 |
+
"id": "d4ea1f81",
|
| 209 |
+
"metadata": {},
|
| 210 |
+
"outputs": [],
|
| 211 |
+
"source": [
|
| 212 |
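+
"# Keep only duplicated locations whose THICK spread is <= 2.5% of their mean; locations above the threshold leave summary and so keep all their rows in the merge below\n",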
|
| 213 |
+
"summary = summary[summary['THICK_range_ratio'] <= 0.025]"
|
| 214 |
+
]
|
| 215 |
+
},
|
| 216 |
+
{
|
| 217 |
+
"cell_type": "code",
|
| 218 |
+
"execution_count": 19,
|
| 219 |
+
"id": "5b63659d",
|
| 220 |
+
"metadata": {},
|
| 221 |
+
"outputs": [],
|
| 222 |
+
"source": [
|
| 223 |
+
"# Step 1: Merge df with summary to bring in the Thick_median\n",
|
| 224 |
+
"merged = data.merge(summary, on=['EAST', 'NORTH'], how='left')\n",
|
| 225 |
+
"\n",
|
| 226 |
+
"# Step 2: Keep either:\n",
|
| 227 |
+
"# - rows not in summary (i.e., Thick_median is NaN)\n",
|
| 228 |
+
"# - or rows where Thick == Thick_median\n",
|
| 229 |
+
"result = merged[\n",
|
| 230 |
+
" merged['THICK_median'].isna() |\n",
|
| 231 |
+
" (merged['THICK'] == merged['THICK_median'])\n",
|
| 232 |
+
"]\n",
|
| 233 |
+
"del merged\n",
|
| 234 |
+
"gc.collect()\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"# Optional: Drop Thick_median column if not needed\n",
|
| 239 |
+
"result = result.drop(columns=['THICK_median', 'THICK_range', 'THICK_mean', 'THICK_range_ratio'], errors='ignore')\n"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": 20,
|
| 245 |
+
"id": "c45e6e83",
|
| 246 |
+
"metadata": {},
|
| 247 |
+
"outputs": [
|
| 248 |
+
{
|
| 249 |
+
"data": {
|
| 250 |
+
"text/html": [
|
| 251 |
+
"<div>\n",
|
| 252 |
+
"<style scoped>\n",
|
| 253 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 254 |
+
" vertical-align: middle;\n",
|
| 255 |
+
" }\n",
|
| 256 |
+
"\n",
|
| 257 |
+
" .dataframe tbody tr th {\n",
|
| 258 |
+
" vertical-align: top;\n",
|
| 259 |
+
" }\n",
|
| 260 |
+
"\n",
|
| 261 |
+
" .dataframe thead th {\n",
|
| 262 |
+
" text-align: right;\n",
|
| 263 |
+
" }\n",
|
| 264 |
+
"</style>\n",
|
| 265 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 266 |
+
" <thead>\n",
|
| 267 |
+
" <tr style=\"text-align: right;\">\n",
|
| 268 |
+
" <th></th>\n",
|
| 269 |
+
" <th>THICK</th>\n",
|
| 270 |
+
" <th>EAST</th>\n",
|
| 271 |
+
" <th>NORTH</th>\n",
|
| 272 |
+
" <th>vx</th>\n",
|
| 273 |
+
" <th>vy</th>\n",
|
| 274 |
+
" <th>v</th>\n",
|
| 275 |
+
" <th>ith_bm</th>\n",
|
| 276 |
+
" <th>smb</th>\n",
|
| 277 |
+
" <th>z</th>\n",
|
| 278 |
+
" <th>s</th>\n",
|
| 279 |
+
" <th>temp</th>\n",
|
| 280 |
+
" </tr>\n",
|
| 281 |
+
" </thead>\n",
|
| 282 |
+
" <tbody>\n",
|
| 283 |
+
" <tr>\n",
|
| 284 |
+
" <th>0</th>\n",
|
| 285 |
+
" <td>50.82</td>\n",
|
| 286 |
+
" <td>-2.498779e+06</td>\n",
|
| 287 |
+
" <td>1.417597e+06</td>\n",
|
| 288 |
+
" <td>-1133.923586</td>\n",
|
| 289 |
+
" <td>538.882191</td>\n",
|
| 290 |
+
" <td>1255.458766</td>\n",
|
| 291 |
+
" <td>85.334967</td>\n",
|
| 292 |
+
" <td>1607.108764</td>\n",
|
| 293 |
+
" <td>278.650876</td>\n",
|
| 294 |
+
" <td>0.049565</td>\n",
|
| 295 |
+
" <td>266.860876</td>\n",
|
| 296 |
+
" </tr>\n",
|
| 297 |
+
" <tr>\n",
|
| 298 |
+
" <th>1</th>\n",
|
| 299 |
+
" <td>53.63</td>\n",
|
| 300 |
+
" <td>-2.498462e+06</td>\n",
|
| 301 |
+
" <td>1.417028e+06</td>\n",
|
| 302 |
+
" <td>-1104.194542</td>\n",
|
| 303 |
+
" <td>518.784315</td>\n",
|
| 304 |
+
" <td>1219.992931</td>\n",
|
| 305 |
+
" <td>69.791700</td>\n",
|
| 306 |
+
" <td>1669.475359</td>\n",
|
| 307 |
+
" <td>305.152998</td>\n",
|
| 308 |
+
" <td>0.055753</td>\n",
|
| 309 |
+
" <td>266.809909</td>\n",
|
| 310 |
+
" </tr>\n",
|
| 311 |
+
" <tr>\n",
|
| 312 |
+
" <th>2</th>\n",
|
| 313 |
+
" <td>23.24</td>\n",
|
| 314 |
+
" <td>-2.497579e+06</td>\n",
|
| 315 |
+
" <td>1.415438e+06</td>\n",
|
| 316 |
+
" <td>-797.720551</td>\n",
|
| 317 |
+
" <td>252.445367</td>\n",
|
| 318 |
+
" <td>836.711863</td>\n",
|
| 319 |
+
" <td>44.747269</td>\n",
|
| 320 |
+
" <td>1834.636673</td>\n",
|
| 321 |
+
" <td>365.113957</td>\n",
|
| 322 |
+
" <td>0.020203</td>\n",
|
| 323 |
+
" <td>266.596713</td>\n",
|
| 324 |
+
" </tr>\n",
|
| 325 |
+
" <tr>\n",
|
| 326 |
+
" <th>3</th>\n",
|
| 327 |
+
" <td>21.80</td>\n",
|
| 328 |
+
" <td>-2.495786e+06</td>\n",
|
| 329 |
+
" <td>1.412343e+06</td>\n",
|
| 330 |
+
" <td>-65.286477</td>\n",
|
| 331 |
+
" <td>146.920962</td>\n",
|
| 332 |
+
" <td>160.773421</td>\n",
|
| 333 |
+
" <td>19.385211</td>\n",
|
| 334 |
+
" <td>2213.076648</td>\n",
|
| 335 |
+
" <td>499.961841</td>\n",
|
| 336 |
+
" <td>0.046409</td>\n",
|
| 337 |
+
" <td>265.990232</td>\n",
|
| 338 |
+
" </tr>\n",
|
| 339 |
+
" <tr>\n",
|
| 340 |
+
" <th>4</th>\n",
|
| 341 |
+
" <td>25.30</td>\n",
|
| 342 |
+
" <td>-2.495772e+06</td>\n",
|
| 343 |
+
" <td>1.412320e+06</td>\n",
|
| 344 |
+
" <td>-63.979106</td>\n",
|
| 345 |
+
" <td>147.167889</td>\n",
|
| 346 |
+
" <td>160.473404</td>\n",
|
| 347 |
+
" <td>19.363090</td>\n",
|
| 348 |
+
" <td>2215.949898</td>\n",
|
| 349 |
+
" <td>501.047693</td>\n",
|
| 350 |
+
" <td>0.045488</td>\n",
|
| 351 |
+
" <td>265.986040</td>\n",
|
| 352 |
+
" </tr>\n",
|
| 353 |
+
" <tr>\n",
|
| 354 |
+
" <th>...</th>\n",
|
| 355 |
+
" <td>...</td>\n",
|
| 356 |
+
" <td>...</td>\n",
|
| 357 |
+
" <td>...</td>\n",
|
| 358 |
+
" <td>...</td>\n",
|
| 359 |
+
" <td>...</td>\n",
|
| 360 |
+
" <td>...</td>\n",
|
| 361 |
+
" <td>...</td>\n",
|
| 362 |
+
" <td>...</td>\n",
|
| 363 |
+
" <td>...</td>\n",
|
| 364 |
+
" <td>...</td>\n",
|
| 365 |
+
" <td>...</td>\n",
|
| 366 |
+
" </tr>\n",
|
| 367 |
+
" <tr>\n",
|
| 368 |
+
" <th>2097147</th>\n",
|
| 369 |
+
" <td>2000.64</td>\n",
|
| 370 |
+
" <td>2.654522e+06</td>\n",
|
| 371 |
+
" <td>-4.884883e+05</td>\n",
|
| 372 |
+
" <td>0.449748</td>\n",
|
| 373 |
+
" <td>15.509423</td>\n",
|
| 374 |
+
" <td>15.515943</td>\n",
|
| 375 |
+
" <td>147.532692</td>\n",
|
| 376 |
+
" <td>765.603721</td>\n",
|
| 377 |
+
" <td>182.417924</td>\n",
|
| 378 |
+
" <td>0.040650</td>\n",
|
| 379 |
+
" <td>261.717844</td>\n",
|
| 380 |
+
" </tr>\n",
|
| 381 |
+
" <tr>\n",
|
| 382 |
+
" <th>2097148</th>\n",
|
| 383 |
+
" <td>1955.43</td>\n",
|
| 384 |
+
" <td>2.654789e+06</td>\n",
|
| 385 |
+
" <td>-4.886187e+05</td>\n",
|
| 386 |
+
" <td>0.618483</td>\n",
|
| 387 |
+
" <td>15.115018</td>\n",
|
| 388 |
+
" <td>15.127666</td>\n",
|
| 389 |
+
" <td>155.331007</td>\n",
|
| 390 |
+
" <td>763.821258</td>\n",
|
| 391 |
+
" <td>181.186072</td>\n",
|
| 392 |
+
" <td>0.040658</td>\n",
|
| 393 |
+
" <td>261.736247</td>\n",
|
| 394 |
+
" </tr>\n",
|
| 395 |
+
" <tr>\n",
|
| 396 |
+
" <th>2097149</th>\n",
|
| 397 |
+
" <td>1973.15</td>\n",
|
| 398 |
+
" <td>2.654843e+06</td>\n",
|
| 399 |
+
" <td>-4.886439e+05</td>\n",
|
| 400 |
+
" <td>0.630447</td>\n",
|
| 401 |
+
" <td>15.065385</td>\n",
|
| 402 |
+
" <td>15.078570</td>\n",
|
| 403 |
+
" <td>156.902040</td>\n",
|
| 404 |
+
" <td>763.393967</td>\n",
|
| 405 |
+
" <td>180.894478</td>\n",
|
| 406 |
+
" <td>0.040675</td>\n",
|
| 407 |
+
" <td>261.740045</td>\n",
|
| 408 |
+
" </tr>\n",
|
| 409 |
+
" <tr>\n",
|
| 410 |
+
" <th>2097150</th>\n",
|
| 411 |
+
" <td>1905.27</td>\n",
|
| 412 |
+
" <td>2.655007e+06</td>\n",
|
| 413 |
+
" <td>-4.887185e+05</td>\n",
|
| 414 |
+
" <td>0.433723</td>\n",
|
| 415 |
+
" <td>14.901482</td>\n",
|
| 416 |
+
" <td>14.907793</td>\n",
|
| 417 |
+
" <td>161.674849</td>\n",
|
| 418 |
+
" <td>762.025022</td>\n",
|
| 419 |
+
" <td>179.962857</td>\n",
|
| 420 |
+
" <td>0.040781</td>\n",
|
| 421 |
+
" <td>261.751564</td>\n",
|
| 422 |
+
" </tr>\n",
|
| 423 |
+
" <tr>\n",
|
| 424 |
+
" <th>2097151</th>\n",
|
| 425 |
+
" <td>1893.28</td>\n",
|
| 426 |
+
" <td>2.655062e+06</td>\n",
|
| 427 |
+
" <td>-4.887430e+05</td>\n",
|
| 428 |
+
" <td>0.326494</td>\n",
|
| 429 |
+
" <td>14.848336</td>\n",
|
| 430 |
+
" <td>14.851925</td>\n",
|
| 431 |
+
" <td>163.469380</td>\n",
|
| 432 |
+
" <td>761.494619</td>\n",
|
| 433 |
+
" <td>179.626606</td>\n",
|
| 434 |
+
" <td>0.040890</td>\n",
|
| 435 |
+
" <td>261.755432</td>\n",
|
| 436 |
+
" </tr>\n",
|
| 437 |
+
" </tbody>\n",
|
| 438 |
+
"</table>\n",
|
| 439 |
+
"<p>2083679 rows × 11 columns</p>\n",
|
| 440 |
+
"</div>"
|
| 441 |
+
],
|
| 442 |
+
"text/plain": [
|
| 443 |
+
" THICK EAST NORTH vx vy \\\n",
|
| 444 |
+
"0 50.82 -2.498779e+06 1.417597e+06 -1133.923586 538.882191 \n",
|
| 445 |
+
"1 53.63 -2.498462e+06 1.417028e+06 -1104.194542 518.784315 \n",
|
| 446 |
+
"2 23.24 -2.497579e+06 1.415438e+06 -797.720551 252.445367 \n",
|
| 447 |
+
"3 21.80 -2.495786e+06 1.412343e+06 -65.286477 146.920962 \n",
|
| 448 |
+
"4 25.30 -2.495772e+06 1.412320e+06 -63.979106 147.167889 \n",
|
| 449 |
+
"... ... ... ... ... ... \n",
|
| 450 |
+
"2097147 2000.64 2.654522e+06 -4.884883e+05 0.449748 15.509423 \n",
|
| 451 |
+
"2097148 1955.43 2.654789e+06 -4.886187e+05 0.618483 15.115018 \n",
|
| 452 |
+
"2097149 1973.15 2.654843e+06 -4.886439e+05 0.630447 15.065385 \n",
|
| 453 |
+
"2097150 1905.27 2.655007e+06 -4.887185e+05 0.433723 14.901482 \n",
|
| 454 |
+
"2097151 1893.28 2.655062e+06 -4.887430e+05 0.326494 14.848336 \n",
|
| 455 |
+
"\n",
|
| 456 |
+
" v ith_bm smb z s \\\n",
|
| 457 |
+
"0 1255.458766 85.334967 1607.108764 278.650876 0.049565 \n",
|
| 458 |
+
"1 1219.992931 69.791700 1669.475359 305.152998 0.055753 \n",
|
| 459 |
+
"2 836.711863 44.747269 1834.636673 365.113957 0.020203 \n",
|
| 460 |
+
"3 160.773421 19.385211 2213.076648 499.961841 0.046409 \n",
|
| 461 |
+
"4 160.473404 19.363090 2215.949898 501.047693 0.045488 \n",
|
| 462 |
+
"... ... ... ... ... ... \n",
|
| 463 |
+
"2097147 15.515943 147.532692 765.603721 182.417924 0.040650 \n",
|
| 464 |
+
"2097148 15.127666 155.331007 763.821258 181.186072 0.040658 \n",
|
| 465 |
+
"2097149 15.078570 156.902040 763.393967 180.894478 0.040675 \n",
|
| 466 |
+
"2097150 14.907793 161.674849 762.025022 179.962857 0.040781 \n",
|
| 467 |
+
"2097151 14.851925 163.469380 761.494619 179.626606 0.040890 \n",
|
| 468 |
+
"\n",
|
| 469 |
+
" temp \n",
|
| 470 |
+
"0 266.860876 \n",
|
| 471 |
+
"1 266.809909 \n",
|
| 472 |
+
"2 266.596713 \n",
|
| 473 |
+
"3 265.990232 \n",
|
| 474 |
+
"4 265.986040 \n",
|
| 475 |
+
"... ... \n",
|
| 476 |
+
"2097147 261.717844 \n",
|
| 477 |
+
"2097148 261.736247 \n",
|
| 478 |
+
"2097149 261.740045 \n",
|
| 479 |
+
"2097150 261.751564 \n",
|
| 480 |
+
"2097151 261.755432 \n",
|
| 481 |
+
"\n",
|
| 482 |
+
"[2083679 rows x 11 columns]"
|
| 483 |
+
]
|
| 484 |
+
},
|
| 485 |
+
"execution_count": 20,
|
| 486 |
+
"metadata": {},
|
| 487 |
+
"output_type": "execute_result"
|
| 488 |
+
}
|
| 489 |
+
],
|
| 490 |
+
"source": [
|
| 491 |
+
"result"
|
| 492 |
+
]
|
| 493 |
+
}
|
| 494 |
+
],
|
| 495 |
+
"metadata": {
|
| 496 |
+
"kernelspec": {
|
| 497 |
+
"display_name": "appml25",
|
| 498 |
+
"language": "python",
|
| 499 |
+
"name": "python3"
|
| 500 |
+
},
|
| 501 |
+
"language_info": {
|
| 502 |
+
"codemirror_mode": {
|
| 503 |
+
"name": "ipython",
|
| 504 |
+
"version": 3
|
| 505 |
+
},
|
| 506 |
+
"file_extension": ".py",
|
| 507 |
+
"mimetype": "text/x-python",
|
| 508 |
+
"name": "python",
|
| 509 |
+
"nbconvert_exporter": "python",
|
| 510 |
+
"pygments_lexer": "ipython3",
|
| 511 |
+
"version": "3.12.9"
|
| 512 |
+
}
|
| 513 |
+
},
|
| 514 |
+
"nbformat": 4,
|
| 515 |
+
"nbformat_minor": 5
|
| 516 |
+
}
|