Jensen-holm commited on
Commit
d0bdf19
·
1 Parent(s): ba98f59

went back and fixed an issue in pre processing, realized that I was not

Browse files
Files changed (3) hide show
  1. data/AllTeamsAgg.csv +2 -2
  2. src/nn.ipynb +446 -6
  3. src/pre_processing.ipynb +632 -661
data/AllTeamsAgg.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08b49367eed1f1591006101b89cf44461ef268128d5865e8df4a3f708f8342c7
3
- size 2913054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc143a27b120af9722911bfb7796137d1f7a22b2796870cefcb108f68e1f7e9f
3
+ size 21371547
src/nn.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 5,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -18,7 +18,7 @@
18
  },
19
  {
20
  "cell_type": "code",
21
- "execution_count": 7,
22
  "metadata": {},
23
  "outputs": [
24
  {
@@ -27,9 +27,9 @@
27
  "text": [
28
  "<class 'pandas.core.frame.DataFrame'>\n",
29
  "RangeIndex: 1335 entries, 0 to 1334\n",
30
- "Columns: 324 entries, Unnamed: 0 to LastD1Season tourney\n",
31
- "dtypes: float64(186), int64(131), object(7)\n",
32
- "memory usage: 3.3+ MB\n"
33
  ]
34
  }
35
  ],
@@ -41,7 +41,375 @@
41
  },
42
  {
43
  "cell_type": "code",
44
- "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  "metadata": {},
46
  "outputs": [],
47
  "source": [
@@ -52,6 +420,78 @@
52
  "\n",
53
  "DEVICE = get_device() "
54
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }
56
  ],
57
  "metadata": {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": 2,
22
  "metadata": {},
23
  "outputs": [
24
  {
 
27
  "text": [
28
  "<class 'pandas.core.frame.DataFrame'>\n",
29
  "RangeIndex: 1335 entries, 0 to 1334\n",
30
+ "Columns: 319 entries, Unnamed: 0 to LastD1Season\n",
31
+ "dtypes: float64(186), int64(129), object(4)\n",
32
+ "memory usage: 3.2+ MB\n"
33
  ]
34
  }
35
  ],
 
41
  },
42
  {
43
  "cell_type": "code",
44
+ "execution_count": 3,
45
+ "metadata": {},
46
+ "outputs": [
47
+ {
48
+ "data": {
49
+ "text/html": [
50
+ "<div>\n",
51
+ "<style scoped>\n",
52
+ " .dataframe tbody tr th:only-of-type {\n",
53
+ " vertical-align: middle;\n",
54
+ " }\n",
55
+ "\n",
56
+ " .dataframe tbody tr th {\n",
57
+ " vertical-align: top;\n",
58
+ " }\n",
59
+ "\n",
60
+ " .dataframe thead th {\n",
61
+ " text-align: right;\n",
62
+ " }\n",
63
+ "</style>\n",
64
+ "<table border=\"1\" class=\"dataframe\">\n",
65
+ " <thead>\n",
66
+ " <tr style=\"text-align: right;\">\n",
67
+ " <th></th>\n",
68
+ " <th>Unnamed: 0</th>\n",
69
+ " <th>TeamID</th>\n",
70
+ " <th>Season</th>\n",
71
+ " <th>League</th>\n",
72
+ " <th>TeamScore min reg</th>\n",
73
+ " <th>TeamScore max reg</th>\n",
74
+ " <th>TeamScore std reg</th>\n",
75
+ " <th>TeamScore median reg</th>\n",
76
+ " <th>TeamScore mean reg</th>\n",
77
+ " <th>OppScore min reg</th>\n",
78
+ " <th>...</th>\n",
79
+ " <th>Win min tourney</th>\n",
80
+ " <th>Win max tourney</th>\n",
81
+ " <th>Win std tourney</th>\n",
82
+ " <th>Win median tourney</th>\n",
83
+ " <th>Win mean tourney</th>\n",
84
+ " <th>ConfAbbrev</th>\n",
85
+ " <th>Seed</th>\n",
86
+ " <th>TeamName</th>\n",
87
+ " <th>FirstD1Season</th>\n",
88
+ " <th>LastD1Season</th>\n",
89
+ " </tr>\n",
90
+ " </thead>\n",
91
+ " <tbody>\n",
92
+ " <tr>\n",
93
+ " <th>409</th>\n",
94
+ " <td>409</td>\n",
95
+ " <td>1233</td>\n",
96
+ " <td>2013</td>\n",
97
+ " <td>M</td>\n",
98
+ " <td>60</td>\n",
99
+ " <td>104</td>\n",
100
+ " <td>11.901903</td>\n",
101
+ " <td>81.0</td>\n",
102
+ " <td>80.696970</td>\n",
103
+ " <td>57</td>\n",
104
+ " <td>...</td>\n",
105
+ " <td>0</td>\n",
106
+ " <td>0</td>\n",
107
+ " <td>NaN</td>\n",
108
+ " <td>0.0</td>\n",
109
+ " <td>0.000000</td>\n",
110
+ " <td>maac</td>\n",
111
+ " <td>Z15</td>\n",
112
+ " <td>Iona</td>\n",
113
+ " <td>1985</td>\n",
114
+ " <td>2024</td>\n",
115
+ " </tr>\n",
116
+ " <tr>\n",
117
+ " <th>1189</th>\n",
118
+ " <td>1189</td>\n",
119
+ " <td>1436</td>\n",
120
+ " <td>2005</td>\n",
121
+ " <td>M</td>\n",
122
+ " <td>55</td>\n",
123
+ " <td>90</td>\n",
124
+ " <td>8.572611</td>\n",
125
+ " <td>73.5</td>\n",
126
+ " <td>72.600000</td>\n",
127
+ " <td>44</td>\n",
128
+ " <td>...</td>\n",
129
+ " <td>0</td>\n",
130
+ " <td>1</td>\n",
131
+ " <td>0.707107</td>\n",
132
+ " <td>0.5</td>\n",
133
+ " <td>0.500000</td>\n",
134
+ " <td>aec</td>\n",
135
+ " <td>Y13</td>\n",
136
+ " <td>Vermont</td>\n",
137
+ " <td>1985</td>\n",
138
+ " <td>2024</td>\n",
139
+ " </tr>\n",
140
+ " <tr>\n",
141
+ " <th>1275</th>\n",
142
+ " <td>1275</td>\n",
143
+ " <td>1455</td>\n",
144
+ " <td>2013</td>\n",
145
+ " <td>M</td>\n",
146
+ " <td>52</td>\n",
147
+ " <td>94</td>\n",
148
+ " <td>9.407254</td>\n",
149
+ " <td>69.0</td>\n",
150
+ " <td>69.441176</td>\n",
151
+ " <td>39</td>\n",
152
+ " <td>...</td>\n",
153
+ " <td>0</td>\n",
154
+ " <td>1</td>\n",
155
+ " <td>0.447214</td>\n",
156
+ " <td>1.0</td>\n",
157
+ " <td>0.800000</td>\n",
158
+ " <td>mvc</td>\n",
159
+ " <td>Z09</td>\n",
160
+ " <td>Wichita St</td>\n",
161
+ " <td>1985</td>\n",
162
+ " <td>2024</td>\n",
163
+ " </tr>\n",
164
+ " <tr>\n",
165
+ " <th>487</th>\n",
166
+ " <td>487</td>\n",
167
+ " <td>1246</td>\n",
168
+ " <td>2017</td>\n",
169
+ " <td>M</td>\n",
170
+ " <td>66</td>\n",
171
+ " <td>115</td>\n",
172
+ " <td>12.687243</td>\n",
173
+ " <td>87.0</td>\n",
174
+ " <td>85.941176</td>\n",
175
+ " <td>48</td>\n",
176
+ " <td>...</td>\n",
177
+ " <td>0</td>\n",
178
+ " <td>1</td>\n",
179
+ " <td>0.500000</td>\n",
180
+ " <td>1.0</td>\n",
181
+ " <td>0.750000</td>\n",
182
+ " <td>sec</td>\n",
183
+ " <td>Z02</td>\n",
184
+ " <td>Kentucky</td>\n",
185
+ " <td>1985</td>\n",
186
+ " <td>2024</td>\n",
187
+ " </tr>\n",
188
+ " <tr>\n",
189
+ " <th>1135</th>\n",
190
+ " <td>1135</td>\n",
191
+ " <td>1425</td>\n",
192
+ " <td>2008</td>\n",
193
+ " <td>M</td>\n",
194
+ " <td>46</td>\n",
195
+ " <td>95</td>\n",
196
+ " <td>12.625978</td>\n",
197
+ " <td>69.5</td>\n",
198
+ " <td>69.062500</td>\n",
199
+ " <td>44</td>\n",
200
+ " <td>...</td>\n",
201
+ " <td>0</td>\n",
202
+ " <td>0</td>\n",
203
+ " <td>NaN</td>\n",
204
+ " <td>0.0</td>\n",
205
+ " <td>0.000000</td>\n",
206
+ " <td>pac_ten</td>\n",
207
+ " <td>X06</td>\n",
208
+ " <td>USC</td>\n",
209
+ " <td>1985</td>\n",
210
+ " <td>2024</td>\n",
211
+ " </tr>\n",
212
+ " <tr>\n",
213
+ " <th>556</th>\n",
214
+ " <td>556</td>\n",
215
+ " <td>1268</td>\n",
216
+ " <td>2007</td>\n",
217
+ " <td>M</td>\n",
218
+ " <td>58</td>\n",
219
+ " <td>102</td>\n",
220
+ " <td>11.973088</td>\n",
221
+ " <td>79.5</td>\n",
222
+ " <td>79.500000</td>\n",
223
+ " <td>50</td>\n",
224
+ " <td>...</td>\n",
225
+ " <td>0</td>\n",
226
+ " <td>1</td>\n",
227
+ " <td>0.707107</td>\n",
228
+ " <td>0.5</td>\n",
229
+ " <td>0.500000</td>\n",
230
+ " <td>acc</td>\n",
231
+ " <td>Y04</td>\n",
232
+ " <td>Maryland</td>\n",
233
+ " <td>1985</td>\n",
234
+ " <td>2024</td>\n",
235
+ " </tr>\n",
236
+ " <tr>\n",
237
+ " <th>368</th>\n",
238
+ " <td>368</td>\n",
239
+ " <td>1216</td>\n",
240
+ " <td>2021</td>\n",
241
+ " <td>M</td>\n",
242
+ " <td>49</td>\n",
243
+ " <td>83</td>\n",
244
+ " <td>10.118859</td>\n",
245
+ " <td>66.0</td>\n",
246
+ " <td>65.869565</td>\n",
247
+ " <td>50</td>\n",
248
+ " <td>...</td>\n",
249
+ " <td>0</td>\n",
250
+ " <td>0</td>\n",
251
+ " <td>NaN</td>\n",
252
+ " <td>0.0</td>\n",
253
+ " <td>0.000000</td>\n",
254
+ " <td>aec</td>\n",
255
+ " <td>Z16</td>\n",
256
+ " <td>Hartford</td>\n",
257
+ " <td>1985</td>\n",
258
+ " <td>2023</td>\n",
259
+ " </tr>\n",
260
+ " <tr>\n",
261
+ " <th>1037</th>\n",
262
+ " <td>1037</td>\n",
263
+ " <td>1397</td>\n",
264
+ " <td>2023</td>\n",
265
+ " <td>M</td>\n",
266
+ " <td>46</td>\n",
267
+ " <td>94</td>\n",
268
+ " <td>11.188045</td>\n",
269
+ " <td>71.0</td>\n",
270
+ " <td>71.787879</td>\n",
271
+ " <td>40</td>\n",
272
+ " <td>...</td>\n",
273
+ " <td>0</td>\n",
274
+ " <td>1</td>\n",
275
+ " <td>0.577350</td>\n",
276
+ " <td>1.0</td>\n",
277
+ " <td>0.666667</td>\n",
278
+ " <td>sec</td>\n",
279
+ " <td>W04</td>\n",
280
+ " <td>Tennessee</td>\n",
281
+ " <td>1985</td>\n",
282
+ " <td>2024</td>\n",
283
+ " </tr>\n",
284
+ " <tr>\n",
285
+ " <th>891</th>\n",
286
+ " <td>891</td>\n",
287
+ " <td>1345</td>\n",
288
+ " <td>2023</td>\n",
289
+ " <td>M</td>\n",
290
+ " <td>54</td>\n",
291
+ " <td>89</td>\n",
292
+ " <td>9.013013</td>\n",
293
+ " <td>74.5</td>\n",
294
+ " <td>73.088235</td>\n",
295
+ " <td>39</td>\n",
296
+ " <td>...</td>\n",
297
+ " <td>0</td>\n",
298
+ " <td>0</td>\n",
299
+ " <td>NaN</td>\n",
300
+ " <td>0.0</td>\n",
301
+ " <td>0.000000</td>\n",
302
+ " <td>big_ten</td>\n",
303
+ " <td>W01</td>\n",
304
+ " <td>Purdue</td>\n",
305
+ " <td>1985</td>\n",
306
+ " <td>2024</td>\n",
307
+ " </tr>\n",
308
+ " <tr>\n",
309
+ " <th>559</th>\n",
310
+ " <td>559</td>\n",
311
+ " <td>1268</td>\n",
312
+ " <td>2015</td>\n",
313
+ " <td>M</td>\n",
314
+ " <td>55</td>\n",
315
+ " <td>95</td>\n",
316
+ " <td>9.364565</td>\n",
317
+ " <td>68.0</td>\n",
318
+ " <td>69.484848</td>\n",
319
+ " <td>48</td>\n",
320
+ " <td>...</td>\n",
321
+ " <td>0</td>\n",
322
+ " <td>1</td>\n",
323
+ " <td>0.707107</td>\n",
324
+ " <td>0.5</td>\n",
325
+ " <td>0.500000</td>\n",
326
+ " <td>big_ten</td>\n",
327
+ " <td>Y04</td>\n",
328
+ " <td>Maryland</td>\n",
329
+ " <td>1985</td>\n",
330
+ " <td>2024</td>\n",
331
+ " </tr>\n",
332
+ " </tbody>\n",
333
+ "</table>\n",
334
+ "<p>10 rows × 319 columns</p>\n",
335
+ "</div>"
336
+ ],
337
+ "text/plain": [
338
+ " Unnamed: 0 TeamID Season League TeamScore min reg TeamScore max reg \\\n",
339
+ "409 409 1233 2013 M 60 104 \n",
340
+ "1189 1189 1436 2005 M 55 90 \n",
341
+ "1275 1275 1455 2013 M 52 94 \n",
342
+ "487 487 1246 2017 M 66 115 \n",
343
+ "1135 1135 1425 2008 M 46 95 \n",
344
+ "556 556 1268 2007 M 58 102 \n",
345
+ "368 368 1216 2021 M 49 83 \n",
346
+ "1037 1037 1397 2023 M 46 94 \n",
347
+ "891 891 1345 2023 M 54 89 \n",
348
+ "559 559 1268 2015 M 55 95 \n",
349
+ "\n",
350
+ " TeamScore std reg TeamScore median reg TeamScore mean reg \\\n",
351
+ "409 11.901903 81.0 80.696970 \n",
352
+ "1189 8.572611 73.5 72.600000 \n",
353
+ "1275 9.407254 69.0 69.441176 \n",
354
+ "487 12.687243 87.0 85.941176 \n",
355
+ "1135 12.625978 69.5 69.062500 \n",
356
+ "556 11.973088 79.5 79.500000 \n",
357
+ "368 10.118859 66.0 65.869565 \n",
358
+ "1037 11.188045 71.0 71.787879 \n",
359
+ "891 9.013013 74.5 73.088235 \n",
360
+ "559 9.364565 68.0 69.484848 \n",
361
+ "\n",
362
+ " OppScore min reg ... Win min tourney Win max tourney \\\n",
363
+ "409 57 ... 0 0 \n",
364
+ "1189 44 ... 0 1 \n",
365
+ "1275 39 ... 0 1 \n",
366
+ "487 48 ... 0 1 \n",
367
+ "1135 44 ... 0 0 \n",
368
+ "556 50 ... 0 1 \n",
369
+ "368 50 ... 0 0 \n",
370
+ "1037 40 ... 0 1 \n",
371
+ "891 39 ... 0 0 \n",
372
+ "559 48 ... 0 1 \n",
373
+ "\n",
374
+ " Win std tourney Win median tourney Win mean tourney ConfAbbrev Seed \\\n",
375
+ "409 NaN 0.0 0.000000 maac Z15 \n",
376
+ "1189 0.707107 0.5 0.500000 aec Y13 \n",
377
+ "1275 0.447214 1.0 0.800000 mvc Z09 \n",
378
+ "487 0.500000 1.0 0.750000 sec Z02 \n",
379
+ "1135 NaN 0.0 0.000000 pac_ten X06 \n",
380
+ "556 0.707107 0.5 0.500000 acc Y04 \n",
381
+ "368 NaN 0.0 0.000000 aec Z16 \n",
382
+ "1037 0.577350 1.0 0.666667 sec W04 \n",
383
+ "891 NaN 0.0 0.000000 big_ten W01 \n",
384
+ "559 0.707107 0.5 0.500000 big_ten Y04 \n",
385
+ "\n",
386
+ " TeamName FirstD1Season LastD1Season \n",
387
+ "409 Iona 1985 2024 \n",
388
+ "1189 Vermont 1985 2024 \n",
389
+ "1275 Wichita St 1985 2024 \n",
390
+ "487 Kentucky 1985 2024 \n",
391
+ "1135 USC 1985 2024 \n",
392
+ "556 Maryland 1985 2024 \n",
393
+ "368 Hartford 1985 2023 \n",
394
+ "1037 Tennessee 1985 2024 \n",
395
+ "891 Purdue 1985 2024 \n",
396
+ "559 Maryland 1985 2024 \n",
397
+ "\n",
398
+ "[10 rows x 319 columns]"
399
+ ]
400
+ },
401
+ "execution_count": 3,
402
+ "metadata": {},
403
+ "output_type": "execute_result"
404
+ }
405
+ ],
406
+ "source": [
407
+ "all_teams_agg_df.sample(10, random_state=1)"
408
+ ]
409
+ },
410
+ {
411
+ "cell_type": "code",
412
+ "execution_count": 4,
413
  "metadata": {},
414
  "outputs": [],
415
  "source": [
 
420
  "\n",
421
  "DEVICE = get_device() "
422
  ]
423
+ },
424
+ {
425
+ "cell_type": "markdown",
426
+ "metadata": {},
427
+ "source": [
428
+ "## Split Mens & Womens data"
429
+ ]
430
+ },
431
+ {
432
+ "cell_type": "code",
433
+ "execution_count": 6,
434
+ "metadata": {},
435
+ "outputs": [],
436
+ "source": [
437
+ "mens_team_df = all_teams_agg_df[all_teams_agg_df[\"League\"] == \"M\"]\n",
438
+ "womens_team_df = all_teams_agg_df[all_teams_agg_df[\"League\"] == \"W\"]"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "code",
443
+ "execution_count": null,
444
+ "metadata": {},
445
+ "outputs": [],
446
+ "source": [
447
+ "# define the features and target for our models\n",
448
+ "feature_cols = []\n",
449
+ "target_cols = []"
450
+ ]
451
+ },
452
+ {
453
+ "cell_type": "code",
454
+ "execution_count": null,
455
+ "metadata": {},
456
+ "outputs": [],
457
+ "source": [
458
+ "# split into training and testing datasets\n",
459
+ "MX_train, MX_test, My_train, My_test = train_test_split(\n",
460
+ " mens_team_df[feature_cols],\n",
461
+ " mens_team_df[target_cols],\n",
462
+ " test_size=0.2,\n",
463
+ " random_state=1,\n",
464
+ ")\n",
465
+ "\n",
466
+ "# same for womens\n",
467
+ "WX_train, WX_test, Wy_train, Wy_test = train_test_split(\n",
468
+ " womens_team_df[feature_cols],\n",
469
+ " womens_team_df[target_cols],\n",
470
+ " test_size=0.2,\n",
471
+ " random_state=1,\n",
472
+ ")"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "code",
477
+ "execution_count": null,
478
+ "metadata": {},
479
+ "outputs": [],
480
+ "source": []
481
+ },
482
+ {
483
+ "cell_type": "markdown",
484
+ "metadata": {},
485
+ "source": [
486
+ "## Mens XGBoost Model"
487
+ ]
488
+ },
489
+ {
490
+ "cell_type": "code",
491
+ "execution_count": null,
492
+ "metadata": {},
493
+ "outputs": [],
494
+ "source": []
495
  }
496
  ],
497
  "metadata": {
src/pre_processing.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -15,7 +15,7 @@
15
  },
16
  {
17
  "cell_type": "code",
18
- "execution_count": 3,
19
  "metadata": {},
20
  "outputs": [
21
  {
@@ -206,7 +206,7 @@
206
  "[5 rows x 35 columns]"
207
  ]
208
  },
209
- "execution_count": 3,
210
  "metadata": {},
211
  "output_type": "execute_result"
212
  }
@@ -222,7 +222,7 @@
222
  },
223
  {
224
  "cell_type": "code",
225
- "execution_count": 4,
226
  "metadata": {},
227
  "outputs": [
228
  {
@@ -413,7 +413,7 @@
413
  "[5 rows x 35 columns]"
414
  ]
415
  },
416
- "execution_count": 4,
417
  "metadata": {},
418
  "output_type": "execute_result"
419
  }
@@ -436,7 +436,7 @@
436
  },
437
  {
438
  "cell_type": "code",
439
- "execution_count": 5,
440
  "metadata": {},
441
  "outputs": [],
442
  "source": [
@@ -466,7 +466,7 @@
466
  },
467
  {
468
  "cell_type": "code",
469
- "execution_count": 6,
470
  "metadata": {},
471
  "outputs": [
472
  {
@@ -540,7 +540,7 @@
540
  },
541
  {
542
  "cell_type": "code",
543
- "execution_count": 7,
544
  "metadata": {},
545
  "outputs": [
546
  {
@@ -613,7 +613,7 @@
613
  },
614
  {
615
  "cell_type": "code",
616
- "execution_count": 8,
617
  "metadata": {},
618
  "outputs": [],
619
  "source": [
@@ -630,7 +630,7 @@
630
  },
631
  {
632
  "cell_type": "code",
633
- "execution_count": 9,
634
  "metadata": {},
635
  "outputs": [],
636
  "source": [
@@ -654,7 +654,7 @@
654
  },
655
  {
656
  "cell_type": "code",
657
- "execution_count": 10,
658
  "metadata": {},
659
  "outputs": [],
660
  "source": [
@@ -680,7 +680,7 @@
680
  },
681
  {
682
  "cell_type": "code",
683
- "execution_count": 11,
684
  "metadata": {
685
  "tags": []
686
  },
@@ -1027,7 +1027,7 @@
1027
  "[10 rows x 158 columns]"
1028
  ]
1029
  },
1030
- "execution_count": 11,
1031
  "metadata": {},
1032
  "output_type": "execute_result"
1033
  }
@@ -1046,7 +1046,7 @@
1046
  },
1047
  {
1048
  "cell_type": "code",
1049
- "execution_count": 12,
1050
  "metadata": {},
1051
  "outputs": [
1052
  {
@@ -1391,7 +1391,7 @@
1391
  "[10 rows x 158 columns]"
1392
  ]
1393
  },
1394
- "execution_count": 12,
1395
  "metadata": {},
1396
  "output_type": "execute_result"
1397
  }
@@ -1419,7 +1419,7 @@
1419
  },
1420
  {
1421
  "cell_type": "code",
1422
- "execution_count": 13,
1423
  "metadata": {},
1424
  "outputs": [
1425
  {
@@ -1451,150 +1451,91 @@
1451
  " <th>TeamName</th>\n",
1452
  " <th>FirstD1Season</th>\n",
1453
  " <th>LastD1Season</th>\n",
 
1454
  " </tr>\n",
1455
  " </thead>\n",
1456
  " <tbody>\n",
1457
  " <tr>\n",
1458
- " <th>507</th>\n",
1459
- " <td>2016</td>\n",
1460
- " <td>1292</td>\n",
1461
- " <td>cusa</td>\n",
1462
- " <td>M</td>\n",
1463
- " <td>X15</td>\n",
1464
- " <td>MTSU</td>\n",
1465
- " <td>1985</td>\n",
1466
- " <td>2024</td>\n",
1467
- " </tr>\n",
1468
- " <tr>\n",
1469
- " <th>1174</th>\n",
1470
- " <td>1994</td>\n",
1471
- " <td>1438</td>\n",
1472
- " <td>acc</td>\n",
1473
- " <td>M</td>\n",
1474
- " <td>Z07</td>\n",
1475
- " <td>Virginia</td>\n",
1476
- " <td>1985</td>\n",
1477
- " <td>2024</td>\n",
1478
- " </tr>\n",
1479
- " <tr>\n",
1480
- " <th>1158</th>\n",
1481
- " <td>1998</td>\n",
1482
- " <td>1428</td>\n",
1483
- " <td>wac</td>\n",
1484
- " <td>M</td>\n",
1485
- " <td>X03</td>\n",
1486
- " <td>Utah</td>\n",
1487
  " <td>1985</td>\n",
1488
- " <td>2024</td>\n",
1489
- " </tr>\n",
1490
- " <tr>\n",
1491
- " <th>638</th>\n",
1492
- " <td>1988</td>\n",
1493
- " <td>1328</td>\n",
1494
- " <td>big_eight</td>\n",
1495
  " <td>M</td>\n",
1496
- " <td>Y01</td>\n",
1497
- " <td>Oklahoma</td>\n",
1498
  " <td>1985</td>\n",
1499
  " <td>2024</td>\n",
 
1500
  " </tr>\n",
1501
  " <tr>\n",
1502
- " <th>404</th>\n",
1503
- " <td>1999</td>\n",
1504
- " <td>1268</td>\n",
1505
- " <td>acc</td>\n",
1506
  " <td>M</td>\n",
1507
- " <td>Y02</td>\n",
1508
- " <td>Maryland</td>\n",
1509
  " <td>1985</td>\n",
1510
  " <td>2024</td>\n",
 
1511
  " </tr>\n",
1512
  " <tr>\n",
1513
- " <th>1797</th>\n",
1514
- " <td>2007</td>\n",
1515
- " <td>1308</td>\n",
1516
- " <td>wac</td>\n",
1517
  " <td>M</td>\n",
1518
- " <td>W13</td>\n",
1519
- " <td>New Mexico St</td>\n",
1520
  " <td>1985</td>\n",
1521
  " <td>2024</td>\n",
 
1522
  " </tr>\n",
1523
  " <tr>\n",
1524
- " <th>1471</th>\n",
1525
- " <td>1988</td>\n",
1526
- " <td>1185</td>\n",
1527
- " <td>mac</td>\n",
1528
  " <td>M</td>\n",
1529
- " <td>X15</td>\n",
1530
- " <td>E Michigan</td>\n",
1531
  " <td>1985</td>\n",
1532
  " <td>2024</td>\n",
 
1533
  " </tr>\n",
1534
  " <tr>\n",
1535
- " <th>1313</th>\n",
1536
- " <td>2021</td>\n",
1537
- " <td>1196</td>\n",
1538
  " <td>sec</td>\n",
1539
  " <td>M</td>\n",
1540
  " <td>Z07</td>\n",
1541
- " <td>Florida</td>\n",
1542
- " <td>1985</td>\n",
1543
- " <td>2024</td>\n",
1544
- " </tr>\n",
1545
- " <tr>\n",
1546
- " <th>881</th>\n",
1547
- " <td>1987</td>\n",
1548
- " <td>1424</td>\n",
1549
- " <td>pcaa</td>\n",
1550
- " <td>M</td>\n",
1551
- " <td>Z01</td>\n",
1552
- " <td>UNLV</td>\n",
1553
- " <td>1985</td>\n",
1554
- " <td>2024</td>\n",
1555
- " </tr>\n",
1556
- " <tr>\n",
1557
- " <th>1998</th>\n",
1558
- " <td>2006</td>\n",
1559
- " <td>1203</td>\n",
1560
- " <td>a_ten</td>\n",
1561
- " <td>M</td>\n",
1562
- " <td>W08</td>\n",
1563
- " <td>G Washington</td>\n",
1564
  " <td>1985</td>\n",
1565
  " <td>2024</td>\n",
 
1566
  " </tr>\n",
1567
  " </tbody>\n",
1568
  "</table>\n",
1569
  "</div>"
1570
  ],
1571
  "text/plain": [
1572
- " Season TeamID ConfAbbrev League Seed TeamName FirstD1Season \\\n",
1573
- "507 2016 1292 cusa M X15 MTSU 1985 \n",
1574
- "1174 1994 1438 acc M Z07 Virginia 1985 \n",
1575
- "1158 1998 1428 wac M X03 Utah 1985 \n",
1576
- "638 1988 1328 big_eight M Y01 Oklahoma 1985 \n",
1577
- "404 1999 1268 acc M Y02 Maryland 1985 \n",
1578
- "1797 2007 1308 wac M W13 New Mexico St 1985 \n",
1579
- "1471 1988 1185 mac M X15 E Michigan 1985 \n",
1580
- "1313 2021 1196 sec M Z07 Florida 1985 \n",
1581
- "881 1987 1424 pcaa M Z01 UNLV 1985 \n",
1582
- "1998 2006 1203 a_ten M W08 G Washington 1985 \n",
1583
  "\n",
1584
- " LastD1Season \n",
1585
- "507 2024 \n",
1586
- "1174 2024 \n",
1587
- "1158 2024 \n",
1588
- "638 2024 \n",
1589
- "404 2024 \n",
1590
- "1797 2024 \n",
1591
- "1471 2024 \n",
1592
- "1313 2024 \n",
1593
- "881 2024 \n",
1594
- "1998 2024 "
1595
  ]
1596
  },
1597
- "execution_count": 13,
1598
  "metadata": {},
1599
  "output_type": "execute_result"
1600
  }
@@ -1616,29 +1557,17 @@
1616
  " .merge(right=pd.read_csv(os.path.join(DATA_DIR, \"MTeams.csv\")), on=\"TeamID\")\n",
1617
  ")\n",
1618
  "\n",
1619
- "team_conf_seeds_df.sample(10, random_state=10)"
1620
- ]
1621
- },
1622
- {
1623
- "cell_type": "code",
1624
- "execution_count": 14,
1625
- "metadata": {},
1626
- "outputs": [],
1627
- "source": [
1628
- "# merge the tournament aggregated metrics with the regular season aggregated metrics\n",
1629
- "team_agg_df = (\n",
1630
- " pd.merge(\n",
1631
- " left=team_reg_agg, \n",
1632
- " right=team_tourney_agg, \n",
1633
- " on=[\"TeamID\", \"Season\", \"League\"], \n",
1634
- " suffixes=(\" reg\", \" tourney\"),\n",
1635
- " )\n",
1636
- ")"
1637
  ]
1638
  },
1639
  {
1640
  "cell_type": "code",
1641
- "execution_count": 15,
1642
  "metadata": {},
1643
  "outputs": [
1644
  {
@@ -1673,351 +1602,363 @@
1673
  " <th>OppScore min reg</th>\n",
1674
  " <th>OppScore max reg</th>\n",
1675
  " <th>...</th>\n",
 
 
 
 
 
1676
  " <th>Win min tourney</th>\n",
1677
  " <th>Win max tourney</th>\n",
1678
  " <th>Win std tourney</th>\n",
1679
  " <th>Win median tourney</th>\n",
1680
  " <th>Win mean tourney</th>\n",
1681
- " <th>ConfAbbrev</th>\n",
1682
- " <th>Seed</th>\n",
1683
- " <th>TeamName</th>\n",
1684
- " <th>FirstD1Season</th>\n",
1685
- " <th>LastD1Season</th>\n",
1686
  " </tr>\n",
1687
  " </thead>\n",
1688
  " <tbody>\n",
1689
  " <tr>\n",
1690
- " <th>1202</th>\n",
1691
- " <td>1437</td>\n",
1692
- " <td>2011</td>\n",
1693
- " <td>M</td>\n",
1694
- " <td>50</td>\n",
1695
- " <td>88</td>\n",
1696
- " <td>9.450156</td>\n",
1697
- " <td>73.0</td>\n",
1698
- " <td>72.718750</td>\n",
1699
  " <td>36</td>\n",
1700
- " <td>93</td>\n",
1701
  " <td>...</td>\n",
1702
- " <td>0</td>\n",
1703
- " <td>0</td>\n",
1704
  " <td>NaN</td>\n",
1705
- " <td>0.0</td>\n",
1706
- " <td>0.000000</td>\n",
1707
- " <td>big_east</td>\n",
1708
- " <td>W09</td>\n",
1709
- " <td>Villanova</td>\n",
1710
- " <td>1985</td>\n",
1711
- " <td>2024</td>\n",
 
 
1712
  " </tr>\n",
1713
  " <tr>\n",
1714
- " <th>109</th>\n",
1715
- " <td>1137</td>\n",
1716
- " <td>2013</td>\n",
1717
  " <td>M</td>\n",
1718
- " <td>56</td>\n",
1719
  " <td>88</td>\n",
1720
- " <td>7.121073</td>\n",
1721
- " <td>66.0</td>\n",
1722
- " <td>67.250000</td>\n",
1723
- " <td>42</td>\n",
1724
- " <td>79</td>\n",
1725
  " <td>...</td>\n",
1726
- " <td>0</td>\n",
1727
- " <td>0</td>\n",
1728
  " <td>NaN</td>\n",
1729
- " <td>0.0</td>\n",
1730
- " <td>0.000000</td>\n",
1731
- " <td>patriot</td>\n",
1732
- " <td>W11</td>\n",
1733
- " <td>Bucknell</td>\n",
1734
- " <td>1985</td>\n",
1735
- " <td>2024</td>\n",
 
 
1736
  " </tr>\n",
1737
  " <tr>\n",
1738
- " <th>1105</th>\n",
1739
- " <td>1417</td>\n",
1740
- " <td>2015</td>\n",
1741
  " <td>M</td>\n",
1742
- " <td>39</td>\n",
1743
- " <td>113</td>\n",
1744
- " <td>16.594427</td>\n",
1745
  " <td>72.0</td>\n",
1746
- " <td>72.000000</td>\n",
1747
- " <td>45</td>\n",
1748
- " <td>87</td>\n",
1749
  " <td>...</td>\n",
1750
- " <td>0</td>\n",
1751
- " <td>1</td>\n",
1752
- " <td>0.577350</td>\n",
1753
- " <td>1.0</td>\n",
1754
- " <td>0.666667</td>\n",
1755
- " <td>pac_twelve</td>\n",
1756
- " <td>X11</td>\n",
1757
- " <td>UCLA</td>\n",
1758
- " <td>1985</td>\n",
1759
- " <td>2024</td>\n",
1760
  " </tr>\n",
1761
  " <tr>\n",
1762
- " <th>700</th>\n",
1763
- " <td>1305</td>\n",
1764
  " <td>2005</td>\n",
1765
  " <td>M</td>\n",
1766
- " <td>52</td>\n",
1767
- " <td>84</td>\n",
1768
- " <td>8.720199</td>\n",
1769
- " <td>72.0</td>\n",
1770
- " <td>69.551724</td>\n",
1771
- " <td>46</td>\n",
1772
- " <td>85</td>\n",
1773
  " <td>...</td>\n",
1774
- " <td>0</td>\n",
1775
- " <td>1</td>\n",
1776
- " <td>0.707107</td>\n",
1777
- " <td>0.5</td>\n",
1778
- " <td>0.500000</td>\n",
1779
- " <td>wac</td>\n",
1780
- " <td>X09</td>\n",
1781
- " <td>Nevada</td>\n",
1782
- " <td>1985</td>\n",
1783
- " <td>2024</td>\n",
1784
  " </tr>\n",
1785
  " <tr>\n",
1786
- " <th>834</th>\n",
1787
- " <td>1332</td>\n",
1788
- " <td>2019</td>\n",
1789
  " <td>M</td>\n",
1790
- " <td>47</td>\n",
1791
- " <td>84</td>\n",
1792
- " <td>11.186577</td>\n",
1793
- " <td>72.0</td>\n",
1794
- " <td>70.485714</td>\n",
1795
- " <td>46</td>\n",
1796
- " <td>90</td>\n",
1797
  " <td>...</td>\n",
1798
- " <td>0</td>\n",
1799
- " <td>1</td>\n",
1800
- " <td>0.577350</td>\n",
 
 
 
1801
  " <td>1.0</td>\n",
1802
- " <td>0.666667</td>\n",
1803
- " <td>pac_twelve</td>\n",
1804
- " <td>Z12</td>\n",
1805
- " <td>Oregon</td>\n",
1806
- " <td>1985</td>\n",
1807
- " <td>2024</td>\n",
1808
  " </tr>\n",
1809
  " <tr>\n",
1810
- " <th>1009</th>\n",
1811
- " <td>1393</td>\n",
1812
- " <td>2018</td>\n",
1813
  " <td>M</td>\n",
1814
  " <td>44</td>\n",
1815
- " <td>90</td>\n",
1816
- " <td>11.519253</td>\n",
1817
- " <td>70.0</td>\n",
1818
- " <td>67.545455</td>\n",
1819
  " <td>45</td>\n",
1820
- " <td>101</td>\n",
1821
  " <td>...</td>\n",
1822
- " <td>0</td>\n",
1823
- " <td>1</td>\n",
1824
- " <td>0.500000</td>\n",
1825
- " <td>1.0</td>\n",
1826
- " <td>0.750000</td>\n",
1827
- " <td>acc</td>\n",
1828
- " <td>X11b</td>\n",
1829
- " <td>Syracuse</td>\n",
1830
- " <td>1985</td>\n",
1831
- " <td>2024</td>\n",
1832
  " </tr>\n",
1833
  " <tr>\n",
1834
- " <th>953</th>\n",
1835
- " <td>1373</td>\n",
1836
- " <td>2010</td>\n",
1837
  " <td>M</td>\n",
1838
- " <td>53</td>\n",
1839
- " <td>99</td>\n",
1840
- " <td>10.164555</td>\n",
1841
  " <td>76.0</td>\n",
1842
- " <td>75.454545</td>\n",
1843
- " <td>51</td>\n",
1844
- " <td>87</td>\n",
1845
  " <td>...</td>\n",
1846
- " <td>0</td>\n",
1847
- " <td>0</td>\n",
1848
  " <td>NaN</td>\n",
1849
- " <td>0.0</td>\n",
1850
- " <td>0.000000</td>\n",
1851
- " <td>maac</td>\n",
1852
- " <td>X13</td>\n",
1853
- " <td>Siena</td>\n",
1854
- " <td>1985</td>\n",
1855
- " <td>2024</td>\n",
 
 
1856
  " </tr>\n",
1857
  " <tr>\n",
1858
- " <th>1141</th>\n",
1859
- " <td>1425</td>\n",
1860
- " <td>2022</td>\n",
1861
- " <td>M</td>\n",
1862
- " <td>58</td>\n",
1863
- " <td>98</td>\n",
1864
- " <td>10.506041</td>\n",
1865
- " <td>70.0</td>\n",
1866
- " <td>72.575758</td>\n",
1867
- " <td>43</td>\n",
1868
- " <td>91</td>\n",
1869
  " <td>...</td>\n",
1870
- " <td>0</td>\n",
1871
- " <td>0</td>\n",
1872
  " <td>NaN</td>\n",
1873
- " <td>0.0</td>\n",
1874
- " <td>0.000000</td>\n",
1875
- " <td>pac_twelve</td>\n",
1876
- " <td>Y07</td>\n",
1877
- " <td>USC</td>\n",
1878
- " <td>1985</td>\n",
1879
- " <td>2024</td>\n",
 
 
1880
  " </tr>\n",
1881
  " <tr>\n",
1882
- " <th>419</th>\n",
1883
- " <td>1234</td>\n",
1884
- " <td>2015</td>\n",
1885
- " <td>M</td>\n",
1886
- " <td>44</td>\n",
1887
- " <td>90</td>\n",
1888
- " <td>10.216556</td>\n",
1889
- " <td>70.5</td>\n",
1890
- " <td>69.406250</td>\n",
1891
- " <td>44</td>\n",
1892
- " <td>90</td>\n",
1893
  " <td>...</td>\n",
1894
- " <td>0</td>\n",
1895
- " <td>1</td>\n",
1896
- " <td>0.707107</td>\n",
1897
- " <td>0.5</td>\n",
1898
- " <td>0.500000</td>\n",
1899
- " <td>big_ten</td>\n",
1900
- " <td>X07</td>\n",
1901
- " <td>Iowa</td>\n",
1902
- " <td>1985</td>\n",
1903
- " <td>2024</td>\n",
1904
  " </tr>\n",
1905
  " <tr>\n",
1906
- " <th>973</th>\n",
1907
- " <td>1386</td>\n",
1908
- " <td>2008</td>\n",
1909
- " <td>M</td>\n",
1910
- " <td>55</td>\n",
1911
- " <td>98</td>\n",
1912
- " <td>10.801234</td>\n",
1913
- " <td>72.0</td>\n",
1914
- " <td>73.333333</td>\n",
1915
- " <td>42</td>\n",
1916
- " <td>102</td>\n",
1917
  " <td>...</td>\n",
1918
- " <td>0</td>\n",
1919
- " <td>0</td>\n",
1920
- " <td>NaN</td>\n",
 
 
1921
  " <td>0.0</td>\n",
1922
- " <td>0.000000</td>\n",
1923
- " <td>a_ten</td>\n",
1924
- " <td>W11</td>\n",
1925
- " <td>St Joseph's PA</td>\n",
1926
- " <td>1985</td>\n",
1927
- " <td>2024</td>\n",
1928
  " </tr>\n",
1929
  " </tbody>\n",
1930
  "</table>\n",
1931
- "<p>10 rows × 318 columns</p>\n",
1932
  "</div>"
1933
  ],
1934
  "text/plain": [
1935
- " TeamID Season League TeamScore min reg TeamScore max reg \\\n",
1936
- "1202 1437 2011 M 50 88 \n",
1937
- "109 1137 2013 M 56 88 \n",
1938
- "1105 1417 2015 M 39 113 \n",
1939
- "700 1305 2005 M 52 84 \n",
1940
- "834 1332 2019 M 47 84 \n",
1941
- "1009 1393 2018 M 44 90 \n",
1942
- "953 1373 2010 M 53 99 \n",
1943
- "1141 1425 2022 M 58 98 \n",
1944
- "419 1234 2015 M 44 90 \n",
1945
- "973 1386 2008 M 55 98 \n",
1946
  "\n",
1947
- " TeamScore std reg TeamScore median reg TeamScore mean reg \\\n",
1948
- "1202 9.450156 73.0 72.718750 \n",
1949
- "109 7.121073 66.0 67.250000 \n",
1950
- "1105 16.594427 72.0 72.000000 \n",
1951
- "700 8.720199 72.0 69.551724 \n",
1952
- "834 11.186577 72.0 70.485714 \n",
1953
- "1009 11.519253 70.0 67.545455 \n",
1954
- "953 10.164555 76.0 75.454545 \n",
1955
- "1141 10.506041 70.0 72.575758 \n",
1956
- "419 10.216556 70.5 69.406250 \n",
1957
- "973 10.801234 72.0 73.333333 \n",
1958
  "\n",
1959
- " OppScore min reg OppScore max reg ... Win min tourney \\\n",
1960
- "1202 36 93 ... 0 \n",
1961
- "109 42 79 ... 0 \n",
1962
- "1105 45 87 ... 0 \n",
1963
- "700 46 85 ... 0 \n",
1964
- "834 46 90 ... 0 \n",
1965
- "1009 45 101 ... 0 \n",
1966
- "953 51 87 ... 0 \n",
1967
- "1141 43 91 ... 0 \n",
1968
- "419 44 90 ... 0 \n",
1969
- "973 42 102 ... 0 \n",
1970
  "\n",
1971
- " Win max tourney Win std tourney Win median tourney Win mean tourney \\\n",
1972
- "1202 0 NaN 0.0 0.000000 \n",
1973
- "109 0 NaN 0.0 0.000000 \n",
1974
- "1105 1 0.577350 1.0 0.666667 \n",
1975
- "700 1 0.707107 0.5 0.500000 \n",
1976
- "834 1 0.577350 1.0 0.666667 \n",
1977
- "1009 1 0.500000 1.0 0.750000 \n",
1978
- "953 0 NaN 0.0 0.000000 \n",
1979
- "1141 0 NaN 0.0 0.000000 \n",
1980
- "419 1 0.707107 0.5 0.500000 \n",
1981
- "973 0 NaN 0.0 0.000000 \n",
1982
  "\n",
1983
- " ConfAbbrev Seed TeamName FirstD1Season LastD1Season \n",
1984
- "1202 big_east W09 Villanova 1985 2024 \n",
1985
- "109 patriot W11 Bucknell 1985 2024 \n",
1986
- "1105 pac_twelve X11 UCLA 1985 2024 \n",
1987
- "700 wac X09 Nevada 1985 2024 \n",
1988
- "834 pac_twelve Z12 Oregon 1985 2024 \n",
1989
- "1009 acc X11b Syracuse 1985 2024 \n",
1990
- "953 maac X13 Siena 1985 2024 \n",
1991
- "1141 pac_twelve Y07 USC 1985 2024 \n",
1992
- "419 big_ten X07 Iowa 1985 2024 \n",
1993
- "973 a_ten W11 St Joseph's PA 1985 2024 \n",
1994
  "\n",
1995
- "[10 rows x 318 columns]"
 
 
 
 
 
 
 
 
 
 
 
 
1996
  ]
1997
  },
1998
- "execution_count": 15,
1999
  "metadata": {},
2000
  "output_type": "execute_result"
2001
  }
2002
  ],
2003
  "source": [
2004
- "# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
2005
- "\n",
2006
- "# team_reg_agg_df = team_reg_agg.merge(right=team_conf_seeds_df, on=[\"TeamID\", \"Season\", \"League\"])\n",
2007
- "# team_tourney_agg_df = team_tourney_agg.merge(right=team_conf_seeds_df, on=[\"TeamID\", \"Season\", \"League\"])\n",
2008
- "\n",
2009
- "team_agg_df = pd.merge(\n",
2010
- " left=team_agg_df,\n",
2011
- " right=team_conf_seeds_df,\n",
2012
- " on=[\"TeamID\", \"Season\", \"League\"],\n",
2013
  ")\n",
2014
  "\n",
2015
- "team_agg_df.sample(10, random_state=10)"
2016
  ]
2017
  },
2018
  {
2019
  "cell_type": "code",
2020
- "execution_count": 16,
2021
  "metadata": {},
2022
  "outputs": [
2023
  {
@@ -2052,7 +1993,6 @@
2052
  " <th>OppScore min reg</th>\n",
2053
  " <th>OppScore max reg</th>\n",
2054
  " <th>...</th>\n",
2055
- " <th>Win min tourney</th>\n",
2056
  " <th>Win max tourney</th>\n",
2057
  " <th>Win std tourney</th>\n",
2058
  " <th>Win median tourney</th>\n",
@@ -2062,330 +2002,361 @@
2062
  " <th>TeamName</th>\n",
2063
  " <th>FirstD1Season</th>\n",
2064
  " <th>LastD1Season</th>\n",
 
2065
  " </tr>\n",
2066
  " </thead>\n",
2067
  " <tbody>\n",
2068
  " <tr>\n",
2069
- " <th>409</th>\n",
2070
- " <td>1233</td>\n",
2071
- " <td>2013</td>\n",
2072
- " <td>M</td>\n",
2073
- " <td>60</td>\n",
2074
- " <td>104</td>\n",
2075
- " <td>11.901903</td>\n",
2076
- " <td>81.0</td>\n",
2077
- " <td>80.696970</td>\n",
2078
- " <td>57</td>\n",
2079
- " <td>105</td>\n",
2080
  " <td>...</td>\n",
2081
- " <td>0</td>\n",
2082
- " <td>0</td>\n",
2083
  " <td>NaN</td>\n",
2084
- " <td>0.0</td>\n",
2085
- " <td>0.000000</td>\n",
2086
- " <td>maac</td>\n",
2087
- " <td>Z15</td>\n",
2088
- " <td>Iona</td>\n",
2089
- " <td>1985</td>\n",
2090
- " <td>2024</td>\n",
 
 
2091
  " </tr>\n",
2092
  " <tr>\n",
2093
- " <th>1189</th>\n",
2094
- " <td>1436</td>\n",
2095
- " <td>2005</td>\n",
2096
  " <td>M</td>\n",
2097
- " <td>55</td>\n",
2098
- " <td>90</td>\n",
2099
- " <td>8.572611</td>\n",
2100
- " <td>73.5</td>\n",
2101
- " <td>72.600000</td>\n",
2102
  " <td>44</td>\n",
2103
- " <td>93</td>\n",
2104
  " <td>...</td>\n",
2105
- " <td>0</td>\n",
2106
- " <td>1</td>\n",
2107
- " <td>0.707107</td>\n",
2108
- " <td>0.5</td>\n",
2109
- " <td>0.500000</td>\n",
2110
- " <td>aec</td>\n",
2111
- " <td>Y13</td>\n",
2112
- " <td>Vermont</td>\n",
2113
- " <td>1985</td>\n",
2114
- " <td>2024</td>\n",
2115
  " </tr>\n",
2116
  " <tr>\n",
2117
- " <th>1275</th>\n",
2118
- " <td>1455</td>\n",
2119
- " <td>2013</td>\n",
2120
  " <td>M</td>\n",
2121
- " <td>52</td>\n",
2122
- " <td>94</td>\n",
2123
- " <td>9.407254</td>\n",
2124
- " <td>69.0</td>\n",
2125
- " <td>69.441176</td>\n",
2126
- " <td>39</td>\n",
2127
- " <td>91</td>\n",
2128
  " <td>...</td>\n",
2129
- " <td>0</td>\n",
2130
- " <td>1</td>\n",
2131
- " <td>0.447214</td>\n",
2132
- " <td>1.0</td>\n",
2133
- " <td>0.800000</td>\n",
2134
- " <td>mvc</td>\n",
2135
- " <td>Z09</td>\n",
2136
- " <td>Wichita St</td>\n",
2137
- " <td>1985</td>\n",
2138
- " <td>2024</td>\n",
2139
  " </tr>\n",
2140
  " <tr>\n",
2141
- " <th>487</th>\n",
2142
- " <td>1246</td>\n",
2143
- " <td>2017</td>\n",
2144
  " <td>M</td>\n",
2145
- " <td>66</td>\n",
2146
- " <td>115</td>\n",
2147
- " <td>12.687243</td>\n",
2148
- " <td>87.0</td>\n",
2149
- " <td>85.941176</td>\n",
2150
- " <td>48</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2151
  " <td>100</td>\n",
 
 
 
 
 
2152
  " <td>...</td>\n",
2153
- " <td>0</td>\n",
2154
- " <td>1</td>\n",
2155
- " <td>0.500000</td>\n",
2156
  " <td>1.0</td>\n",
2157
- " <td>0.750000</td>\n",
2158
- " <td>sec</td>\n",
2159
- " <td>Z02</td>\n",
2160
- " <td>Kentucky</td>\n",
2161
- " <td>1985</td>\n",
2162
- " <td>2024</td>\n",
 
 
 
2163
  " </tr>\n",
2164
  " <tr>\n",
2165
- " <th>1135</th>\n",
2166
- " <td>1425</td>\n",
2167
- " <td>2008</td>\n",
2168
  " <td>M</td>\n",
2169
- " <td>46</td>\n",
2170
- " <td>95</td>\n",
2171
- " <td>12.625978</td>\n",
2172
- " <td>69.5</td>\n",
2173
- " <td>69.062500</td>\n",
2174
  " <td>44</td>\n",
2175
- " <td>96</td>\n",
 
 
 
 
 
2176
  " <td>...</td>\n",
2177
- " <td>0</td>\n",
2178
- " <td>0</td>\n",
2179
  " <td>NaN</td>\n",
2180
- " <td>0.0</td>\n",
2181
- " <td>0.000000</td>\n",
2182
- " <td>pac_ten</td>\n",
2183
- " <td>X06</td>\n",
2184
- " <td>USC</td>\n",
2185
- " <td>1985</td>\n",
2186
- " <td>2024</td>\n",
 
 
2187
  " </tr>\n",
2188
  " <tr>\n",
2189
- " <th>556</th>\n",
2190
- " <td>1268</td>\n",
2191
- " <td>2007</td>\n",
2192
  " <td>M</td>\n",
2193
- " <td>58</td>\n",
2194
- " <td>102</td>\n",
2195
- " <td>11.973088</td>\n",
2196
- " <td>79.5</td>\n",
2197
- " <td>79.500000</td>\n",
2198
- " <td>50</td>\n",
2199
- " <td>103</td>\n",
2200
  " <td>...</td>\n",
2201
- " <td>0</td>\n",
2202
- " <td>1</td>\n",
2203
- " <td>0.707107</td>\n",
2204
- " <td>0.5</td>\n",
2205
- " <td>0.500000</td>\n",
2206
- " <td>acc</td>\n",
2207
- " <td>Y04</td>\n",
2208
- " <td>Maryland</td>\n",
2209
- " <td>1985</td>\n",
2210
- " <td>2024</td>\n",
2211
  " </tr>\n",
2212
  " <tr>\n",
2213
- " <th>368</th>\n",
2214
- " <td>1216</td>\n",
2215
  " <td>2021</td>\n",
2216
- " <td>M</td>\n",
2217
- " <td>49</td>\n",
2218
- " <td>83</td>\n",
2219
- " <td>10.118859</td>\n",
2220
- " <td>66.0</td>\n",
2221
- " <td>65.869565</td>\n",
2222
- " <td>50</td>\n",
2223
- " <td>87</td>\n",
2224
  " <td>...</td>\n",
2225
- " <td>0</td>\n",
2226
- " <td>0</td>\n",
2227
  " <td>NaN</td>\n",
2228
- " <td>0.0</td>\n",
2229
- " <td>0.000000</td>\n",
2230
- " <td>aec</td>\n",
2231
- " <td>Z16</td>\n",
2232
- " <td>Hartford</td>\n",
2233
- " <td>1985</td>\n",
2234
- " <td>2023</td>\n",
 
 
2235
  " </tr>\n",
2236
  " <tr>\n",
2237
- " <th>1037</th>\n",
2238
- " <td>1397</td>\n",
2239
- " <td>2023</td>\n",
2240
- " <td>M</td>\n",
2241
- " <td>46</td>\n",
2242
- " <td>94</td>\n",
2243
- " <td>11.188045</td>\n",
2244
- " <td>71.0</td>\n",
2245
- " <td>71.787879</td>\n",
2246
- " <td>40</td>\n",
2247
- " <td>86</td>\n",
2248
  " <td>...</td>\n",
2249
- " <td>0</td>\n",
2250
- " <td>1</td>\n",
2251
- " <td>0.577350</td>\n",
2252
- " <td>1.0</td>\n",
2253
- " <td>0.666667</td>\n",
2254
- " <td>sec</td>\n",
2255
- " <td>W04</td>\n",
2256
- " <td>Tennessee</td>\n",
2257
- " <td>1985</td>\n",
2258
- " <td>2024</td>\n",
2259
  " </tr>\n",
2260
  " <tr>\n",
2261
- " <th>891</th>\n",
2262
- " <td>1345</td>\n",
2263
- " <td>2023</td>\n",
2264
- " <td>M</td>\n",
2265
- " <td>54</td>\n",
2266
- " <td>89</td>\n",
2267
- " <td>9.013013</td>\n",
2268
- " <td>74.5</td>\n",
2269
- " <td>73.088235</td>\n",
2270
  " <td>39</td>\n",
 
 
 
 
 
2271
  " <td>79</td>\n",
2272
  " <td>...</td>\n",
2273
- " <td>0</td>\n",
2274
- " <td>0</td>\n",
2275
- " <td>NaN</td>\n",
2276
- " <td>0.0</td>\n",
2277
- " <td>0.000000</td>\n",
2278
- " <td>big_ten</td>\n",
2279
- " <td>W01</td>\n",
2280
- " <td>Purdue</td>\n",
2281
- " <td>1985</td>\n",
2282
- " <td>2024</td>\n",
2283
- " </tr>\n",
2284
- " <tr>\n",
2285
- " <th>559</th>\n",
2286
- " <td>1268</td>\n",
2287
- " <td>2015</td>\n",
2288
- " <td>M</td>\n",
2289
- " <td>55</td>\n",
2290
- " <td>95</td>\n",
2291
- " <td>9.364565</td>\n",
2292
- " <td>68.0</td>\n",
2293
- " <td>69.484848</td>\n",
2294
- " <td>48</td>\n",
2295
- " <td>89</td>\n",
2296
- " <td>...</td>\n",
2297
- " <td>0</td>\n",
2298
- " <td>1</td>\n",
2299
  " <td>0.707107</td>\n",
2300
  " <td>0.5</td>\n",
2301
- " <td>0.500000</td>\n",
2302
- " <td>big_ten</td>\n",
2303
- " <td>Y04</td>\n",
2304
- " <td>Maryland</td>\n",
2305
- " <td>1985</td>\n",
2306
- " <td>2024</td>\n",
 
2307
  " </tr>\n",
2308
  " </tbody>\n",
2309
  "</table>\n",
2310
- "<p>10 rows × 318 columns</p>\n",
2311
  "</div>"
2312
  ],
2313
  "text/plain": [
2314
- " TeamID Season League TeamScore min reg TeamScore max reg \\\n",
2315
- "409 1233 2013 M 60 104 \n",
2316
- "1189 1436 2005 M 55 90 \n",
2317
- "1275 1455 2013 M 52 94 \n",
2318
- "487 1246 2017 M 66 115 \n",
2319
- "1135 1425 2008 M 46 95 \n",
2320
- "556 1268 2007 M 58 102 \n",
2321
- "368 1216 2021 M 49 83 \n",
2322
- "1037 1397 2023 M 46 94 \n",
2323
- "891 1345 2023 M 54 89 \n",
2324
- "559 1268 2015 M 55 95 \n",
2325
  "\n",
2326
- " TeamScore std reg TeamScore median reg TeamScore mean reg \\\n",
2327
- "409 11.901903 81.0 80.696970 \n",
2328
- "1189 8.572611 73.5 72.600000 \n",
2329
- "1275 9.407254 69.0 69.441176 \n",
2330
- "487 12.687243 87.0 85.941176 \n",
2331
- "1135 12.625978 69.5 69.062500 \n",
2332
- "556 11.973088 79.5 79.500000 \n",
2333
- "368 10.118859 66.0 65.869565 \n",
2334
- "1037 11.188045 71.0 71.787879 \n",
2335
- "891 9.013013 74.5 73.088235 \n",
2336
- "559 9.364565 68.0 69.484848 \n",
2337
  "\n",
2338
- " OppScore min reg OppScore max reg ... Win min tourney \\\n",
2339
- "409 57 105 ... 0 \n",
2340
- "1189 44 93 ... 0 \n",
2341
- "1275 39 91 ... 0 \n",
2342
- "487 48 100 ... 0 \n",
2343
- "1135 44 96 ... 0 \n",
2344
- "556 50 103 ... 0 \n",
2345
- "368 50 87 ... 0 \n",
2346
- "1037 40 86 ... 0 \n",
2347
- "891 39 79 ... 0 \n",
2348
- "559 48 89 ... 0 \n",
2349
  "\n",
2350
- " Win max tourney Win std tourney Win median tourney Win mean tourney \\\n",
2351
- "409 0 NaN 0.0 0.000000 \n",
2352
- "1189 1 0.707107 0.5 0.500000 \n",
2353
- "1275 1 0.447214 1.0 0.800000 \n",
2354
- "487 1 0.500000 1.0 0.750000 \n",
2355
- "1135 0 NaN 0.0 0.000000 \n",
2356
- "556 1 0.707107 0.5 0.500000 \n",
2357
- "368 0 NaN 0.0 0.000000 \n",
2358
- "1037 1 0.577350 1.0 0.666667 \n",
2359
- "891 0 NaN 0.0 0.000000 \n",
2360
- "559 1 0.707107 0.5 0.500000 \n",
2361
  "\n",
2362
- " ConfAbbrev Seed TeamName FirstD1Season LastD1Season \n",
2363
- "409 maac Z15 Iona 1985 2024 \n",
2364
- "1189 aec Y13 Vermont 1985 2024 \n",
2365
- "1275 mvc Z09 Wichita St 1985 2024 \n",
2366
- "487 sec Z02 Kentucky 1985 2024 \n",
2367
- "1135 pac_ten X06 USC 1985 2024 \n",
2368
- "556 acc Y04 Maryland 1985 2024 \n",
2369
- "368 aec Z16 Hartford 1985 2023 \n",
2370
- "1037 sec W04 Tennessee 1985 2024 \n",
2371
- "891 big_ten W01 Purdue 1985 2024 \n",
2372
- "559 big_ten Y04 Maryland 1985 2024 \n",
2373
  "\n",
2374
- "[10 rows x 318 columns]"
2375
  ]
2376
  },
2377
- "execution_count": 16,
2378
  "metadata": {},
2379
  "output_type": "execute_result"
2380
  }
2381
  ],
2382
  "source": [
 
 
 
 
 
 
 
 
 
2383
  "team_agg_df.sample(10, random_state=1)"
2384
  ]
2385
  },
2386
  {
2387
  "cell_type": "code",
2388
- "execution_count": 17,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2389
  "metadata": {},
2390
  "outputs": [],
2391
  "source": [
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
15
  },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 2,
19
  "metadata": {},
20
  "outputs": [
21
  {
 
206
  "[5 rows x 35 columns]"
207
  ]
208
  },
209
+ "execution_count": 2,
210
  "metadata": {},
211
  "output_type": "execute_result"
212
  }
 
222
  },
223
  {
224
  "cell_type": "code",
225
+ "execution_count": 3,
226
  "metadata": {},
227
  "outputs": [
228
  {
 
413
  "[5 rows x 35 columns]"
414
  ]
415
  },
416
+ "execution_count": 3,
417
  "metadata": {},
418
  "output_type": "execute_result"
419
  }
 
436
  },
437
  {
438
  "cell_type": "code",
439
+ "execution_count": 4,
440
  "metadata": {},
441
  "outputs": [],
442
  "source": [
 
466
  },
467
  {
468
  "cell_type": "code",
469
+ "execution_count": 5,
470
  "metadata": {},
471
  "outputs": [
472
  {
 
540
  },
541
  {
542
  "cell_type": "code",
543
+ "execution_count": 6,
544
  "metadata": {},
545
  "outputs": [
546
  {
 
613
  },
614
  {
615
  "cell_type": "code",
616
+ "execution_count": 7,
617
  "metadata": {},
618
  "outputs": [],
619
  "source": [
 
630
  },
631
  {
632
  "cell_type": "code",
633
+ "execution_count": 8,
634
  "metadata": {},
635
  "outputs": [],
636
  "source": [
 
654
  },
655
  {
656
  "cell_type": "code",
657
+ "execution_count": 9,
658
  "metadata": {},
659
  "outputs": [],
660
  "source": [
 
680
  },
681
  {
682
  "cell_type": "code",
683
+ "execution_count": 10,
684
  "metadata": {
685
  "tags": []
686
  },
 
1027
  "[10 rows x 158 columns]"
1028
  ]
1029
  },
1030
+ "execution_count": 10,
1031
  "metadata": {},
1032
  "output_type": "execute_result"
1033
  }
 
1046
  },
1047
  {
1048
  "cell_type": "code",
1049
+ "execution_count": 11,
1050
  "metadata": {},
1051
  "outputs": [
1052
  {
 
1391
  "[10 rows x 158 columns]"
1392
  ]
1393
  },
1394
+ "execution_count": 11,
1395
  "metadata": {},
1396
  "output_type": "execute_result"
1397
  }
 
1419
  },
1420
  {
1421
  "cell_type": "code",
1422
+ "execution_count": 12,
1423
  "metadata": {},
1424
  "outputs": [
1425
  {
 
1451
  " <th>TeamName</th>\n",
1452
  " <th>FirstD1Season</th>\n",
1453
  " <th>LastD1Season</th>\n",
1454
+ " <th>ChalkSeed</th>\n",
1455
  " </tr>\n",
1456
  " </thead>\n",
1457
  " <tbody>\n",
1458
  " <tr>\n",
1459
+ " <th>0</th>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1460
  " <td>1985</td>\n",
1461
+ " <td>1104</td>\n",
1462
+ " <td>sec</td>\n",
 
 
 
 
 
1463
  " <td>M</td>\n",
1464
+ " <td>X07</td>\n",
1465
+ " <td>Alabama</td>\n",
1466
  " <td>1985</td>\n",
1467
  " <td>2024</td>\n",
1468
+ " <td>7</td>\n",
1469
  " </tr>\n",
1470
  " <tr>\n",
1471
+ " <th>1</th>\n",
1472
+ " <td>1986</td>\n",
1473
+ " <td>1104</td>\n",
1474
+ " <td>sec</td>\n",
1475
  " <td>M</td>\n",
1476
+ " <td>Y05</td>\n",
1477
+ " <td>Alabama</td>\n",
1478
  " <td>1985</td>\n",
1479
  " <td>2024</td>\n",
1480
+ " <td>5</td>\n",
1481
  " </tr>\n",
1482
  " <tr>\n",
1483
+ " <th>2</th>\n",
1484
+ " <td>1987</td>\n",
1485
+ " <td>1104</td>\n",
1486
+ " <td>sec</td>\n",
1487
  " <td>M</td>\n",
1488
+ " <td>X02</td>\n",
1489
+ " <td>Alabama</td>\n",
1490
  " <td>1985</td>\n",
1491
  " <td>2024</td>\n",
1492
+ " <td>2</td>\n",
1493
  " </tr>\n",
1494
  " <tr>\n",
1495
+ " <th>3</th>\n",
1496
+ " <td>1989</td>\n",
1497
+ " <td>1104</td>\n",
1498
+ " <td>sec</td>\n",
1499
  " <td>M</td>\n",
1500
+ " <td>Z06</td>\n",
1501
+ " <td>Alabama</td>\n",
1502
  " <td>1985</td>\n",
1503
  " <td>2024</td>\n",
1504
+ " <td>6</td>\n",
1505
  " </tr>\n",
1506
  " <tr>\n",
1507
+ " <th>4</th>\n",
1508
+ " <td>1990</td>\n",
1509
+ " <td>1104</td>\n",
1510
  " <td>sec</td>\n",
1511
  " <td>M</td>\n",
1512
  " <td>Z07</td>\n",
1513
+ " <td>Alabama</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1514
  " <td>1985</td>\n",
1515
  " <td>2024</td>\n",
1516
+ " <td>7</td>\n",
1517
  " </tr>\n",
1518
  " </tbody>\n",
1519
  "</table>\n",
1520
  "</div>"
1521
  ],
1522
  "text/plain": [
1523
+ " Season TeamID ConfAbbrev League Seed TeamName FirstD1Season \\\n",
1524
+ "0 1985 1104 sec M X07 Alabama 1985 \n",
1525
+ "1 1986 1104 sec M Y05 Alabama 1985 \n",
1526
+ "2 1987 1104 sec M X02 Alabama 1985 \n",
1527
+ "3 1989 1104 sec M Z06 Alabama 1985 \n",
1528
+ "4 1990 1104 sec M Z07 Alabama 1985 \n",
 
 
 
 
 
1529
  "\n",
1530
+ " LastD1Season ChalkSeed \n",
1531
+ "0 2024 7 \n",
1532
+ "1 2024 5 \n",
1533
+ "2 2024 2 \n",
1534
+ "3 2024 6 \n",
1535
+ "4 2024 7 "
 
 
 
 
 
1536
  ]
1537
  },
1538
+ "execution_count": 12,
1539
  "metadata": {},
1540
  "output_type": "execute_result"
1541
  }
 
1557
  " .merge(right=pd.read_csv(os.path.join(DATA_DIR, \"MTeams.csv\")), on=\"TeamID\")\n",
1558
  ")\n",
1559
  "\n",
1560
+ "team_conf_seeds_df[\"ChalkSeed\"] = team_conf_seeds_df.apply(\n",
1561
+ " lambda row: int(row[\"Seed\"][1:].replace(\"a\", \"\").replace(\"b\", \"\")),\n",
1562
+ " axis=1,\n",
1563
+ ")\n",
1564
+ "\n",
1565
+ "team_conf_seeds_df.head()"
 
 
 
 
 
 
 
 
 
 
 
 
1566
  ]
1567
  },
1568
  {
1569
  "cell_type": "code",
1570
+ "execution_count": 13,
1571
  "metadata": {},
1572
  "outputs": [
1573
  {
 
1602
  " <th>OppScore min reg</th>\n",
1603
  " <th>OppScore max reg</th>\n",
1604
  " <th>...</th>\n",
1605
+ " <th>ScoreDiff min tourney</th>\n",
1606
+ " <th>ScoreDiff max tourney</th>\n",
1607
+ " <th>ScoreDiff std tourney</th>\n",
1608
+ " <th>ScoreDiff median tourney</th>\n",
1609
+ " <th>ScoreDiff mean tourney</th>\n",
1610
  " <th>Win min tourney</th>\n",
1611
  " <th>Win max tourney</th>\n",
1612
  " <th>Win std tourney</th>\n",
1613
  " <th>Win median tourney</th>\n",
1614
  " <th>Win mean tourney</th>\n",
 
 
 
 
 
1615
  " </tr>\n",
1616
  " </thead>\n",
1617
  " <tbody>\n",
1618
  " <tr>\n",
1619
+ " <th>12348</th>\n",
1620
+ " <td>3430</td>\n",
1621
+ " <td>2012</td>\n",
1622
+ " <td>W</td>\n",
1623
+ " <td>41</td>\n",
1624
+ " <td>78</td>\n",
1625
+ " <td>10.808339</td>\n",
1626
+ " <td>61.0</td>\n",
1627
+ " <td>58.965517</td>\n",
1628
  " <td>36</td>\n",
1629
+ " <td>85</td>\n",
1630
  " <td>...</td>\n",
 
 
1631
  " <td>NaN</td>\n",
1632
+ " <td>NaN</td>\n",
1633
+ " <td>NaN</td>\n",
1634
+ " <td>NaN</td>\n",
1635
+ " <td>NaN</td>\n",
1636
+ " <td>NaN</td>\n",
1637
+ " <td>NaN</td>\n",
1638
+ " <td>NaN</td>\n",
1639
+ " <td>NaN</td>\n",
1640
+ " <td>NaN</td>\n",
1641
  " </tr>\n",
1642
  " <tr>\n",
1643
+ " <th>6900</th>\n",
1644
+ " <td>1431</td>\n",
1645
+ " <td>2018</td>\n",
1646
  " <td>M</td>\n",
1647
+ " <td>33</td>\n",
1648
  " <td>88</td>\n",
1649
+ " <td>12.283247</td>\n",
1650
+ " <td>67.0</td>\n",
1651
+ " <td>66.466667</td>\n",
1652
+ " <td>44</td>\n",
1653
+ " <td>97</td>\n",
1654
  " <td>...</td>\n",
 
 
1655
  " <td>NaN</td>\n",
1656
+ " <td>NaN</td>\n",
1657
+ " <td>NaN</td>\n",
1658
+ " <td>NaN</td>\n",
1659
+ " <td>NaN</td>\n",
1660
+ " <td>NaN</td>\n",
1661
+ " <td>NaN</td>\n",
1662
+ " <td>NaN</td>\n",
1663
+ " <td>NaN</td>\n",
1664
+ " <td>NaN</td>\n",
1665
  " </tr>\n",
1666
  " <tr>\n",
1667
+ " <th>4406</th>\n",
1668
+ " <td>1315</td>\n",
1669
+ " <td>2014</td>\n",
1670
  " <td>M</td>\n",
1671
+ " <td>43</td>\n",
1672
+ " <td>95</td>\n",
1673
+ " <td>10.019980</td>\n",
1674
  " <td>72.0</td>\n",
1675
+ " <td>73.000000</td>\n",
1676
+ " <td>61</td>\n",
1677
+ " <td>103</td>\n",
1678
  " <td>...</td>\n",
1679
+ " <td>NaN</td>\n",
1680
+ " <td>NaN</td>\n",
1681
+ " <td>NaN</td>\n",
1682
+ " <td>NaN</td>\n",
1683
+ " <td>NaN</td>\n",
1684
+ " <td>NaN</td>\n",
1685
+ " <td>NaN</td>\n",
1686
+ " <td>NaN</td>\n",
1687
+ " <td>NaN</td>\n",
1688
+ " <td>NaN</td>\n",
1689
  " </tr>\n",
1690
  " <tr>\n",
1691
+ " <th>4233</th>\n",
1692
+ " <td>1307</td>\n",
1693
  " <td>2005</td>\n",
1694
  " <td>M</td>\n",
1695
+ " <td>53</td>\n",
1696
+ " <td>101</td>\n",
1697
+ " <td>12.911860</td>\n",
1698
+ " <td>77.0</td>\n",
1699
+ " <td>75.870968</td>\n",
1700
+ " <td>47</td>\n",
1701
+ " <td>81</td>\n",
1702
  " <td>...</td>\n",
1703
+ " <td>-8.0</td>\n",
1704
+ " <td>-8.0</td>\n",
1705
+ " <td>NaN</td>\n",
1706
+ " <td>-8.0</td>\n",
1707
+ " <td>-8.0</td>\n",
1708
+ " <td>0.0</td>\n",
1709
+ " <td>0.0</td>\n",
1710
+ " <td>NaN</td>\n",
1711
+ " <td>0.0</td>\n",
1712
+ " <td>0.0</td>\n",
1713
  " </tr>\n",
1714
  " <tr>\n",
1715
+ " <th>3407</th>\n",
1716
+ " <td>1266</td>\n",
1717
+ " <td>2008</td>\n",
1718
  " <td>M</td>\n",
1719
+ " <td>51</td>\n",
1720
+ " <td>100</td>\n",
1721
+ " <td>11.841315</td>\n",
1722
+ " <td>75.5</td>\n",
1723
+ " <td>75.906250</td>\n",
1724
+ " <td>37</td>\n",
1725
+ " <td>89</td>\n",
1726
  " <td>...</td>\n",
1727
+ " <td>-1.0</td>\n",
1728
+ " <td>8.0</td>\n",
1729
+ " <td>6.363961</td>\n",
1730
+ " <td>3.5</td>\n",
1731
+ " <td>3.5</td>\n",
1732
+ " <td>0.0</td>\n",
1733
  " <td>1.0</td>\n",
1734
+ " <td>0.707107</td>\n",
1735
+ " <td>0.5</td>\n",
1736
+ " <td>0.5</td>\n",
 
 
 
1737
  " </tr>\n",
1738
  " <tr>\n",
1739
+ " <th>5190</th>\n",
1740
+ " <td>1352</td>\n",
1741
+ " <td>2016</td>\n",
1742
  " <td>M</td>\n",
1743
  " <td>44</td>\n",
1744
+ " <td>89</td>\n",
1745
+ " <td>10.298567</td>\n",
1746
+ " <td>67.0</td>\n",
1747
+ " <td>65.062500</td>\n",
1748
  " <td>45</td>\n",
1749
+ " <td>106</td>\n",
1750
  " <td>...</td>\n",
1751
+ " <td>NaN</td>\n",
1752
+ " <td>NaN</td>\n",
1753
+ " <td>NaN</td>\n",
1754
+ " <td>NaN</td>\n",
1755
+ " <td>NaN</td>\n",
1756
+ " <td>NaN</td>\n",
1757
+ " <td>NaN</td>\n",
1758
+ " <td>NaN</td>\n",
1759
+ " <td>NaN</td>\n",
1760
+ " <td>NaN</td>\n",
1761
  " </tr>\n",
1762
  " <tr>\n",
1763
+ " <th>1892</th>\n",
1764
+ " <td>1194</td>\n",
1765
+ " <td>2005</td>\n",
1766
  " <td>M</td>\n",
1767
+ " <td>45</td>\n",
1768
+ " <td>104</td>\n",
1769
+ " <td>14.194618</td>\n",
1770
  " <td>76.0</td>\n",
1771
+ " <td>76.777778</td>\n",
1772
+ " <td>59</td>\n",
1773
+ " <td>107</td>\n",
1774
  " <td>...</td>\n",
 
 
1775
  " <td>NaN</td>\n",
1776
+ " <td>NaN</td>\n",
1777
+ " <td>NaN</td>\n",
1778
+ " <td>NaN</td>\n",
1779
+ " <td>NaN</td>\n",
1780
+ " <td>NaN</td>\n",
1781
+ " <td>NaN</td>\n",
1782
+ " <td>NaN</td>\n",
1783
+ " <td>NaN</td>\n",
1784
+ " <td>NaN</td>\n",
1785
  " </tr>\n",
1786
  " <tr>\n",
1787
+ " <th>10020</th>\n",
1788
+ " <td>3270</td>\n",
1789
+ " <td>2021</td>\n",
1790
+ " <td>W</td>\n",
1791
+ " <td>24</td>\n",
1792
+ " <td>80</td>\n",
1793
+ " <td>13.385137</td>\n",
1794
+ " <td>53.0</td>\n",
1795
+ " <td>55.476190</td>\n",
1796
+ " <td>41</td>\n",
1797
+ " <td>117</td>\n",
1798
  " <td>...</td>\n",
 
 
1799
  " <td>NaN</td>\n",
1800
+ " <td>NaN</td>\n",
1801
+ " <td>NaN</td>\n",
1802
+ " <td>NaN</td>\n",
1803
+ " <td>NaN</td>\n",
1804
+ " <td>NaN</td>\n",
1805
+ " <td>NaN</td>\n",
1806
+ " <td>NaN</td>\n",
1807
+ " <td>NaN</td>\n",
1808
+ " <td>NaN</td>\n",
1809
  " </tr>\n",
1810
  " <tr>\n",
1811
+ " <th>9567</th>\n",
1812
+ " <td>3240</td>\n",
1813
+ " <td>2014</td>\n",
1814
+ " <td>W</td>\n",
1815
+ " <td>43</td>\n",
1816
+ " <td>84</td>\n",
1817
+ " <td>11.319009</td>\n",
1818
+ " <td>62.5</td>\n",
1819
+ " <td>63.593750</td>\n",
1820
+ " <td>45</td>\n",
1821
+ " <td>100</td>\n",
1822
  " <td>...</td>\n",
1823
+ " <td>NaN</td>\n",
1824
+ " <td>NaN</td>\n",
1825
+ " <td>NaN</td>\n",
1826
+ " <td>NaN</td>\n",
1827
+ " <td>NaN</td>\n",
1828
+ " <td>NaN</td>\n",
1829
+ " <td>NaN</td>\n",
1830
+ " <td>NaN</td>\n",
1831
+ " <td>NaN</td>\n",
1832
+ " <td>NaN</td>\n",
1833
  " </tr>\n",
1834
  " <tr>\n",
1835
+ " <th>12617</th>\n",
1836
+ " <td>3452</td>\n",
1837
+ " <td>2011</td>\n",
1838
+ " <td>W</td>\n",
1839
+ " <td>39</td>\n",
1840
+ " <td>90</td>\n",
1841
+ " <td>12.518374</td>\n",
1842
+ " <td>65.0</td>\n",
1843
+ " <td>65.750000</td>\n",
1844
+ " <td>21</td>\n",
1845
+ " <td>79</td>\n",
1846
  " <td>...</td>\n",
1847
+ " <td>-14.0</td>\n",
1848
+ " <td>6.0</td>\n",
1849
+ " <td>14.142136</td>\n",
1850
+ " <td>-4.0</td>\n",
1851
+ " <td>-4.0</td>\n",
1852
  " <td>0.0</td>\n",
1853
+ " <td>1.0</td>\n",
1854
+ " <td>0.707107</td>\n",
1855
+ " <td>0.5</td>\n",
1856
+ " <td>0.5</td>\n",
 
 
1857
  " </tr>\n",
1858
  " </tbody>\n",
1859
  "</table>\n",
1860
+ "<p>10 rows × 313 columns</p>\n",
1861
  "</div>"
1862
  ],
1863
  "text/plain": [
1864
+ " TeamID Season League TeamScore min reg TeamScore max reg \\\n",
1865
+ "12348 3430 2012 W 41 78 \n",
1866
+ "6900 1431 2018 M 33 88 \n",
1867
+ "4406 1315 2014 M 43 95 \n",
1868
+ "4233 1307 2005 M 53 101 \n",
1869
+ "3407 1266 2008 M 51 100 \n",
1870
+ "5190 1352 2016 M 44 89 \n",
1871
+ "1892 1194 2005 M 45 104 \n",
1872
+ "10020 3270 2021 W 24 80 \n",
1873
+ "9567 3240 2014 W 43 84 \n",
1874
+ "12617 3452 2011 W 39 90 \n",
1875
  "\n",
1876
+ " TeamScore std reg TeamScore median reg TeamScore mean reg \\\n",
1877
+ "12348 10.808339 61.0 58.965517 \n",
1878
+ "6900 12.283247 67.0 66.466667 \n",
1879
+ "4406 10.019980 72.0 73.000000 \n",
1880
+ "4233 12.911860 77.0 75.870968 \n",
1881
+ "3407 11.841315 75.5 75.906250 \n",
1882
+ "5190 10.298567 67.0 65.062500 \n",
1883
+ "1892 14.194618 76.0 76.777778 \n",
1884
+ "10020 13.385137 53.0 55.476190 \n",
1885
+ "9567 11.319009 62.5 63.593750 \n",
1886
+ "12617 12.518374 65.0 65.750000 \n",
1887
  "\n",
1888
+ " OppScore min reg OppScore max reg ... ScoreDiff min tourney \\\n",
1889
+ "12348 36 85 ... NaN \n",
1890
+ "6900 44 97 ... NaN \n",
1891
+ "4406 61 103 ... NaN \n",
1892
+ "4233 47 81 ... -8.0 \n",
1893
+ "3407 37 89 ... -1.0 \n",
1894
+ "5190 45 106 ... NaN \n",
1895
+ "1892 59 107 ... NaN \n",
1896
+ "10020 41 117 ... NaN \n",
1897
+ "9567 45 100 ... NaN \n",
1898
+ "12617 21 79 ... -14.0 \n",
1899
  "\n",
1900
+ " ScoreDiff max tourney ScoreDiff std tourney ScoreDiff median tourney \\\n",
1901
+ "12348 NaN NaN NaN \n",
1902
+ "6900 NaN NaN NaN \n",
1903
+ "4406 NaN NaN NaN \n",
1904
+ "4233 -8.0 NaN -8.0 \n",
1905
+ "3407 8.0 6.363961 3.5 \n",
1906
+ "5190 NaN NaN NaN \n",
1907
+ "1892 NaN NaN NaN \n",
1908
+ "10020 NaN NaN NaN \n",
1909
+ "9567 NaN NaN NaN \n",
1910
+ "12617 6.0 14.142136 -4.0 \n",
1911
  "\n",
1912
+ " ScoreDiff mean tourney Win min tourney Win max tourney \\\n",
1913
+ "12348 NaN NaN NaN \n",
1914
+ "6900 NaN NaN NaN \n",
1915
+ "4406 NaN NaN NaN \n",
1916
+ "4233 -8.0 0.0 0.0 \n",
1917
+ "3407 3.5 0.0 1.0 \n",
1918
+ "5190 NaN NaN NaN \n",
1919
+ "1892 NaN NaN NaN \n",
1920
+ "10020 NaN NaN NaN \n",
1921
+ "9567 NaN NaN NaN \n",
1922
+ "12617 -4.0 0.0 1.0 \n",
1923
  "\n",
1924
+ " Win std tourney Win median tourney Win mean tourney \n",
1925
+ "12348 NaN NaN NaN \n",
1926
+ "6900 NaN NaN NaN \n",
1927
+ "4406 NaN NaN NaN \n",
1928
+ "4233 NaN 0.0 0.0 \n",
1929
+ "3407 0.707107 0.5 0.5 \n",
1930
+ "5190 NaN NaN NaN \n",
1931
+ "1892 NaN NaN NaN \n",
1932
+ "10020 NaN NaN NaN \n",
1933
+ "9567 NaN NaN NaN \n",
1934
+ "12617 0.707107 0.5 0.5 \n",
1935
+ "\n",
1936
+ "[10 rows x 313 columns]"
1937
  ]
1938
  },
1939
+ "execution_count": 13,
1940
  "metadata": {},
1941
  "output_type": "execute_result"
1942
  }
1943
  ],
1944
  "source": [
1945
+ "# merge the tournament aggregated metrics with the regular season aggregated metrics\n",
1946
+ "team_agg_df = (\n",
1947
+ " pd.merge(\n",
1948
+ " left=team_reg_agg, \n",
1949
+ " right=team_tourney_agg, \n",
1950
+ " how=\"left\",\n",
1951
+ " on=[\"TeamID\", \"Season\", \"League\"], \n",
1952
+ " suffixes=(\" reg\", \" tourney\"),\n",
1953
+ " )\n",
1954
  ")\n",
1955
  "\n",
1956
+ "team_agg_df.sample(10, random_state=1)"
1957
  ]
1958
  },
1959
  {
1960
  "cell_type": "code",
1961
+ "execution_count": 14,
1962
  "metadata": {},
1963
  "outputs": [
1964
  {
 
1993
  " <th>OppScore min reg</th>\n",
1994
  " <th>OppScore max reg</th>\n",
1995
  " <th>...</th>\n",
 
1996
  " <th>Win max tourney</th>\n",
1997
  " <th>Win std tourney</th>\n",
1998
  " <th>Win median tourney</th>\n",
 
2002
  " <th>TeamName</th>\n",
2003
  " <th>FirstD1Season</th>\n",
2004
  " <th>LastD1Season</th>\n",
2005
+ " <th>ChalkSeed</th>\n",
2006
  " </tr>\n",
2007
  " </thead>\n",
2008
  " <tbody>\n",
2009
  " <tr>\n",
2010
+ " <th>12348</th>\n",
2011
+ " <td>3430</td>\n",
2012
+ " <td>2012</td>\n",
2013
+ " <td>W</td>\n",
2014
+ " <td>41</td>\n",
2015
+ " <td>78</td>\n",
2016
+ " <td>10.808339</td>\n",
2017
+ " <td>61.0</td>\n",
2018
+ " <td>58.965517</td>\n",
2019
+ " <td>36</td>\n",
2020
+ " <td>85</td>\n",
2021
  " <td>...</td>\n",
 
 
2022
  " <td>NaN</td>\n",
2023
+ " <td>NaN</td>\n",
2024
+ " <td>NaN</td>\n",
2025
+ " <td>NaN</td>\n",
2026
+ " <td>NaN</td>\n",
2027
+ " <td>NaN</td>\n",
2028
+ " <td>NaN</td>\n",
2029
+ " <td>NaN</td>\n",
2030
+ " <td>NaN</td>\n",
2031
+ " <td>NaN</td>\n",
2032
  " </tr>\n",
2033
  " <tr>\n",
2034
+ " <th>6900</th>\n",
2035
+ " <td>1431</td>\n",
2036
+ " <td>2018</td>\n",
2037
  " <td>M</td>\n",
2038
+ " <td>33</td>\n",
2039
+ " <td>88</td>\n",
2040
+ " <td>12.283247</td>\n",
2041
+ " <td>67.0</td>\n",
2042
+ " <td>66.466667</td>\n",
2043
  " <td>44</td>\n",
2044
+ " <td>97</td>\n",
2045
  " <td>...</td>\n",
2046
+ " <td>NaN</td>\n",
2047
+ " <td>NaN</td>\n",
2048
+ " <td>NaN</td>\n",
2049
+ " <td>NaN</td>\n",
2050
+ " <td>NaN</td>\n",
2051
+ " <td>NaN</td>\n",
2052
+ " <td>NaN</td>\n",
2053
+ " <td>NaN</td>\n",
2054
+ " <td>NaN</td>\n",
2055
+ " <td>NaN</td>\n",
2056
  " </tr>\n",
2057
  " <tr>\n",
2058
+ " <th>4406</th>\n",
2059
+ " <td>1315</td>\n",
2060
+ " <td>2014</td>\n",
2061
  " <td>M</td>\n",
2062
+ " <td>43</td>\n",
2063
+ " <td>95</td>\n",
2064
+ " <td>10.019980</td>\n",
2065
+ " <td>72.0</td>\n",
2066
+ " <td>73.000000</td>\n",
2067
+ " <td>61</td>\n",
2068
+ " <td>103</td>\n",
2069
  " <td>...</td>\n",
2070
+ " <td>NaN</td>\n",
2071
+ " <td>NaN</td>\n",
2072
+ " <td>NaN</td>\n",
2073
+ " <td>NaN</td>\n",
2074
+ " <td>NaN</td>\n",
2075
+ " <td>NaN</td>\n",
2076
+ " <td>NaN</td>\n",
2077
+ " <td>NaN</td>\n",
2078
+ " <td>NaN</td>\n",
2079
+ " <td>NaN</td>\n",
2080
  " </tr>\n",
2081
  " <tr>\n",
2082
+ " <th>4233</th>\n",
2083
+ " <td>1307</td>\n",
2084
+ " <td>2005</td>\n",
2085
  " <td>M</td>\n",
2086
+ " <td>53</td>\n",
2087
+ " <td>101</td>\n",
2088
+ " <td>12.911860</td>\n",
2089
+ " <td>77.0</td>\n",
2090
+ " <td>75.870968</td>\n",
2091
+ " <td>47</td>\n",
2092
+ " <td>81</td>\n",
2093
+ " <td>...</td>\n",
2094
+ " <td>0.0</td>\n",
2095
+ " <td>NaN</td>\n",
2096
+ " <td>0.0</td>\n",
2097
+ " <td>0.0</td>\n",
2098
+ " <td>mwc</td>\n",
2099
+ " <td>Z12</td>\n",
2100
+ " <td>New Mexico</td>\n",
2101
+ " <td>1985.0</td>\n",
2102
+ " <td>2024.0</td>\n",
2103
+ " <td>12.0</td>\n",
2104
+ " </tr>\n",
2105
+ " <tr>\n",
2106
+ " <th>3407</th>\n",
2107
+ " <td>1266</td>\n",
2108
+ " <td>2008</td>\n",
2109
+ " <td>M</td>\n",
2110
+ " <td>51</td>\n",
2111
  " <td>100</td>\n",
2112
+ " <td>11.841315</td>\n",
2113
+ " <td>75.5</td>\n",
2114
+ " <td>75.906250</td>\n",
2115
+ " <td>37</td>\n",
2116
+ " <td>89</td>\n",
2117
  " <td>...</td>\n",
 
 
 
2118
  " <td>1.0</td>\n",
2119
+ " <td>0.707107</td>\n",
2120
+ " <td>0.5</td>\n",
2121
+ " <td>0.5</td>\n",
2122
+ " <td>big_east</td>\n",
2123
+ " <td>Y06</td>\n",
2124
+ " <td>Marquette</td>\n",
2125
+ " <td>1985.0</td>\n",
2126
+ " <td>2024.0</td>\n",
2127
+ " <td>6.0</td>\n",
2128
  " </tr>\n",
2129
  " <tr>\n",
2130
+ " <th>5190</th>\n",
2131
+ " <td>1352</td>\n",
2132
+ " <td>2016</td>\n",
2133
  " <td>M</td>\n",
 
 
 
 
 
2134
  " <td>44</td>\n",
2135
+ " <td>89</td>\n",
2136
+ " <td>10.298567</td>\n",
2137
+ " <td>67.0</td>\n",
2138
+ " <td>65.062500</td>\n",
2139
+ " <td>45</td>\n",
2140
+ " <td>106</td>\n",
2141
  " <td>...</td>\n",
 
 
2142
  " <td>NaN</td>\n",
2143
+ " <td>NaN</td>\n",
2144
+ " <td>NaN</td>\n",
2145
+ " <td>NaN</td>\n",
2146
+ " <td>NaN</td>\n",
2147
+ " <td>NaN</td>\n",
2148
+ " <td>NaN</td>\n",
2149
+ " <td>NaN</td>\n",
2150
+ " <td>NaN</td>\n",
2151
+ " <td>NaN</td>\n",
2152
  " </tr>\n",
2153
  " <tr>\n",
2154
+ " <th>1892</th>\n",
2155
+ " <td>1194</td>\n",
2156
+ " <td>2005</td>\n",
2157
  " <td>M</td>\n",
2158
+ " <td>45</td>\n",
2159
+ " <td>104</td>\n",
2160
+ " <td>14.194618</td>\n",
2161
+ " <td>76.0</td>\n",
2162
+ " <td>76.777778</td>\n",
2163
+ " <td>59</td>\n",
2164
+ " <td>107</td>\n",
2165
  " <td>...</td>\n",
2166
+ " <td>NaN</td>\n",
2167
+ " <td>NaN</td>\n",
2168
+ " <td>NaN</td>\n",
2169
+ " <td>NaN</td>\n",
2170
+ " <td>NaN</td>\n",
2171
+ " <td>NaN</td>\n",
2172
+ " <td>NaN</td>\n",
2173
+ " <td>NaN</td>\n",
2174
+ " <td>NaN</td>\n",
2175
+ " <td>NaN</td>\n",
2176
  " </tr>\n",
2177
  " <tr>\n",
2178
+ " <th>10020</th>\n",
2179
+ " <td>3270</td>\n",
2180
  " <td>2021</td>\n",
2181
+ " <td>W</td>\n",
2182
+ " <td>24</td>\n",
2183
+ " <td>80</td>\n",
2184
+ " <td>13.385137</td>\n",
2185
+ " <td>53.0</td>\n",
2186
+ " <td>55.476190</td>\n",
2187
+ " <td>41</td>\n",
2188
+ " <td>117</td>\n",
2189
  " <td>...</td>\n",
 
 
2190
  " <td>NaN</td>\n",
2191
+ " <td>NaN</td>\n",
2192
+ " <td>NaN</td>\n",
2193
+ " <td>NaN</td>\n",
2194
+ " <td>NaN</td>\n",
2195
+ " <td>NaN</td>\n",
2196
+ " <td>NaN</td>\n",
2197
+ " <td>NaN</td>\n",
2198
+ " <td>NaN</td>\n",
2199
+ " <td>NaN</td>\n",
2200
  " </tr>\n",
2201
  " <tr>\n",
2202
+ " <th>9567</th>\n",
2203
+ " <td>3240</td>\n",
2204
+ " <td>2014</td>\n",
2205
+ " <td>W</td>\n",
2206
+ " <td>43</td>\n",
2207
+ " <td>84</td>\n",
2208
+ " <td>11.319009</td>\n",
2209
+ " <td>62.5</td>\n",
2210
+ " <td>63.593750</td>\n",
2211
+ " <td>45</td>\n",
2212
+ " <td>100</td>\n",
2213
  " <td>...</td>\n",
2214
+ " <td>NaN</td>\n",
2215
+ " <td>NaN</td>\n",
2216
+ " <td>NaN</td>\n",
2217
+ " <td>NaN</td>\n",
2218
+ " <td>NaN</td>\n",
2219
+ " <td>NaN</td>\n",
2220
+ " <td>NaN</td>\n",
2221
+ " <td>NaN</td>\n",
2222
+ " <td>NaN</td>\n",
2223
+ " <td>NaN</td>\n",
2224
  " </tr>\n",
2225
  " <tr>\n",
2226
+ " <th>12617</th>\n",
2227
+ " <td>3452</td>\n",
2228
+ " <td>2011</td>\n",
2229
+ " <td>W</td>\n",
 
 
 
 
 
2230
  " <td>39</td>\n",
2231
+ " <td>90</td>\n",
2232
+ " <td>12.518374</td>\n",
2233
+ " <td>65.0</td>\n",
2234
+ " <td>65.750000</td>\n",
2235
+ " <td>21</td>\n",
2236
  " <td>79</td>\n",
2237
  " <td>...</td>\n",
2238
+ " <td>1.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2239
  " <td>0.707107</td>\n",
2240
  " <td>0.5</td>\n",
2241
+ " <td>0.5</td>\n",
2242
+ " <td>NaN</td>\n",
2243
+ " <td>NaN</td>\n",
2244
+ " <td>NaN</td>\n",
2245
+ " <td>NaN</td>\n",
2246
+ " <td>NaN</td>\n",
2247
+ " <td>NaN</td>\n",
2248
  " </tr>\n",
2249
  " </tbody>\n",
2250
  "</table>\n",
2251
+ "<p>10 rows × 319 columns</p>\n",
2252
  "</div>"
2253
  ],
2254
  "text/plain": [
2255
+ " TeamID Season League TeamScore min reg TeamScore max reg \\\n",
2256
+ "12348 3430 2012 W 41 78 \n",
2257
+ "6900 1431 2018 M 33 88 \n",
2258
+ "4406 1315 2014 M 43 95 \n",
2259
+ "4233 1307 2005 M 53 101 \n",
2260
+ "3407 1266 2008 M 51 100 \n",
2261
+ "5190 1352 2016 M 44 89 \n",
2262
+ "1892 1194 2005 M 45 104 \n",
2263
+ "10020 3270 2021 W 24 80 \n",
2264
+ "9567 3240 2014 W 43 84 \n",
2265
+ "12617 3452 2011 W 39 90 \n",
2266
  "\n",
2267
+ " TeamScore std reg TeamScore median reg TeamScore mean reg \\\n",
2268
+ "12348 10.808339 61.0 58.965517 \n",
2269
+ "6900 12.283247 67.0 66.466667 \n",
2270
+ "4406 10.019980 72.0 73.000000 \n",
2271
+ "4233 12.911860 77.0 75.870968 \n",
2272
+ "3407 11.841315 75.5 75.906250 \n",
2273
+ "5190 10.298567 67.0 65.062500 \n",
2274
+ "1892 14.194618 76.0 76.777778 \n",
2275
+ "10020 13.385137 53.0 55.476190 \n",
2276
+ "9567 11.319009 62.5 63.593750 \n",
2277
+ "12617 12.518374 65.0 65.750000 \n",
2278
  "\n",
2279
+ " OppScore min reg OppScore max reg ... Win max tourney \\\n",
2280
+ "12348 36 85 ... NaN \n",
2281
+ "6900 44 97 ... NaN \n",
2282
+ "4406 61 103 ... NaN \n",
2283
+ "4233 47 81 ... 0.0 \n",
2284
+ "3407 37 89 ... 1.0 \n",
2285
+ "5190 45 106 ... NaN \n",
2286
+ "1892 59 107 ... NaN \n",
2287
+ "10020 41 117 ... NaN \n",
2288
+ "9567 45 100 ... NaN \n",
2289
+ "12617 21 79 ... 1.0 \n",
2290
  "\n",
2291
+ " Win std tourney Win median tourney Win mean tourney ConfAbbrev \\\n",
2292
+ "12348 NaN NaN NaN NaN \n",
2293
+ "6900 NaN NaN NaN NaN \n",
2294
+ "4406 NaN NaN NaN NaN \n",
2295
+ "4233 NaN 0.0 0.0 mwc \n",
2296
+ "3407 0.707107 0.5 0.5 big_east \n",
2297
+ "5190 NaN NaN NaN NaN \n",
2298
+ "1892 NaN NaN NaN NaN \n",
2299
+ "10020 NaN NaN NaN NaN \n",
2300
+ "9567 NaN NaN NaN NaN \n",
2301
+ "12617 0.707107 0.5 0.5 NaN \n",
2302
  "\n",
2303
+ " Seed TeamName FirstD1Season LastD1Season ChalkSeed \n",
2304
+ "12348 NaN NaN NaN NaN NaN \n",
2305
+ "6900 NaN NaN NaN NaN NaN \n",
2306
+ "4406 NaN NaN NaN NaN NaN \n",
2307
+ "4233 Z12 New Mexico 1985.0 2024.0 12.0 \n",
2308
+ "3407 Y06 Marquette 1985.0 2024.0 6.0 \n",
2309
+ "5190 NaN NaN NaN NaN NaN \n",
2310
+ "1892 NaN NaN NaN NaN NaN \n",
2311
+ "10020 NaN NaN NaN NaN NaN \n",
2312
+ "9567 NaN NaN NaN NaN NaN \n",
2313
+ "12617 NaN NaN NaN NaN NaN \n",
2314
  "\n",
2315
+ "[10 rows x 319 columns]"
2316
  ]
2317
  },
2318
+ "execution_count": 14,
2319
  "metadata": {},
2320
  "output_type": "execute_result"
2321
  }
2322
  ],
2323
  "source": [
2324
+ "# merge the team_conf_seeds_df with team attributes into the aggregated data\n",
2325
+ "\n",
2326
+ "team_agg_df = pd.merge(\n",
2327
+ " left=team_agg_df,\n",
2328
+ " right=team_conf_seeds_df,\n",
2329
+ " how=\"left\",\n",
2330
+ " on=[\"TeamID\", \"Season\", \"League\"],\n",
2331
+ ")\n",
2332
+ "\n",
2333
  "team_agg_df.sample(10, random_state=1)"
2334
  ]
2335
  },
2336
  {
2337
  "cell_type": "code",
2338
+ "execution_count": 15,
2339
+ "metadata": {},
2340
+ "outputs": [
2341
+ {
2342
+ "name": "stdout",
2343
+ "output_type": "stream",
2344
+ "text": [
2345
+ "<class 'pandas.core.frame.DataFrame'>\n",
2346
+ "Int64Index: 12857 entries, 0 to 12856\n",
2347
+ "Columns: 319 entries, TeamID to ChalkSeed\n",
2348
+ "dtypes: float64(251), int64(64), object(4)\n",
2349
+ "memory usage: 31.4+ MB\n"
2350
+ ]
2351
+ }
2352
+ ],
2353
+ "source": [
2354
+ "team_agg_df.info()"
2355
+ ]
2356
+ },
2357
+ {
2358
+ "cell_type": "code",
2359
+ "execution_count": 16,
2360
  "metadata": {},
2361
  "outputs": [],
2362
  "source": [