Commit ·
9c0cc03
1
Parent(s): 2a90490
columns that did not need to be aggregated were being aggregated. I
Browse files- src/pre_processing.ipynb +166 -162
src/pre_processing.ipynb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [],
|
| 8 |
"source": [
|
|
@@ -15,7 +15,7 @@
|
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"cell_type": "code",
|
| 18 |
-
"execution_count":
|
| 19 |
"metadata": {},
|
| 20 |
"outputs": [
|
| 21 |
{
|
|
@@ -85,10 +85,12 @@
|
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"cell_type": "code",
|
| 88 |
-
"execution_count":
|
| 89 |
"metadata": {},
|
| 90 |
"outputs": [],
|
| 91 |
"source": [
|
|
|
|
|
|
|
| 92 |
"\n",
|
| 93 |
"detailed_metrics = {\n",
|
| 94 |
" \"Score\",\n",
|
|
@@ -128,7 +130,7 @@
|
|
| 128 |
},
|
| 129 |
{
|
| 130 |
"cell_type": "code",
|
| 131 |
-
"execution_count":
|
| 132 |
"metadata": {},
|
| 133 |
"outputs": [
|
| 134 |
{
|
|
@@ -326,7 +328,7 @@
|
|
| 326 |
"[5 rows x 36 columns]"
|
| 327 |
]
|
| 328 |
},
|
| 329 |
-
"execution_count":
|
| 330 |
"metadata": {},
|
| 331 |
"output_type": "execute_result"
|
| 332 |
}
|
|
@@ -337,7 +339,7 @@
|
|
| 337 |
},
|
| 338 |
{
|
| 339 |
"cell_type": "code",
|
| 340 |
-
"execution_count":
|
| 341 |
"metadata": {},
|
| 342 |
"outputs": [],
|
| 343 |
"source": [
|
|
@@ -349,7 +351,7 @@
|
|
| 349 |
},
|
| 350 |
{
|
| 351 |
"cell_type": "code",
|
| 352 |
-
"execution_count":
|
| 353 |
"metadata": {},
|
| 354 |
"outputs": [
|
| 355 |
{
|
|
@@ -547,7 +549,7 @@
|
|
| 547 |
"[5 rows x 37 columns]"
|
| 548 |
]
|
| 549 |
},
|
| 550 |
-
"execution_count":
|
| 551 |
"metadata": {},
|
| 552 |
"output_type": "execute_result"
|
| 553 |
}
|
|
@@ -558,7 +560,7 @@
|
|
| 558 |
},
|
| 559 |
{
|
| 560 |
"cell_type": "code",
|
| 561 |
-
"execution_count":
|
| 562 |
"metadata": {},
|
| 563 |
"outputs": [
|
| 564 |
{
|
|
@@ -572,7 +574,7 @@
|
|
| 572 |
"Name: Win, dtype: int64"
|
| 573 |
]
|
| 574 |
},
|
| 575 |
-
"execution_count":
|
| 576 |
"metadata": {},
|
| 577 |
"output_type": "execute_result"
|
| 578 |
}
|
|
@@ -595,8 +597,36 @@
|
|
| 595 |
},
|
| 596 |
{
|
| 597 |
"cell_type": "code",
|
| 598 |
-
"execution_count":
|
| 599 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
"outputs": [
|
| 601 |
{
|
| 602 |
"data": {
|
|
@@ -622,13 +652,13 @@
|
|
| 622 |
" <th>TeamID</th>\n",
|
| 623 |
" <th>Season</th>\n",
|
| 624 |
" <th>League</th>\n",
|
| 625 |
-
" <th>
|
| 626 |
-
" <th>
|
| 627 |
-
" <th>
|
| 628 |
-
" <th>
|
| 629 |
-
" <th>
|
| 630 |
-
" <th>
|
| 631 |
-
" <th>
|
| 632 |
" <th>...</th>\n",
|
| 633 |
" <th>ScoreDiff min</th>\n",
|
| 634 |
" <th>ScoreDiff max</th>\n",
|
|
@@ -648,13 +678,13 @@
|
|
| 648 |
" <td>3430</td>\n",
|
| 649 |
" <td>2012</td>\n",
|
| 650 |
" <td>W</td>\n",
|
| 651 |
-
" <td>
|
| 652 |
-
" <td>
|
| 653 |
-
" <td>
|
| 654 |
-
" <td>
|
| 655 |
-
" <td>
|
| 656 |
-
" <td>
|
| 657 |
-
" <td>
|
| 658 |
" <td>...</td>\n",
|
| 659 |
" <td>-32</td>\n",
|
| 660 |
" <td>35</td>\n",
|
|
@@ -672,13 +702,13 @@
|
|
| 672 |
" <td>1431</td>\n",
|
| 673 |
" <td>2018</td>\n",
|
| 674 |
" <td>M</td>\n",
|
| 675 |
-
" <td>
|
| 676 |
-
" <td>
|
| 677 |
-
" <td>
|
| 678 |
-
" <td>
|
| 679 |
-
" <td>
|
| 680 |
-
" <td>
|
| 681 |
-
" <td>
|
| 682 |
" <td>...</td>\n",
|
| 683 |
" <td>-49</td>\n",
|
| 684 |
" <td>29</td>\n",
|
|
@@ -696,13 +726,13 @@
|
|
| 696 |
" <td>1315</td>\n",
|
| 697 |
" <td>2014</td>\n",
|
| 698 |
" <td>M</td>\n",
|
| 699 |
-
" <td>
|
| 700 |
-
" <td>
|
| 701 |
-
" <td>
|
| 702 |
-
" <td>
|
| 703 |
-
" <td>
|
| 704 |
-
" <td>
|
| 705 |
-
" <td>
|
| 706 |
" <td>...</td>\n",
|
| 707 |
" <td>-27</td>\n",
|
| 708 |
" <td>18</td>\n",
|
|
@@ -720,13 +750,13 @@
|
|
| 720 |
" <td>1307</td>\n",
|
| 721 |
" <td>2005</td>\n",
|
| 722 |
" <td>M</td>\n",
|
| 723 |
-
" <td>
|
| 724 |
-
" <td>
|
| 725 |
-
" <td>
|
| 726 |
-
" <td>
|
| 727 |
-
" <td>
|
| 728 |
-
" <td>
|
| 729 |
-
" <td>
|
| 730 |
" <td>...</td>\n",
|
| 731 |
" <td>-17</td>\n",
|
| 732 |
" <td>34</td>\n",
|
|
@@ -744,13 +774,13 @@
|
|
| 744 |
" <td>1266</td>\n",
|
| 745 |
" <td>2008</td>\n",
|
| 746 |
" <td>M</td>\n",
|
| 747 |
-
" <td>
|
| 748 |
-
" <td>
|
| 749 |
-
" <td>
|
| 750 |
-
" <td>
|
| 751 |
-
" <td>
|
| 752 |
-
" <td>
|
| 753 |
-
" <td>
|
| 754 |
" <td>...</td>\n",
|
| 755 |
" <td>-20</td>\n",
|
| 756 |
" <td>47</td>\n",
|
|
@@ -768,13 +798,13 @@
|
|
| 768 |
" <td>1352</td>\n",
|
| 769 |
" <td>2016</td>\n",
|
| 770 |
" <td>M</td>\n",
|
| 771 |
-
" <td>
|
| 772 |
-
" <td>
|
| 773 |
-
" <td>
|
| 774 |
-
" <td>
|
| 775 |
-
" <td>
|
| 776 |
-
" <td>
|
| 777 |
-
" <td>
|
| 778 |
" <td>...</td>\n",
|
| 779 |
" <td>-62</td>\n",
|
| 780 |
" <td>18</td>\n",
|
|
@@ -792,13 +822,13 @@
|
|
| 792 |
" <td>1194</td>\n",
|
| 793 |
" <td>2005</td>\n",
|
| 794 |
" <td>M</td>\n",
|
| 795 |
-
" <td>
|
| 796 |
-
" <td>
|
| 797 |
-
" <td>
|
| 798 |
-
" <td>
|
| 799 |
-
" <td>
|
| 800 |
-
" <td>
|
| 801 |
-
" <td>
|
| 802 |
" <td>...</td>\n",
|
| 803 |
" <td>-45</td>\n",
|
| 804 |
" <td>27</td>\n",
|
|
@@ -816,13 +846,13 @@
|
|
| 816 |
" <td>3270</td>\n",
|
| 817 |
" <td>2021</td>\n",
|
| 818 |
" <td>W</td>\n",
|
| 819 |
-
" <td>
|
| 820 |
-
" <td>
|
| 821 |
-
" <td>
|
| 822 |
-
" <td>
|
| 823 |
-
" <td>
|
| 824 |
-
" <td>
|
| 825 |
-
" <td>
|
| 826 |
" <td>...</td>\n",
|
| 827 |
" <td>-93</td>\n",
|
| 828 |
" <td>24</td>\n",
|
|
@@ -840,13 +870,13 @@
|
|
| 840 |
" <td>3240</td>\n",
|
| 841 |
" <td>2014</td>\n",
|
| 842 |
" <td>W</td>\n",
|
| 843 |
-
" <td>
|
| 844 |
-
" <td>
|
| 845 |
-
" <td>
|
| 846 |
-
" <td>
|
| 847 |
-
" <td>
|
| 848 |
-
" <td>
|
| 849 |
-
" <td>
|
| 850 |
" <td>...</td>\n",
|
| 851 |
" <td>-42</td>\n",
|
| 852 |
" <td>17</td>\n",
|
|
@@ -864,13 +894,13 @@
|
|
| 864 |
" <td>3452</td>\n",
|
| 865 |
" <td>2011</td>\n",
|
| 866 |
" <td>W</td>\n",
|
| 867 |
-
" <td>
|
| 868 |
-
" <td>
|
| 869 |
-
" <td>
|
| 870 |
-
" <td>
|
| 871 |
-
" <td>
|
| 872 |
-
" <td>
|
| 873 |
-
" <td>
|
| 874 |
" <td>...</td>\n",
|
| 875 |
" <td>-23</td>\n",
|
| 876 |
" <td>57</td>\n",
|
|
@@ -885,89 +915,70 @@
|
|
| 885 |
" </tr>\n",
|
| 886 |
" </tbody>\n",
|
| 887 |
"</table>\n",
|
| 888 |
-
"<p>10 rows ×
|
| 889 |
"</div>"
|
| 890 |
],
|
| 891 |
"text/plain": [
|
| 892 |
-
" TeamID Season League
|
| 893 |
-
"12348 3430 2012 W
|
| 894 |
-
"6900 1431 2018 M
|
| 895 |
-
"4406 1315 2014 M
|
| 896 |
-
"4233 1307 2005 M
|
| 897 |
-
"3407 1266 2008 M
|
| 898 |
-
"5190 1352 2016 M
|
| 899 |
-
"1892 1194 2005 M
|
| 900 |
-
"10020 3270 2021 W
|
| 901 |
-
"9567 3240 2014 W
|
| 902 |
-
"12617 3452 2011 W
|
| 903 |
"\n",
|
| 904 |
-
"
|
| 905 |
-
"12348
|
| 906 |
-
"6900
|
| 907 |
-
"4406
|
| 908 |
-
"4233
|
| 909 |
-
"3407
|
| 910 |
-
"5190
|
| 911 |
-
"1892
|
| 912 |
-
"10020
|
| 913 |
-
"9567
|
| 914 |
-
"12617
|
| 915 |
"\n",
|
| 916 |
-
" ScoreDiff max ScoreDiff std ScoreDiff median
|
| 917 |
-
"12348 35 16.997102 -1.0
|
| 918 |
-
"6900
|
| 919 |
-
"4406
|
| 920 |
-
"4233
|
| 921 |
-
"3407
|
| 922 |
-
"5190
|
| 923 |
-
"1892
|
| 924 |
-
"10020 24 27.245445 -15.0
|
| 925 |
-
"9567
|
| 926 |
-
"12617 57 18.777131 13.5
|
| 927 |
"\n",
|
| 928 |
-
" Win min Win max Win std Win median Win mean \n",
|
| 929 |
-
"12348 0 1 0.508548 0.0 0.482759 \n",
|
| 930 |
-
"6900
|
| 931 |
-
"4406
|
| 932 |
-
"4233
|
| 933 |
-
"3407
|
| 934 |
-
"5190
|
| 935 |
-
"1892
|
| 936 |
-
"10020 0 1 0.462910 0.0 0.285714 \n",
|
| 937 |
-
"9567
|
| 938 |
-
"12617 0 1 0.456803 1.0 0.718750 \n",
|
| 939 |
"\n",
|
| 940 |
-
"[10 rows x
|
| 941 |
]
|
| 942 |
},
|
| 943 |
-
"execution_count":
|
| 944 |
"metadata": {},
|
| 945 |
"output_type": "execute_result"
|
| 946 |
}
|
| 947 |
],
|
| 948 |
"source": [
|
| 949 |
-
"exclude_agg_cols = {\n",
|
| 950 |
-
" \"TeamID\",\n",
|
| 951 |
-
" \"Season\",\n",
|
| 952 |
-
" \"League\",\n",
|
| 953 |
-
" \"GameResult\",\n",
|
| 954 |
-
" \"OppLoc\",\n",
|
| 955 |
-
" \"TeamLoc\",\n",
|
| 956 |
-
"}\n",
|
| 957 |
-
"\n",
|
| 958 |
-
"agg_funcs = [\n",
|
| 959 |
-
" np.min,\n",
|
| 960 |
-
" np.max,\n",
|
| 961 |
-
" np.std,\n",
|
| 962 |
-
" np.median,\n",
|
| 963 |
-
" np.mean,\n",
|
| 964 |
-
"]\n",
|
| 965 |
-
"\n",
|
| 966 |
-
"# numeric_detailed_cols = detailed_team_results_df.select_dtypes(\"number\").columns\n",
|
| 967 |
-
"\n",
|
| 968 |
"team_reg_agg = (\n",
|
| 969 |
" detailed_team_results_df.groupby([\"TeamID\", \"Season\", \"League\"])\n",
|
| 970 |
-
" .agg({col: agg_funcs for col in detailed_team_results_df.select_dtypes(\"number\").columns})\n",
|
| 971 |
" .reset_index()\n",
|
| 972 |
")\n",
|
| 973 |
"\n",
|
|
@@ -975,13 +986,6 @@
|
|
| 975 |
"\n",
|
| 976 |
"team_reg_agg.sample(10, random_state=1)"
|
| 977 |
]
|
| 978 |
-
},
|
| 979 |
-
{
|
| 980 |
-
"cell_type": "code",
|
| 981 |
-
"execution_count": null,
|
| 982 |
-
"metadata": {},
|
| 983 |
-
"outputs": [],
|
| 984 |
-
"source": []
|
| 985 |
}
|
| 986 |
],
|
| 987 |
"metadata": {
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [],
|
| 8 |
"source": [
|
|
|
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"cell_type": "code",
|
| 18 |
+
"execution_count": 2,
|
| 19 |
"metadata": {},
|
| 20 |
"outputs": [
|
| 21 |
{
|
|
|
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"cell_type": "code",
|
| 88 |
+
"execution_count": 3,
|
| 89 |
"metadata": {},
|
| 90 |
"outputs": [],
|
| 91 |
"source": [
|
| 92 |
+
"# here we are making it such that each game has two rows, where each one is a team view of the game with\n",
|
| 93 |
+
"# opposing metrics.\n",
|
| 94 |
"\n",
|
| 95 |
"detailed_metrics = {\n",
|
| 96 |
" \"Score\",\n",
|
|
|
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"cell_type": "code",
|
| 133 |
+
"execution_count": 4,
|
| 134 |
"metadata": {},
|
| 135 |
"outputs": [
|
| 136 |
{
|
|
|
|
| 328 |
"[5 rows x 36 columns]"
|
| 329 |
]
|
| 330 |
},
|
| 331 |
+
"execution_count": 4,
|
| 332 |
"metadata": {},
|
| 333 |
"output_type": "execute_result"
|
| 334 |
}
|
|
|
|
| 339 |
},
|
| 340 |
{
|
| 341 |
"cell_type": "code",
|
| 342 |
+
"execution_count": 5,
|
| 343 |
"metadata": {},
|
| 344 |
"outputs": [],
|
| 345 |
"source": [
|
|
|
|
| 351 |
},
|
| 352 |
{
|
| 353 |
"cell_type": "code",
|
| 354 |
+
"execution_count": 6,
|
| 355 |
"metadata": {},
|
| 356 |
"outputs": [
|
| 357 |
{
|
|
|
|
| 549 |
"[5 rows x 37 columns]"
|
| 550 |
]
|
| 551 |
},
|
| 552 |
+
"execution_count": 6,
|
| 553 |
"metadata": {},
|
| 554 |
"output_type": "execute_result"
|
| 555 |
}
|
|
|
|
| 560 |
},
|
| 561 |
{
|
| 562 |
"cell_type": "code",
|
| 563 |
+
"execution_count": 7,
|
| 564 |
"metadata": {},
|
| 565 |
"outputs": [
|
| 566 |
{
|
|
|
|
| 574 |
"Name: Win, dtype: int64"
|
| 575 |
]
|
| 576 |
},
|
| 577 |
+
"execution_count": 7,
|
| 578 |
"metadata": {},
|
| 579 |
"output_type": "execute_result"
|
| 580 |
}
|
|
|
|
| 597 |
},
|
| 598 |
{
|
| 599 |
"cell_type": "code",
|
| 600 |
+
"execution_count": 15,
|
| 601 |
"metadata": {},
|
| 602 |
+
"outputs": [],
|
| 603 |
+
"source": [
|
| 604 |
+
"exclude_agg_cols = {\n",
|
| 605 |
+
" \"TeamID\",\n",
|
| 606 |
+
" \"Season\",\n",
|
| 607 |
+
" \"League\",\n",
|
| 608 |
+
" \"GameResult\",\n",
|
| 609 |
+
" \"OppLoc\",\n",
|
| 610 |
+
" \"TeamLoc\",\n",
|
| 611 |
+
" \"Season\",\n",
|
| 612 |
+
" \"DayNum\",\n",
|
| 613 |
+
"}\n",
|
| 614 |
+
"\n",
|
| 615 |
+
"agg_funcs = [\n",
|
| 616 |
+
" np.min,\n",
|
| 617 |
+
" np.max,\n",
|
| 618 |
+
" np.std,\n",
|
| 619 |
+
" np.median,\n",
|
| 620 |
+
" np.mean,\n",
|
| 621 |
+
"]"
|
| 622 |
+
]
|
| 623 |
+
},
|
| 624 |
+
{
|
| 625 |
+
"cell_type": "code",
|
| 626 |
+
"execution_count": 16,
|
| 627 |
+
"metadata": {
|
| 628 |
+
"tags": []
|
| 629 |
+
},
|
| 630 |
"outputs": [
|
| 631 |
{
|
| 632 |
"data": {
|
|
|
|
| 652 |
" <th>TeamID</th>\n",
|
| 653 |
" <th>Season</th>\n",
|
| 654 |
" <th>League</th>\n",
|
| 655 |
+
" <th>TeamScore min</th>\n",
|
| 656 |
+
" <th>TeamScore max</th>\n",
|
| 657 |
+
" <th>TeamScore std</th>\n",
|
| 658 |
+
" <th>TeamScore median</th>\n",
|
| 659 |
+
" <th>TeamScore mean</th>\n",
|
| 660 |
+
" <th>OppScore min</th>\n",
|
| 661 |
+
" <th>OppScore max</th>\n",
|
| 662 |
" <th>...</th>\n",
|
| 663 |
" <th>ScoreDiff min</th>\n",
|
| 664 |
" <th>ScoreDiff max</th>\n",
|
|
|
|
| 678 |
" <td>3430</td>\n",
|
| 679 |
" <td>2012</td>\n",
|
| 680 |
" <td>W</td>\n",
|
| 681 |
+
" <td>41</td>\n",
|
| 682 |
+
" <td>78</td>\n",
|
| 683 |
+
" <td>10.808339</td>\n",
|
| 684 |
+
" <td>61.0</td>\n",
|
| 685 |
+
" <td>58.965517</td>\n",
|
| 686 |
+
" <td>36</td>\n",
|
| 687 |
+
" <td>85</td>\n",
|
| 688 |
" <td>...</td>\n",
|
| 689 |
" <td>-32</td>\n",
|
| 690 |
" <td>35</td>\n",
|
|
|
|
| 702 |
" <td>1431</td>\n",
|
| 703 |
" <td>2018</td>\n",
|
| 704 |
" <td>M</td>\n",
|
| 705 |
+
" <td>33</td>\n",
|
| 706 |
+
" <td>88</td>\n",
|
| 707 |
+
" <td>12.283247</td>\n",
|
| 708 |
+
" <td>67.0</td>\n",
|
| 709 |
+
" <td>66.466667</td>\n",
|
| 710 |
+
" <td>44</td>\n",
|
| 711 |
+
" <td>97</td>\n",
|
| 712 |
" <td>...</td>\n",
|
| 713 |
" <td>-49</td>\n",
|
| 714 |
" <td>29</td>\n",
|
|
|
|
| 726 |
" <td>1315</td>\n",
|
| 727 |
" <td>2014</td>\n",
|
| 728 |
" <td>M</td>\n",
|
| 729 |
+
" <td>43</td>\n",
|
| 730 |
+
" <td>95</td>\n",
|
| 731 |
+
" <td>10.019980</td>\n",
|
| 732 |
+
" <td>72.0</td>\n",
|
| 733 |
+
" <td>73.000000</td>\n",
|
| 734 |
+
" <td>61</td>\n",
|
| 735 |
+
" <td>103</td>\n",
|
| 736 |
" <td>...</td>\n",
|
| 737 |
" <td>-27</td>\n",
|
| 738 |
" <td>18</td>\n",
|
|
|
|
| 750 |
" <td>1307</td>\n",
|
| 751 |
" <td>2005</td>\n",
|
| 752 |
" <td>M</td>\n",
|
| 753 |
+
" <td>53</td>\n",
|
| 754 |
+
" <td>101</td>\n",
|
| 755 |
+
" <td>12.911860</td>\n",
|
| 756 |
+
" <td>77.0</td>\n",
|
| 757 |
+
" <td>75.870968</td>\n",
|
| 758 |
+
" <td>47</td>\n",
|
| 759 |
+
" <td>81</td>\n",
|
| 760 |
" <td>...</td>\n",
|
| 761 |
" <td>-17</td>\n",
|
| 762 |
" <td>34</td>\n",
|
|
|
|
| 774 |
" <td>1266</td>\n",
|
| 775 |
" <td>2008</td>\n",
|
| 776 |
" <td>M</td>\n",
|
| 777 |
+
" <td>51</td>\n",
|
| 778 |
+
" <td>100</td>\n",
|
| 779 |
+
" <td>11.841315</td>\n",
|
| 780 |
+
" <td>75.5</td>\n",
|
| 781 |
+
" <td>75.906250</td>\n",
|
| 782 |
+
" <td>37</td>\n",
|
| 783 |
+
" <td>89</td>\n",
|
| 784 |
" <td>...</td>\n",
|
| 785 |
" <td>-20</td>\n",
|
| 786 |
" <td>47</td>\n",
|
|
|
|
| 798 |
" <td>1352</td>\n",
|
| 799 |
" <td>2016</td>\n",
|
| 800 |
" <td>M</td>\n",
|
| 801 |
+
" <td>44</td>\n",
|
| 802 |
+
" <td>89</td>\n",
|
| 803 |
+
" <td>10.298567</td>\n",
|
| 804 |
+
" <td>67.0</td>\n",
|
| 805 |
+
" <td>65.062500</td>\n",
|
| 806 |
+
" <td>45</td>\n",
|
| 807 |
+
" <td>106</td>\n",
|
| 808 |
" <td>...</td>\n",
|
| 809 |
" <td>-62</td>\n",
|
| 810 |
" <td>18</td>\n",
|
|
|
|
| 822 |
" <td>1194</td>\n",
|
| 823 |
" <td>2005</td>\n",
|
| 824 |
" <td>M</td>\n",
|
| 825 |
+
" <td>45</td>\n",
|
| 826 |
+
" <td>104</td>\n",
|
| 827 |
+
" <td>14.194618</td>\n",
|
| 828 |
+
" <td>76.0</td>\n",
|
| 829 |
+
" <td>76.777778</td>\n",
|
| 830 |
+
" <td>59</td>\n",
|
| 831 |
+
" <td>107</td>\n",
|
| 832 |
" <td>...</td>\n",
|
| 833 |
" <td>-45</td>\n",
|
| 834 |
" <td>27</td>\n",
|
|
|
|
| 846 |
" <td>3270</td>\n",
|
| 847 |
" <td>2021</td>\n",
|
| 848 |
" <td>W</td>\n",
|
| 849 |
+
" <td>24</td>\n",
|
| 850 |
+
" <td>80</td>\n",
|
| 851 |
+
" <td>13.385137</td>\n",
|
| 852 |
+
" <td>53.0</td>\n",
|
| 853 |
+
" <td>55.476190</td>\n",
|
| 854 |
+
" <td>41</td>\n",
|
| 855 |
+
" <td>117</td>\n",
|
| 856 |
" <td>...</td>\n",
|
| 857 |
" <td>-93</td>\n",
|
| 858 |
" <td>24</td>\n",
|
|
|
|
| 870 |
" <td>3240</td>\n",
|
| 871 |
" <td>2014</td>\n",
|
| 872 |
" <td>W</td>\n",
|
| 873 |
+
" <td>43</td>\n",
|
| 874 |
+
" <td>84</td>\n",
|
| 875 |
+
" <td>11.319009</td>\n",
|
| 876 |
+
" <td>62.5</td>\n",
|
| 877 |
+
" <td>63.593750</td>\n",
|
| 878 |
+
" <td>45</td>\n",
|
| 879 |
+
" <td>100</td>\n",
|
| 880 |
" <td>...</td>\n",
|
| 881 |
" <td>-42</td>\n",
|
| 882 |
" <td>17</td>\n",
|
|
|
|
| 894 |
" <td>3452</td>\n",
|
| 895 |
" <td>2011</td>\n",
|
| 896 |
" <td>W</td>\n",
|
| 897 |
+
" <td>39</td>\n",
|
| 898 |
+
" <td>90</td>\n",
|
| 899 |
+
" <td>12.518374</td>\n",
|
| 900 |
+
" <td>65.0</td>\n",
|
| 901 |
+
" <td>65.750000</td>\n",
|
| 902 |
+
" <td>21</td>\n",
|
| 903 |
+
" <td>79</td>\n",
|
| 904 |
" <td>...</td>\n",
|
| 905 |
" <td>-23</td>\n",
|
| 906 |
" <td>57</td>\n",
|
|
|
|
| 915 |
" </tr>\n",
|
| 916 |
" </tbody>\n",
|
| 917 |
"</table>\n",
|
| 918 |
+
"<p>10 rows × 158 columns</p>\n",
|
| 919 |
"</div>"
|
| 920 |
],
|
| 921 |
"text/plain": [
|
| 922 |
+
" TeamID Season League TeamScore min TeamScore max TeamScore std \\\n",
|
| 923 |
+
"12348 3430 2012 W 41 78 10.808339 \n",
|
| 924 |
+
"6900 1431 2018 M 33 88 12.283247 \n",
|
| 925 |
+
"4406 1315 2014 M 43 95 10.019980 \n",
|
| 926 |
+
"4233 1307 2005 M 53 101 12.911860 \n",
|
| 927 |
+
"3407 1266 2008 M 51 100 11.841315 \n",
|
| 928 |
+
"5190 1352 2016 M 44 89 10.298567 \n",
|
| 929 |
+
"1892 1194 2005 M 45 104 14.194618 \n",
|
| 930 |
+
"10020 3270 2021 W 24 80 13.385137 \n",
|
| 931 |
+
"9567 3240 2014 W 43 84 11.319009 \n",
|
| 932 |
+
"12617 3452 2011 W 39 90 12.518374 \n",
|
| 933 |
"\n",
|
| 934 |
+
" TeamScore median TeamScore mean OppScore min OppScore max ... \\\n",
|
| 935 |
+
"12348 61.0 58.965517 36 85 ... \n",
|
| 936 |
+
"6900 67.0 66.466667 44 97 ... \n",
|
| 937 |
+
"4406 72.0 73.000000 61 103 ... \n",
|
| 938 |
+
"4233 77.0 75.870968 47 81 ... \n",
|
| 939 |
+
"3407 75.5 75.906250 37 89 ... \n",
|
| 940 |
+
"5190 67.0 65.062500 45 106 ... \n",
|
| 941 |
+
"1892 76.0 76.777778 59 107 ... \n",
|
| 942 |
+
"10020 53.0 55.476190 41 117 ... \n",
|
| 943 |
+
"9567 62.5 63.593750 45 100 ... \n",
|
| 944 |
+
"12617 65.0 65.750000 21 79 ... \n",
|
| 945 |
"\n",
|
| 946 |
+
" ScoreDiff min ScoreDiff max ScoreDiff std ScoreDiff median \\\n",
|
| 947 |
+
"12348 -32 35 16.997102 -1.0 \n",
|
| 948 |
+
"6900 -49 29 14.772645 -5.0 \n",
|
| 949 |
+
"4406 -27 18 12.316786 -2.0 \n",
|
| 950 |
+
"4233 -17 34 13.022891 11.0 \n",
|
| 951 |
+
"3407 -20 47 17.828682 10.0 \n",
|
| 952 |
+
"5190 -62 18 14.365582 -7.0 \n",
|
| 953 |
+
"1892 -45 27 14.449736 -3.0 \n",
|
| 954 |
+
"10020 -93 24 27.245445 -15.0 \n",
|
| 955 |
+
"9567 -42 17 13.277095 -2.0 \n",
|
| 956 |
+
"12617 -23 57 18.777131 13.5 \n",
|
| 957 |
"\n",
|
| 958 |
+
" ScoreDiff mean Win min Win max Win std Win median Win mean \n",
|
| 959 |
+
"12348 -2.517241 0 1 0.508548 0.0 0.482759 \n",
|
| 960 |
+
"6900 -5.100000 0 1 0.479463 0.0 0.333333 \n",
|
| 961 |
+
"4406 -2.645161 0 1 0.508001 0.0 0.483871 \n",
|
| 962 |
+
"4233 10.935484 0 1 0.401610 1.0 0.806452 \n",
|
| 963 |
+
"3407 11.593750 0 1 0.456803 1.0 0.718750 \n",
|
| 964 |
+
"5190 -5.781250 0 1 0.470929 0.0 0.312500 \n",
|
| 965 |
+
"1892 -1.888889 0 1 0.492103 0.0 0.370370 \n",
|
| 966 |
+
"10020 -14.285714 0 1 0.462910 0.0 0.285714 \n",
|
| 967 |
+
"9567 -4.093750 0 1 0.504016 0.0 0.437500 \n",
|
| 968 |
+
"12617 13.500000 0 1 0.456803 1.0 0.718750 \n",
|
| 969 |
"\n",
|
| 970 |
+
"[10 rows x 158 columns]"
|
| 971 |
]
|
| 972 |
},
|
| 973 |
+
"execution_count": 16,
|
| 974 |
"metadata": {},
|
| 975 |
"output_type": "execute_result"
|
| 976 |
}
|
| 977 |
],
|
| 978 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 979 |
"team_reg_agg = (\n",
|
| 980 |
" detailed_team_results_df.groupby([\"TeamID\", \"Season\", \"League\"])\n",
|
| 981 |
+
" .agg({col: agg_funcs for col in detailed_team_results_df.select_dtypes(\"number\").columns if col not in exclude_agg_cols})\n",
|
| 982 |
" .reset_index()\n",
|
| 983 |
")\n",
|
| 984 |
"\n",
|
|
|
|
| 986 |
"\n",
|
| 987 |
"team_reg_agg.sample(10, random_state=1)"
|
| 988 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
}
|
| 990 |
],
|
| 991 |
"metadata": {
|