updating prompt to include team name and abbreviations
Browse files- test_pretrained.ipynb +73 -28
test_pretrained.ipynb
CHANGED
|
@@ -16,7 +16,7 @@
|
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"cell_type": "code",
|
| 19 |
-
"execution_count":
|
| 20 |
"metadata": {},
|
| 21 |
"outputs": [
|
| 22 |
{
|
|
@@ -26,9 +26,9 @@
|
|
| 26 |
"Total dataset examples: 1044\n",
|
| 27 |
"\n",
|
| 28 |
"\n",
|
| 29 |
-
"
|
| 30 |
-
"SELECT
|
| 31 |
-
"
|
| 32 |
]
|
| 33 |
}
|
| 34 |
],
|
|
@@ -56,9 +56,18 @@
|
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"cell_type": "code",
|
| 59 |
-
"execution_count":
|
| 60 |
"metadata": {},
|
| 61 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
"source": [
|
| 63 |
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
|
| 64 |
"import torch\n",
|
|
@@ -80,7 +89,7 @@
|
|
| 80 |
},
|
| 81 |
{
|
| 82 |
"cell_type": "code",
|
| 83 |
-
"execution_count":
|
| 84 |
"metadata": {},
|
| 85 |
"outputs": [],
|
| 86 |
"source": [
|
|
@@ -189,6 +198,44 @@
|
|
| 189 |
");\n",
|
| 190 |
"\n",
|
| 191 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
"Query Guidelines\n",
|
| 193 |
"Use team_name_home and team_name_away to match teams.\n",
|
| 194 |
"\n",
|
|
@@ -238,7 +285,7 @@
|
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"cell_type": "code",
|
| 241 |
-
"execution_count":
|
| 242 |
"metadata": {},
|
| 243 |
"outputs": [
|
| 244 |
{
|
|
@@ -248,7 +295,10 @@
|
|
| 248 |
"c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:634: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.95` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
|
| 249 |
" warnings.warn(\n",
|
| 250 |
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
| 251 |
-
"Setting `pad_token_id` to `eos_token_id`:32021 for open-end generation.\n"
|
|
|
|
|
|
|
|
|
|
| 252 |
]
|
| 253 |
},
|
| 254 |
{
|
|
@@ -256,9 +306,9 @@
|
|
| 256 |
"output_type": "stream",
|
| 257 |
"text": [
|
| 258 |
"SQLite:\n",
|
| 259 |
-
"SELECT
|
| 260 |
-
"FROM
|
| 261 |
-
"WHERE
|
| 262 |
"\n"
|
| 263 |
]
|
| 264 |
}
|
|
@@ -283,7 +333,7 @@
|
|
| 283 |
},
|
| 284 |
{
|
| 285 |
"cell_type": "code",
|
| 286 |
-
"execution_count":
|
| 287 |
"metadata": {},
|
| 288 |
"outputs": [
|
| 289 |
{
|
|
@@ -291,11 +341,7 @@
|
|
| 291 |
"output_type": "stream",
|
| 292 |
"text": [
|
| 293 |
"cleaned\n",
|
| 294 |
-
"(
|
| 295 |
-
"('Miami Heat',)\n",
|
| 296 |
-
"('Minnesota Timberwolves',)\n",
|
| 297 |
-
"('Orlando Magic',)\n",
|
| 298 |
-
"('Charlotte Hornets',)\n"
|
| 299 |
]
|
| 300 |
}
|
| 301 |
],
|
|
@@ -329,7 +375,7 @@
|
|
| 329 |
},
|
| 330 |
{
|
| 331 |
"cell_type": "code",
|
| 332 |
-
"execution_count":
|
| 333 |
"metadata": {},
|
| 334 |
"outputs": [
|
| 335 |
{
|
|
@@ -344,18 +390,17 @@
|
|
| 344 |
"name": "stdout",
|
| 345 |
"output_type": "stream",
|
| 346 |
"text": [
|
| 347 |
-
"
|
| 348 |
-
"SELECT
|
| 349 |
-
"
|
| 350 |
"SQLite:\n",
|
| 351 |
-
"SELECT
|
| 352 |
"FROM game \n",
|
| 353 |
-
"WHERE
|
| 354 |
-
"AND season_id = '2196' \n",
|
| 355 |
-
"AND pts_fb_home > 15;\n",
|
| 356 |
"\n",
|
| 357 |
-
"
|
| 358 |
-
"
|
|
|
|
| 359 |
]
|
| 360 |
}
|
| 361 |
],
|
|
|
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"cell_type": "code",
|
| 19 |
+
"execution_count": 1,
|
| 20 |
"metadata": {},
|
| 21 |
"outputs": [
|
| 22 |
{
|
|
|
|
| 26 |
"Total dataset examples: 1044\n",
|
| 27 |
"\n",
|
| 28 |
"\n",
|
| 29 |
+
"What is the highest combined pts in any game involving the Miami Heat?\n",
|
| 30 |
+
"SELECT MAX(pts_home + pts_away) FROM game WHERE team_name_home = 'Miami Heat' OR team_name_away = 'Miami Heat';\n",
|
| 31 |
+
"290.0\n"
|
| 32 |
]
|
| 33 |
}
|
| 34 |
],
|
|
|
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"cell_type": "code",
|
| 59 |
+
"execution_count": 2,
|
| 60 |
"metadata": {},
|
| 61 |
+
"outputs": [
|
| 62 |
+
{
|
| 63 |
+
"name": "stderr",
|
| 64 |
+
"output_type": "stream",
|
| 65 |
+
"text": [
|
| 66 |
+
"c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 67 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 68 |
+
]
|
| 69 |
+
}
|
| 70 |
+
],
|
| 71 |
"source": [
|
| 72 |
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
|
| 73 |
"import torch\n",
|
|
|
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"cell_type": "code",
|
| 92 |
+
"execution_count": 3,
|
| 93 |
"metadata": {},
|
| 94 |
"outputs": [],
|
| 95 |
"source": [
|
|
|
|
| 198 |
");\n",
|
| 199 |
"\n",
|
| 200 |
"\n",
|
| 201 |
+
"Team Name Information\n",
|
| 202 |
+
"In the plaintext user questions, only the full team names will be used, but in the queries you may use the full team names or the abbreviations. \n",
|
| 203 |
+
"The full team names can be used with the game table, while the abbreviations should be used with the other_stats table.\n",
|
| 204 |
+
"Notice they are separated by the | character in the following list:\n",
|
| 205 |
+
"\n",
|
| 206 |
+
"Atlanta Hawks|ATL\n",
|
| 207 |
+
"Boston Celtics|BOS\n",
|
| 208 |
+
"Cleveland Cavaliers|CLE\n",
|
| 209 |
+
"New Orleans Pelicans|NOP\n",
|
| 210 |
+
"Chicago Bulls|CHI\n",
|
| 211 |
+
"Dallas Mavericks|DAL\n",
|
| 212 |
+
"Denver Nuggets|DEN\n",
|
| 213 |
+
"Golden State Warriors|GSW\n",
|
| 214 |
+
"Houston Rockets|HOU\n",
|
| 215 |
+
"Los Angeles Clippers|LAC\n",
|
| 216 |
+
"Los Angeles Lakers|LAL\n",
|
| 217 |
+
"Miami Heat|MIA\n",
|
| 218 |
+
"Milwaukee Bucks|MIL\n",
|
| 219 |
+
"Minnesota Timberwolves|MIN\n",
|
| 220 |
+
"Brooklyn Nets|BKN\n",
|
| 221 |
+
"New York Knicks|NYK\n",
|
| 222 |
+
"Orlando Magic|ORL\n",
|
| 223 |
+
"Indiana Pacers|IND\n",
|
| 224 |
+
"Philadelphia 76ers|PHI\n",
|
| 225 |
+
"Phoenix Suns|PHX\n",
|
| 226 |
+
"Portland Trail Blazers|POR\n",
|
| 227 |
+
"Sacramento Kings|SAC\n",
|
| 228 |
+
"San Antonio Spurs|SAS\n",
|
| 229 |
+
"Oklahoma City Thunder|OKC\n",
|
| 230 |
+
"Toronto Raptors|TOR\n",
|
| 231 |
+
"Utah Jazz|UTA\n",
|
| 232 |
+
"Memphis Grizzlies|MEM\n",
|
| 233 |
+
"Washington Wizards|WAS\n",
|
| 234 |
+
"Detroit Pistons|DET\n",
|
| 235 |
+
"Charlotte Hornets|CHA\n",
|
| 236 |
+
"\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"\n",
|
| 239 |
"Query Guidelines\n",
|
| 240 |
"Use team_name_home and team_name_away to match teams.\n",
|
| 241 |
"\n",
|
|
|
|
| 285 |
},
|
| 286 |
{
|
| 287 |
"cell_type": "code",
|
| 288 |
+
"execution_count": 4,
|
| 289 |
"metadata": {},
|
| 290 |
"outputs": [
|
| 291 |
{
|
|
|
|
| 295 |
"c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:634: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.95` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
|
| 296 |
" warnings.warn(\n",
|
| 297 |
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
| 298 |
+
"Setting `pad_token_id` to `eos_token_id`:32021 for open-end generation.\n",
|
| 299 |
+
"The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
| 300 |
+
"c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\integrations\\sdpa_attention.py:53: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n",
|
| 301 |
+
" attn_output = torch.nn.functional.scaled_dot_product_attention(\n"
|
| 302 |
]
|
| 303 |
},
|
| 304 |
{
|
|
|
|
| 306 |
"output_type": "stream",
|
| 307 |
"text": [
|
| 308 |
"SQLite:\n",
|
| 309 |
+
"SELECT MAX(pts_home + pts_away) \n",
|
| 310 |
+
"FROM game \n",
|
| 311 |
+
"WHERE (team_name_home = 'Miami Heat' OR team_name_away = 'Miami Heat');\n",
|
| 312 |
"\n"
|
| 313 |
]
|
| 314 |
}
|
|
|
|
| 333 |
},
|
| 334 |
{
|
| 335 |
"cell_type": "code",
|
| 336 |
+
"execution_count": 5,
|
| 337 |
"metadata": {},
|
| 338 |
"outputs": [
|
| 339 |
{
|
|
|
|
| 341 |
"output_type": "stream",
|
| 342 |
"text": [
|
| 343 |
"cleaned\n",
|
| 344 |
+
"(290.0,)\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
]
|
| 346 |
}
|
| 347 |
],
|
|
|
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"cell_type": "code",
|
| 378 |
+
"execution_count": 16,
|
| 379 |
"metadata": {},
|
| 380 |
"outputs": [
|
| 381 |
{
|
|
|
|
| 390 |
"name": "stdout",
|
| 391 |
"output_type": "stream",
|
| 392 |
"text": [
|
| 393 |
+
"What is the average number of reb in away games by the Detroit Pistons?\n",
|
| 394 |
+
"SELECT AVG(reb_away) FROM game WHERE team_name_away = 'Detroit Pistons';\n",
|
| 395 |
+
"42.10948081264108\n",
|
| 396 |
"SQLite:\n",
|
| 397 |
+
"SELECT AVG(reb_away) \n",
|
| 398 |
"FROM game \n",
|
| 399 |
+
"WHERE team_name_away = 'Detroit Pistons';\n",
|
|
|
|
|
|
|
| 400 |
"\n",
|
| 401 |
+
"[(42.10948081264108,)]\n",
|
| 402 |
+
"SQL matched? True\n",
|
| 403 |
+
"Result matched? True\n"
|
| 404 |
]
|
| 405 |
}
|
| 406 |
],
|