USC-Applied-NLP-Group
/

SQL-Generation

TensorBoard

Safetensors

Model card Files Files and versions

xet

Metrics Training metrics Community

DeanGumas committed on Apr 2, 2025

Commit

a30f35d

1 Parent(s): 9f2b199

Adding completed pre-training testing runs to python notebook

Browse files

Files changed (1) hide show

test_pretrained.ipynb +293 -38

test_pretrained.ipynb CHANGED Viewed

@@ -26,9 +26,9 @@
       "Total dataset examples: 1044\n",
       "\n",
       "\n",
-      "What was the combined rebound total for the Toronto Raptors and Brooklyn Nets in their highest scoring game against each other?\n",
-      "SELECT MAX(g.pts_home + g.pts_away) AS total_points,        g.reb_home + g.reb_away AS total_rebounds FROM game g WHERE (g.team_name_home = 'Toronto Raptors' AND g.team_name_away = 'Brooklyn Nets')    OR (g.team_name_home = 'Brooklyn Nets' AND g.team_name_away = 'Toronto Raptors') ORDER BY total_points DESC LIMIT 1;\n",
-      "272.0 | 101.0 \n"
      ]
     }
    ],
@@ -83,7 +83,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -287,10 +287,9 @@
      "output_type": "stream",
      "text": [
       "SQLite:\n",
-      "SELECT SUM(reb_home + reb_away) AS combined_rebounds\n",
-      "FROM game\n",
-      "WHERE (team_name_home = 'Toronto Raptors' AND team_name_away = 'Brooklyn Nets')\n",
-      "OR (team_name_home = 'Brooklyn Nets' AND team_name_away = 'Toronto Raptors');\n",
       "\n"
      ]
     }
@@ -323,7 +322,7 @@
      "output_type": "stream",
      "text": [
       "cleaned\n",
-      "(4350.0,)\n"
      ]
     }
    ],
@@ -368,14 +367,15 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "What was the three-point shooting percentage for the Los Angeles Clippers in games against the Los Angeles Lakers?\n",
-      "SELECT AVG(   CASE      WHEN team_name_home = 'LA Clippers' THEN fg3_pct_home     ELSE fg3_pct_away   END ) AS avg_3pt_percentage FROM game WHERE (team_name_home = 'LA Clippers' AND team_name_away = 'Los Angeles Lakers')    OR (team_name_home = 'Los Angeles Lakers' AND team_name_away = 'LA Clippers');\n",
-      "0.3734705882\n",
       "SQLite:\n",
-      "SELECT team_name_home, team_name_away, AVG(fg3_pct_home) AS three_point_percentage\n",
-      "FROM game\n",
-      "WHERE team_name_home = 'Los Angeles Clippers' AND team_name_away = 'Los Angeles Lakers'\n",
-      "GROUP BY team_name_home, team_name_away;\n",
       "\n",
       "Statement valid? True\n",
       "SQLite matched? False\n",
@@ -508,20 +508,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Less than 90 results:\n",
-      "Percent valid: 0.0653061224489796\n",
-      "Percent SQLite matched: 0.00816326530612245\n",
-      "Percent result matched: 0.024489795918367346\n"
-     ]
-    }
-   ],
    "source": [
     "def run_evaluation(nba_df, title):\n",
     "    counter = 0\n",
@@ -550,27 +539,293 @@
     "        counter += 1\n",
     "        if counter % 50 == 0:\n",
     "            print(\"Completed \" + str(counter))\n",
-    "        elif counter == 20:\n",
-    "            break\n",
     "\n",
     "    # Print evaluation results\n",
-    "    print(title + \" results:\")\n",
     "    print(\"Percent valid: \" + str(num_valid / len(nba_df)))\n",
     "    print(\"Percent SQLite matched: \" + str(num_sql_matched / len(nba_df)))\n",
-    "    print(\"Percent result matched: \" + str(num_result_matched / len(nba_df)))\n",
-    "\n",
     "less_than_90_df = pd.read_csv(\"./train-data/less_than_90.tsv\", sep='\\t')\n",
     "run_evaluation(less_than_90_df, \"Less than 90\")\n",
-    "\n",
-    "# Run evaluation on all training data\n",
-    "#run_evaluation(df, \"All training data\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Evaluate on less than 90 dataset"
    ]
   }
  ],

       "Total dataset examples: 1044\n",
       "\n",
       "\n",
+      "What is the average number of tov in home games by the Miami Heat?\n",
+      "SELECT AVG(tov_home) FROM game WHERE team_name_home = 'Miami Heat';\n",
+      "14.627184466019418\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
      "output_type": "stream",
      "text": [
       "SQLite:\n",
+      "SELECT AVG(tov_home) \n",
+      "FROM game \n",
+      "WHERE team_name_home = 'Miami Heat';\n",
       "\n"
      ]
     }
      "output_type": "stream",
      "text": [
       "cleaned\n",
+      "(14.627184466019418,)\n"
      ]
     }
    ],
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "How many times have the Houston Rockets won an away game while scoring at least 110 points?\n",
+      "SELECT COUNT(*)  FROM game  WHERE team_abbreviation_away = 'HOU'  AND pts_away >= 110  AND wl_away = 'W';\n",
+      "425\n",
       "SQLite:\n",
+      "SELECT COUNT(*) \n",
+      "FROM game \n",
+      "WHERE team_name_away = 'Houston Rockets' \n",
+      "AND wl_away = 'W' \n",
+      "AND pts_away >= 110;\n",
       "\n",
       "Statement valid? True\n",
       "SQLite matched? False\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
+   "outputs": [],
    "source": [
     "def run_evaluation(nba_df, title):\n",
     "    counter = 0\n",
     "        counter += 1\n",
     "        if counter % 50 == 0:\n",
     "            print(\"Completed \" + str(counter))\n",
     "\n",
     "    # Print evaluation results\n",
+    "    print(\"\\n\" + title + \" results:\")\n",
     "    print(\"Percent valid: \" + str(num_valid / len(nba_df)))\n",
     "    print(\"Percent SQLite matched: \" + str(num_sql_matched / len(nba_df)))\n",
+    "    print(\"Percent result matched: \" + str(num_result_matched / len(nba_df)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate on less than 90 dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completed 50\n",
+      "Completed 100\n",
+      "Completed 150\n",
+      "Completed 200\n",
+      "\n",
+      "Less than 90 results:\n",
+      "Percent valid: 0.8612244897959184\n",
+      "Percent SQLite matched: 0.4163265306122449\n",
+      "Percent result matched: 0.6530612244897959\n",
+      "Dataset length: 245\n"
+     ]
+    }
+   ],
+   "source": [
     "less_than_90_df = pd.read_csv(\"./train-data/less_than_90.tsv\", sep='\\t')\n",
     "run_evaluation(less_than_90_df, \"Less than 90\")\n",
+    "print(\"Dataset length: \" + str(len(less_than_90_df)))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "## Evaluate on game table queries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completed 50\n",
+      "Completed 100\n",
+      "Completed 150\n",
+      "Completed 200\n",
+      "Completed 250\n",
+      "Completed 300\n",
+      "Completed 350\n",
+      "Completed 400\n",
+      "Completed 450\n",
+      "Completed 500\n",
+      "Completed 550\n",
+      "Completed 600\n",
+      "Completed 650\n",
+      "Completed 700\n",
+      "Completed 750\n",
+      "Completed 800\n",
+      "\n",
+      "Queries from game results:\n",
+      "Percent valid: 0.7708830548926014\n",
+      "Percent SQLite matched: 0.1431980906921241\n",
+      "Percent result matched: 0.40692124105011934\n",
+      "Dataset length: 838\n"
+     ]
+    }
+   ],
+   "source": [
+    "game_queries = pd.read_csv(\"./train-data/queries_from_game.tsv\", sep='\\t')\n",
+    "run_evaluation(game_queries, \"Queries from game\")\n",
+    "print(\"Dataset length: \" + str(len(game_queries)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate on other stats queries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completed 50\n",
+      "Completed 100\n",
+      "Completed 150\n",
+      "\n",
+      "Queries from other stats results:\n",
+      "Percent valid: 0.07792207792207792\n",
+      "Percent SQLite matched: 0.0\n",
+      "Percent result matched: 0.0\n",
+      "Dataset length: 154\n"
+     ]
+    }
+   ],
+   "source": [
+    "other_stats_queries = pd.read_csv(\"./train-data/queries_from_other_stats.tsv\", sep='\\t')\n",
+    "run_evaluation(other_stats_queries, \"Queries from other stats\")\n",
+    "print(\"Dataset length: \" + str(len(other_stats_queries)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate on team queries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completed 50\n",
+      "\n",
+      "Queries from team results:\n",
+      "Percent valid: 0.75\n",
+      "Percent SQLite matched: 0.2692307692307692\n",
+      "Percent result matched: 0.6153846153846154\n",
+      "Dataset length: 52\n"
+     ]
+    }
+   ],
+   "source": [
+    "team_queries = pd.read_csv(\"./train-data/queries_from_team.tsv\", sep='\\t')\n",
+    "run_evaluation(team_queries, \"Queries from team\")\n",
+    "print(\"Dataset length: \" + str(len(team_queries)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate on queries requiring join statements"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completed 50\n",
+      "Completed 100\n",
+      "Completed 150\n",
+      "\n",
+      "Queries with join results:\n",
+      "Percent valid: 0.06486486486486487\n",
+      "Percent SQLite matched: 0.0\n",
+      "Percent result matched: 0.010810810810810811\n",
+      "Dataset length: 185\n"
+     ]
+    }
+   ],
+   "source": [
+    "join_queries = pd.read_csv(\"./train-data/with_join.tsv\", sep='\\t')\n",
+    "run_evaluation(join_queries, \"Queries with join\")\n",
+    "print(\"Dataset length: \" + str(len(join_queries)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate on queries not requiring join statements"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completed 50\n",
+      "Completed 100\n",
+      "Completed 150\n",
+      "Completed 200\n",
+      "Completed 250\n",
+      "Completed 300\n",
+      "Completed 350\n",
+      "Completed 400\n",
+      "Completed 450\n",
+      "Completed 500\n",
+      "Completed 550\n",
+      "Completed 600\n",
+      "Completed 650\n",
+      "Completed 700\n",
+      "Completed 750\n",
+      "Completed 800\n",
+      "Completed 850\n",
+      "\n",
+      "Queries without join results:\n",
+      "Percent valid: 0.7974388824214202\n",
+      "Percent SQLite matched: 0.1559953434225844\n",
+      "Percent result matched: 0.4318975552968568\n",
+      "Dataset length: 859\n"
+     ]
+    }
+   ],
+   "source": [
+    "no_join_queries = pd.read_csv(\"./train-data/without_join.tsv\", sep='\\t')\n",
+    "run_evaluation(no_join_queries, \"Queries without join\")\n",
+    "print(\"Dataset length: \" + str(len(no_join_queries)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate on full training dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Completed 50\n",
+      "Completed 100\n",
+      "Completed 150\n",
+      "Completed 200\n",
+      "Completed 250\n",
+      "Completed 300\n",
+      "Completed 350\n",
+      "Completed 400\n",
+      "Completed 450\n",
+      "Completed 500\n",
+      "Completed 550\n",
+      "Completed 600\n",
+      "Completed 650\n",
+      "Completed 700\n",
+      "Completed 750\n",
+      "Completed 800\n",
+      "Completed 850\n",
+      "Completed 900\n",
+      "Completed 950\n",
+      "Completed 1000\n",
+      "\n",
+      "All training data results:\n",
+      "Percent valid: 0.6676245210727969\n",
+      "Percent SQLite matched: 0.12835249042145594\n",
+      "Percent result matched: 0.35823754789272033\n",
+      "Dataset length: 1044\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run evaluation on all training data\n",
+    "run_evaluation(df, \"All training data\")\n",
+    "print(\"Dataset length: \" + str(len(df)))"
    ]
   }
  ],