USC-Applied-NLP-Group
/

SQL-Generation

TensorBoard

Safetensors

Model card Files Files and versions

xet

Metrics Training metrics Community

DeanGumas committed on Apr 2, 2025

Commit

9f2b199

1 Parent(s): 88abe86

Created evaluation loop for running on full dataframes

Browse files

Files changed (1) hide show

test_pretrained.ipynb +106 -64

test_pretrained.ipynb CHANGED Viewed

@@ -26,14 +26,16 @@
       "Total dataset examples: 1044\n",
       "\n",
       "\n",
-      "What was the largest lead the Golden State Warriors had in a game during the 2018 season?\n",
-      "SELECT MAX(other_stats.largest_lead_home)  FROM other_stats  JOIN game ON other_stats.game_id = game.game_id  WHERE game.team_name_home = 'Golden State Warriors'  AND game.season_id = '22018';\n",
-      "44\n"
      ]
     }
    ],
    "source": [
     "import pandas as pd \n",
     "\n",
     "# Load dataset and check length\n",
     "df = pd.read_csv(\"./train-data/sql_train.tsv\", sep='\\t')\n",
@@ -58,16 +60,7 @@
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
    "source": [
     "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
     "import torch\n",
@@ -77,7 +70,8 @@
     "\n",
     "# Load model and tokenizer\n",
     "tokenizer = AutoTokenizer.from_pretrained(\"./deepseek-coder-1.3b-instruct\")\n",
-    "model = AutoModelForCausalLM.from_pretrained(\"./deepseek-coder-1.3b-instruct\", torch_dtype=torch.bfloat16, device_map=device) "
    ]
   },
   {
@@ -288,27 +282,15 @@
    "execution_count": 4,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:634: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.95` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
-      "  warnings.warn(\n",
-      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
-      "Setting `pad_token_id` to `eos_token_id`:32021 for open-end generation.\n",
-      "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
-      "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\integrations\\sdpa_attention.py:53: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n",
-      "  attn_output = torch.nn.functional.scaled_dot_product_attention(\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "SQLite:\n",
-      "SELECT MAX(largest_lead_home) \n",
-      "FROM other_stats \n",
-      "WHERE team_name_home = 'Golden State Warriors' AND season_id = '22018';\n",
       "\n"
      ]
     }
@@ -340,18 +322,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "cleaned\n"
-     ]
-    },
-    {
-     "ename": "OperationalError",
-     "evalue": "no such column: team_name_home",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mOperationalError\u001b[0m                          Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[5], line 15\u001b[0m\n\u001b[0;32m     13\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m     14\u001b[0m     query \u001b[38;5;241m=\u001b[39m query_output\n\u001b[1;32m---> 15\u001b[0m \u001b[43mcursor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     16\u001b[0m rows \u001b[38;5;241m=\u001b[39m cursor\u001b[38;5;241m.\u001b[39mfetchall()\n\u001b[0;32m     17\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m row \u001b[38;5;129;01min\u001b[39;00m rows:\n",
-      "\u001b[1;31mOperationalError\u001b[0m: no such column: team_name_home"
      ]
     }
    ],
@@ -370,10 +342,14 @@
     "    query = query_output[4:]\n",
     "else:\n",
     "    query = query_output\n",
-    "cursor.execute(query)\n",
-    "rows = cursor.fetchall()\n",
-    "for row in rows:\n",
-    "    print(row)"
    ]
   },
   {
@@ -385,30 +361,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 65,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
-      "Setting `pad_token_id` to `eos_token_id`:32021 for open-end generation.\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "What is the total number of assists by the Chicago Bulls at home?\n",
-      "SELECT SUM(ast_home) as total_assists  FROM game  WHERE team_name_home = 'Chicago Bulls';\n",
-      "45090.0\n",
       "SQLite:\n",
-      "SELECT SUM(ast_home) \n",
-      "FROM game \n",
-      "WHERE team_name_home = 'Chicago Bulls';\n",
       "\n",
-      "[(45090.0,)]\n",
       "Statement valid? True\n",
       "SQLite matched? False\n",
       "Result matched? True\n"
@@ -444,7 +412,7 @@
     "\n",
     "        # Check if this is a multi-line query\n",
     "        if \"|\" in sample_result or \"(\" in sample_result:\n",
-    "            print(rows)\n",
     "            # Create list of results by stripping separators and splitting on them\n",
     "            if \"(\" in sample_result:\n",
     "                sample_result = sample_result.replace(\"(\", \"\").replace(\")\", \"\")\n",
@@ -477,7 +445,7 @@
     "            return True, query_match, result\n",
     "        # Else the sample result is a single value or string\n",
     "        else:\n",
-    "            print(rows)\n",
     "            result = False\n",
     "            # Loop through model result and see if it contains the sample result\n",
     "            for row in rows:\n",
@@ -530,6 +498,80 @@
     "print(\"SQLite matched? \" + str(result[1]))\n",
     "print(\"Result matched? \" + str(result[2]))"
    ]
   }
  ],
  "metadata": {

       "Total dataset examples: 1044\n",
       "\n",
       "\n",
+      "What was the combined rebound total for the Toronto Raptors and Brooklyn Nets in their highest scoring game against each other?\n",
+      "SELECT MAX(g.pts_home + g.pts_away) AS total_points,        g.reb_home + g.reb_away AS total_rebounds FROM game g WHERE (g.team_name_home = 'Toronto Raptors' AND g.team_name_away = 'Brooklyn Nets')    OR (g.team_name_home = 'Brooklyn Nets' AND g.team_name_away = 'Toronto Raptors') ORDER BY total_points DESC LIMIT 1;\n",
+      "272.0 | 101.0 \n"
      ]
     }
    ],
    "source": [
     "import pandas as pd \n",
+    "import warnings\n",
+    "warnings.filterwarnings(\"ignore\")\n",
     "\n",
     "# Load dataset and check length\n",
     "df = pd.read_csv(\"./train-data/sql_train.tsv\", sep='\\t')\n",
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
+   "outputs": [],
    "source": [
     "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
     "import torch\n",
     "\n",
     "# Load model and tokenizer\n",
     "tokenizer = AutoTokenizer.from_pretrained(\"./deepseek-coder-1.3b-instruct\")\n",
+    "model = AutoModelForCausalLM.from_pretrained(\"./deepseek-coder-1.3b-instruct\", torch_dtype=torch.bfloat16, device_map=device) \n",
+    "model.generation_config.pad_token_id = tokenizer.pad_token_id"
    ]
   },
   {
    "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "SQLite:\n",
+      "SELECT SUM(reb_home + reb_away) AS combined_rebounds\n",
+      "FROM game\n",
+      "WHERE (team_name_home = 'Toronto Raptors' AND team_name_away = 'Brooklyn Nets')\n",
+      "OR (team_name_home = 'Brooklyn Nets' AND team_name_away = 'Toronto Raptors');\n",
       "\n"
      ]
     }
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "cleaned\n",
+      "(4350.0,)\n"
      ]
     }
    ],
     "    query = query_output[4:]\n",
     "else:\n",
     "    query = query_output\n",
+    "\n",
+    "# Run the generated query and show its rows. A failing query is reported\n",
+    "# rather than silently ignored so invalid model output stays visible.\n",
+    "try:\n",
+    "    cursor.execute(query)\n",
+    "    rows = cursor.fetchall()\n",
+    "    for row in rows:\n",
+    "        print(row)\n",
+    "except Exception as err:\n",
+    "    # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate\n",
+    "    print(\"Query failed: \" + str(err))"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "What was the three-point shooting percentage for the Los Angeles Clippers in games against the Los Angeles Lakers?\n",
+      "SELECT AVG(   CASE      WHEN team_name_home = 'LA Clippers' THEN fg3_pct_home     ELSE fg3_pct_away   END ) AS avg_3pt_percentage FROM game WHERE (team_name_home = 'LA Clippers' AND team_name_away = 'Los Angeles Lakers')    OR (team_name_home = 'Los Angeles Lakers' AND team_name_away = 'LA Clippers');\n",
+      "0.3734705882\n",
       "SQLite:\n",
+      "SELECT team_name_home, team_name_away, AVG(fg3_pct_home) AS three_point_percentage\n",
+      "FROM game\n",
+      "WHERE team_name_home = 'Los Angeles Clippers' AND team_name_away = 'Los Angeles Lakers'\n",
+      "GROUP BY team_name_home, team_name_away;\n",
       "\n",
       "Statement valid? True\n",
       "SQLite matched? False\n",
       "Result matched? True\n"
     "\n",
     "        # Check if this is a multi-line query\n",
     "        if \"|\" in sample_result or \"(\" in sample_result:\n",
+    "            #print(rows)\n",
     "            # Create list of results by stripping separators and splitting on them\n",
     "            if \"(\" in sample_result:\n",
     "                sample_result = sample_result.replace(\"(\", \"\").replace(\")\", \"\")\n",
     "            return True, query_match, result\n",
     "        # Else the sample result is a single value or string\n",
     "        else:\n",
+    "            #print(rows)\n",
     "            result = False\n",
     "            # Loop through model result and see if it contains the sample result\n",
     "            for row in rows:\n",
     "print(\"SQLite matched? \" + str(result[1]))\n",
     "print(\"Result matched? \" + str(result[2]))"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create function to evaluate pretrained model on full datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Less than 90 results:\n",
+      "Percent valid: 0.0653061224489796\n",
+      "Percent SQLite matched: 0.00816326530612245\n",
+      "Percent result matched: 0.024489795918367346\n"
+     ]
+    }
+   ],
+   "source": [
+    "def run_evaluation(nba_df, title, max_examples=20):\n",
+    "    \"\"\"Run the pretrained model over rows of a dataframe and print aggregate scores.\n",
+    "\n",
+    "    Relies on the notebook-level names input_text, tokenizer, model and\n",
+    "    compare_result being defined by earlier cells.\n",
+    "\n",
+    "    Args:\n",
+    "        nba_df: DataFrame with \"natural_query\", \"sql_query\" and \"result\" columns.\n",
+    "        title: Label printed in front of the results.\n",
+    "        max_examples: Stop after this many rows; pass None to evaluate every row.\n",
+    "            Defaults to 20, the cap that was previously hard-coded in the loop.\n",
+    "    \"\"\"\n",
+    "    num_evaluated = 0\n",
+    "    num_valid = 0\n",
+    "    num_sql_matched = 0\n",
+    "    num_result_matched = 0\n",
+    "    for _, row in nba_df.iterrows():\n",
+    "        # Stop once the requested number of examples has been evaluated\n",
+    "        if max_examples is not None and num_evaluated >= max_examples:\n",
+    "            break\n",
+    "\n",
+    "        # Create message with sample query and run model\n",
+    "        message=[{ 'role': 'user', 'content': input_text + row[\"natural_query\"]}]\n",
+    "        inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors=\"pt\").to(model.device)\n",
+    "        # Greedy decoding: top_k/top_p are sampling-only options, so they are not passed\n",
+    "        outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)\n",
+    "\n",
+    "        # Obtain output (only the newly generated tokens after the prompt)\n",
+    "        query_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)\n",
+    "\n",
+    "        # Evaluate model result\n",
+    "        valid, sql_matched, result_matched = compare_result(row[\"sql_query\"], row[\"result\"], query_output)\n",
+    "        if valid:\n",
+    "            num_valid += 1\n",
+    "        if sql_matched:\n",
+    "            num_sql_matched += 1\n",
+    "        if result_matched:\n",
+    "            num_result_matched += 1\n",
+    "\n",
+    "        # Report progress every 50 examples\n",
+    "        num_evaluated += 1\n",
+    "        if num_evaluated % 50 == 0:\n",
+    "            print(\"Completed \" + str(num_evaluated))\n",
+    "\n",
+    "    # Print evaluation results\n",
+    "    print(title + \" results:\")\n",
+    "    if num_evaluated == 0:\n",
+    "        # Nothing was run (empty dataframe or max_examples=0); avoid dividing by zero\n",
+    "        print(\"No examples were evaluated\")\n",
+    "        return\n",
+    "    # Divide by the number of rows actually evaluated, not the dataframe length,\n",
+    "    # so an early stop does not deflate the scores\n",
+    "    print(\"Percent valid: \" + str(num_valid / num_evaluated))\n",
+    "    print(\"Percent SQLite matched: \" + str(num_sql_matched / num_evaluated))\n",
+    "    print(\"Percent result matched: \" + str(num_result_matched / num_evaluated))\n",
+    "\n",
+    "less_than_90_df = pd.read_csv(\"./train-data/less_than_90.tsv\", sep='\\t')\n",
+    "run_evaluation(less_than_90_df, \"Less than 90\")\n",
+    "\n",
+    "# Run evaluation on all training data\n",
+    "#run_evaluation(df, \"All training data\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Evaluate on less than 90 dataset"
+   ]
   }
  ],
  "metadata": {