USC-Applied-NLP-Group
/

SQL-Generation

TensorBoard

Safetensors

Model card Files Files and versions

xet

Metrics Training metrics Community

licesma committed on Apr 10, 2025

Commit

c397e97

1 Parent(s): bfad6ce

colab support part two

Browse files

Files changed (1) hide show

test_pretrained.ipynb +17 -13

test_pretrained.ipynb CHANGED Viewed

@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -19,13 +19,14 @@
     "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
     "import torch\n",
     "import sys\n",
     "import sqlite3 as sql\n",
     "from huggingface_hub import snapshot_download"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,22 +35,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
     "if is_google_colab:\n",
     "    hugging_face_path = snapshot_download(\n",
     "        repo_id=\"USC-Applied-NLP-Group/SQL-Generation\",\n",
     "        repo_type=\"model\",  \n",
     "        allow_patterns=[\"src/*\"],  \n",
     "    )\n",
-    "    sys.path.append(hugging_face_path)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -66,7 +70,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -76,15 +80,15 @@
       "Total dataset examples: 1044\n",
       "\n",
       "\n",
-      "Which team had the largest lead in a single game in the 2001 season?\n",
-      "SELECT g.team_name_home AS team, os.largest_lead_home AS lead FROM other_stats os JOIN game g ON os.game_id = g.game_id WHERE g.season_id = '22001' ORDER BY os.largest_lead_home DESC LIMIT 1;\n",
-      "Portland Trail Blazers|47\n"
      ]
     }
    ],
    "source": [
     "# Load dataset and check length\n",
-    "df = pd.read_csv(\"./train-data/sql_train.tsv\", sep='\\t')\n",
     "print(\"Total dataset examples: \" + str(len(df)))\n",
     "print(\"\\n\")\n",
     "\n",
@@ -126,7 +130,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -159,7 +163,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -202,7 +206,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
    "outputs": [
     {

   },
   {
    "cell_type": "code",
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
     "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
     "import torch\n",
     "import sys\n",
+    "import os\n",
     "import sqlite3 as sql\n",
     "from huggingface_hub import snapshot_download"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
+    "current_path = \"./\"\n",
+    "\n",
     "if is_google_colab:\n",
     "    hugging_face_path = snapshot_download(\n",
     "        repo_id=\"USC-Applied-NLP-Group/SQL-Generation\",\n",
     "        repo_type=\"model\",  \n",
     "        allow_patterns=[\"src/*\"],  \n",
     "    )\n",
+    "    sys.path.append(hugging_face_path)\n",
+    "    current_path = hugging_face_path"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
       "Total dataset examples: 1044\n",
       "\n",
       "\n",
+      "How many points did the Phoenix Suns score in the highest scoring away game they played?\n",
+      "SELECT MAX(pts_away) FROM game WHERE team_abbreviation_away = 'PHX';\n",
+      "161.0\n"
      ]
     }
    ],
    "source": [
     "# Load dataset and check length\n",
+    "df = pd.read_csv(os.path.join(current_path, \"train-data/sql_train.tsv\"), sep=\"\\t\")\n",
     "print(\"Total dataset examples: \" + str(len(df)))\n",
     "print(\"\\n\")\n",
     "\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {