MightyOctopus committed on
Commit 13495b5 · verified · 1 Parent(s): 71ea44c

Upload 6 files

.gitattributes CHANGED
@@ -3,3 +3,4 @@ products_vectordb_production/2663ea41-1052-4307-83f1-1edb20555e69/index_metadata
  products_vectordb_production/2663ea41-1052-4307-83f1-1edb20555e69/length.bin filter=lfs diff=lfs merge=lfs -text
  products_vectordb_production/2663ea41-1052-4307-83f1-1edb20555e69/link_lists.bin filter=lfs diff=lfs merge=lfs -text
  products_vectordb_production/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+ models/neural_network_pricer_model.pt filter=lfs diff=lfs merge=lfs -text
models/.ipynb_checkpoints/neural_network-checkpoint.ipynb ADDED
@@ -0,0 +1,355 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "5ae2adf5-1c4b-4d55-98b3-523e65ed84f1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "import numpy as np \n",
+ "from tqdm import tqdm\n",
+ "from sklearn.feature_extraction.text import HashingVectorizer\n",
+ "import torch \n",
+ "import torch.nn as nn\n",
+ "import torch.optim as optim \n",
+ "from torch.utils.data import DataLoader, TensorDataset\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import pickle\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "8e0b5764-2450-4ef5-b47f-47833a441c21",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Volumes/VTG/Dev/C_5/Projects/week8/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ "  from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
+ "source": [
+ "with open(\"train.pkl\", \"rb\") as f: \n",
+ "    train_ds = pickle.load(f)\n",
+ "\n",
+ "with open(\"test.pkl\", \"rb\") as f: \n",
+ "    test_ds = pickle.load(f)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "8d4532ef-acd8-4b40-921c-ca27891cab7c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Documents for X / Price for y\n",
+ "\n",
+ "y = np.array([float(item.price) for item in train_ds])\n",
+ "documents = [str(item).split(\"= $\")[0].replace(\"<\", \"\").strip() for item in train_ds]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8bfa2437-6376-4abb-ad75-01d9eef0253a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Use the HashingVectorizer for a Bag of Words model\n",
+ "\n",
+ "np.random.seed(42)\n",
+ "vectorizer = HashingVectorizer(n_features=5000, stop_words=\"english\", binary=True)\n",
+ "X = vectorizer.fit_transform(documents)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "31154065-3973-4afa-a9f9-9f348c105ba1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class NeuralNetwork(nn.Module): \n",
+ "    def __init__(self, input_size): \n",
+ "        super(NeuralNetwork, self).__init__()\n",
+ "        self.layer1 = nn.Linear(input_size, 128)\n",
+ "        self.layer2 = nn.Linear(128, 64)\n",
+ "        self.layer3 = nn.Linear(64, 64)\n",
+ "        self.layer4 = nn.Linear(64, 64)\n",
+ "        self.layer5 = nn.Linear(64, 64) \n",
+ "        self.layer6 = nn.Linear(64, 64)\n",
+ "        self.layer7 = nn.Linear(64, 64)\n",
+ "        self.layer8 = nn.Linear(64, 1)\n",
+ "        self.relu = nn.ReLU()\n",
+ "\n",
+ "    def forward(self, x): \n",
+ "        output1 = self.relu(self.layer1(x))\n",
+ "        output2 = self.relu(self.layer2(output1))\n",
+ "        output3 = self.relu(self.layer3(output2))\n",
+ "        output4 = self.relu(self.layer4(output3))\n",
+ "        output5 = self.relu(self.layer5(output4)) \n",
+ "        output6 = self.relu(self.layer6(output5)) \n",
+ "        output7 = self.relu(self.layer7(output6)) \n",
+ "        output8 = self.layer8(output7)\n",
+ "\n",
+ "        return output8"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "6d538f09-4c01-4e1b-bb6f-661d196ea05b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Convert data to PyTorch tensors\n",
+ "X_train_tensor = torch.FloatTensor(X.toarray())\n",
+ "y_train_tensor = torch.FloatTensor(y).unsqueeze(1)\n",
+ "\n",
+ "### Split the data into training and validation sets\n",
+ "X_train, X_val, y_train, y_val = train_test_split(\n",
+ "    X_train_tensor, \n",
+ "    y_train_tensor, \n",
+ "    test_size=0.01, \n",
+ "    random_state=42\n",
+ ")\n",
+ "\n",
+ "### Create the loader (modified the original batch_size 64)\n",
+ "train_dataset = TensorDataset(X_train, y_train)\n",
+ "train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)\n",
+ "\n",
+ "### Initialize the model\n",
+ "input_size = X_train_tensor.shape[1]\n",
+ "model = NeuralNetwork(input_size)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "b8b2d948-011d-4a84-b49e-f5dba399511a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of trainable parameters: 669,249\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
+ "\n",
+ "print(f\"Number of trainable parameters: {trainable_params:,}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "bb19432d-dc08-4e7e-a515-834c50608e3d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|█████████████████████████████████████████████████████████████████████████| 1161/1161 [00:05<00:00, 197.43it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch [1/3], Train Loss: 39517.398, Val Loss: 19185.752\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|█████████████████████████████████████████████████████████████████████████| 1161/1161 [00:05<00:00, 214.18it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch [2/3], Train Loss: 40145.402, Val Loss: 18731.053\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|█████████████████████████████████████████████████████████████████████████| 1161/1161 [00:05<00:00, 225.37it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch [3/3], Train Loss: 12280.417, Val Loss: 17233.447\n"
+ ]
+ }
+ ],
+ "source": [
+ "## Define loss function and optimizer \n",
+ "\n",
+ "loss_function = nn.MSELoss() \n",
+ "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
+ "\n",
+ "EPOCHS = 3\n",
+ "\n",
+ "for epoch in range(EPOCHS): \n",
+ "    model.train()\n",
+ "    for batch_X, batch_y in tqdm(train_loader): \n",
+ "        optimizer.zero_grad()\n",
+ "\n",
+ "        ### forward pass, loss calculation, backward pass, optimizer\n",
+ "        outputs = model(batch_X)\n",
+ "        loss = loss_function(outputs, batch_y)\n",
+ "        loss.backward()\n",
+ "        optimizer.step()\n",
+ "\n",
+ "    model.eval()\n",
+ "    with torch.no_grad(): \n",
+ "        val_outputs = model(X_val)\n",
+ "        val_loss = loss_function(val_outputs, y_val)\n",
+ "\n",
+ "    print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {loss.item():.3f}, Val Loss: {val_loss.item():.3f}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "7eafa04b-ec1a-4905-b2db-1c37adc2e153",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def neural_network(item): \n",
+ "    model.eval()\n",
+ "    with torch.no_grad(): \n",
+ "        vector = vectorizer.transform([item])\n",
+ "        vector = torch.FloatTensor(vector.toarray())\n",
+ "        result = model(vector)[0].item()\n",
+ "\n",
+ "    return max(0, result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "982bd690-bf50-4244-bece-66e647a639d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_items = [\n",
+ "    {\n",
+ "        \"item\": str(item).split(\"= $\")[0].replace(\"<\", \"\").strip(), \n",
+ "        \"price\": item.price, \n",
+ "        \"title\": item.title\n",
+ "    }\n",
+ "    for item in test_ds\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8b18a98d-6199-4793-8e10-595c6aad98c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from prototypes.testing_for_neural_network import TesterForNeuralNetwork\n",
+ "\n",
+ "TesterForNeuralNetwork.test(neural_network, test_items)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "10e61448-0211-49e0-865b-dd6fc4498a41",
+ "metadata": {},
+ "source": [
+ "## Save the neural network model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "fb70ea49-7df7-4d98-9e72-3efa59a94d21",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['vectorizer.joblib']"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "torch.save(model.state_dict(), \"models/neural_network_pricer_model.pt\")\n",
+ "\n",
+ "import joblib\n",
+ "joblib.dump(vectorizer, \"vectorizer.joblib\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "303a5497-e6e7-4f4b-81e5-fc36665073a4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'sklearn.feature_extraction.text.HashingVectorizer'>\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(type(vectorizer))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cf3e1e1f-7393-4fc2-8147-8f3850a2f3e0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
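
Note: the notebook above saves the weights with torch.save(model.state_dict(), ...) and the vectorizer with joblib.dump(...). The sketch below is a minimal, illustrative way those two artifacts could be reloaded for inference; it assumes the repository root as the working directory, that the class in models/neural_network.py is importable as models.neural_network, that the committed models/vectorizer.joblib is the vectorizer the notebook dumped, and the sample product string is made up.

import joblib
import torch

from models.neural_network import NeuralNetwork  # assumed import path

# Assumed paths: the committed artifacts under models/
vectorizer = joblib.load("models/vectorizer.joblib")   # HashingVectorizer(n_features=5000, stop_words="english", binary=True)
model = NeuralNetwork(input_size=5000)                  # must match n_features used at training time
model.load_state_dict(torch.load("models/neural_network_pricer_model.pt", map_location="cpu"))
model.eval()

def predict_price(description: str) -> float:
    # Vectorize the product text, run a forward pass, and clamp at zero, as in the notebook.
    with torch.no_grad():
        x = torch.FloatTensor(vectorizer.transform([description]).toarray())
        return max(0.0, model(x)[0].item())

print(predict_price("USB-C charging cable, 2 m"))       # illustrative input only
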
models/__pycache__/neural_network.cpython-313.pyc ADDED
Binary file (2.44 kB).
 
models/neural_network.ipynb ADDED
The diff for this file is too large to render.
 
models/neural_network.py ADDED
@@ -0,0 +1,27 @@
+ import torch
+ import torch.nn as nn
+
+ class NeuralNetwork(nn.Module):
+     def __init__(self, input_size):
+         super(NeuralNetwork, self).__init__()
+         self.layer1 = nn.Linear(input_size, 128)
+         self.layer2 = nn.Linear(128, 64)
+         self.layer3 = nn.Linear(64, 64)
+         self.layer4 = nn.Linear(64, 64)
+         self.layer5 = nn.Linear(64, 64)
+         self.layer6 = nn.Linear(64, 64)
+         self.layer7 = nn.Linear(64, 64)
+         self.layer8 = nn.Linear(64, 1)
+         self.relu = nn.ReLU()
+
+     def forward(self, x):
+         output1 = self.relu(self.layer1(x))
+         output2 = self.relu(self.layer2(output1))
+         output3 = self.relu(self.layer3(output2))
+         output4 = self.relu(self.layer4(output3))
+         output5 = self.relu(self.layer5(output4))
+         output6 = self.relu(self.layer6(output5))
+         output7 = self.relu(self.layer7(output6))
+         output8 = self.layer8(output7)
+
+         return output8
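
Note: a quick sanity check for the module above. The 5,000-dimensional input matches the HashingVectorizer(n_features=5000) used in the training notebook, and the parameter count should come out to the 669,249 reported there; the import path is an assumption about how the files are laid out.

import torch

from models.neural_network import NeuralNetwork  # assumed import path

model = NeuralNetwork(input_size=5000)            # 5000 = n_features of the HashingVectorizer
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"{n_params:,} trainable parameters")       # 669,249, matching the notebook output

dummy = torch.zeros(2, 5000)                      # a batch of two bag-of-words vectors
print(model(dummy).shape)                         # torch.Size([2, 1]) -> one predicted price per item
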
models/neural_network_pricer_model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3df70692cef20ed4618414252a343d450c2156a9aa8305c30422b900d6fe28db
+ size 2683243
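
Note: because the checkpoint is stored via Git LFS, a clone without `git lfs pull` contains only the three-line pointer shown above rather than the 2.6 MB weights. Below is a small guard one might add before torch.load; it is illustrative and not part of the repository.

from pathlib import Path

def is_lfs_pointer(path: str) -> bool:
    # An unresolved pointer is a tiny text file beginning with the LFS spec line shown above.
    head = Path(path).read_bytes()[:64]
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")

if is_lfs_pointer("models/neural_network_pricer_model.pt"):
    raise RuntimeError("Checkpoint is still an LFS pointer; run `git lfs pull` first.")
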
models/vectorizer.joblib ADDED
Binary file (424 Bytes).