{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "FP8Kp96zuRU0" }, "outputs": [], "source": [ "from sklearn.model_selection import RandomizedSearchCV, train_test_split, KFold\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.linear_model import Ridge\n", "from sklearn.pipeline import Pipeline\n", "import pandas as pd\n", "import numpy as np\n", "from math import floor\n", "from collections import namedtuple as NamedTuple" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 190 }, "id": "GCbKPkDP_Fra", "outputId": "f3cbedc6-1e64-4595-d54f-803cb586eaab" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "RandomizedSearchCV(cv=KFold(n_splits=5, random_state=42, shuffle=True),\n", " estimator=Pipeline(steps=[('standard_scaler',\n", " StandardScaler()),\n", " ('ridge', Ridge())]),\n", " n_iter=150,\n", " param_distributions={'ridge__alpha': array([0.001, 0.012, 0.023, 0.034, 0.045, 0.056, 0.067, 0.078, 0.089,\n", " 0.1 ]),\n", " 'ridge__max_iter': [5000, 10000]},\n", " random_state=42)" ], "text/html": [ "
RandomizedSearchCV(cv=KFold(n_splits=5, random_state=42, shuffle=True),\n",
              "                   estimator=Pipeline(steps=[('standard_scaler',\n",
              "                                              StandardScaler()),\n",
              "                                             ('ridge', Ridge())]),\n",
              "                   n_iter=150,\n",
              "                   param_distributions={'ridge__alpha': array([0.001, 0.012, 0.023, 0.034, 0.045, 0.056, 0.067, 0.078, 0.089,\n",
              "       0.1  ]),\n",
              "                                        'ridge__max_iter': [5000, 10000]},\n",
              "                   random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, "execution_count": 87 } ], "source": [ "gpu_data = pd.get_dummies(pd.read_csv(\"./sample_data/GPU_specs.csv\"), columns=[\"mem_type\"]) # the GPU price prediction model dataframe\n", "ssd_data = pd.read_csv(\"./sample_data/SSD_specs.csv\")[[\"seq_read\", \"seq_write\", \"interface\", \"protocol\", \"type\", \"endurance\", \"launch_price\"]].dropna() # the SSD price prediction model dataframe\n", "ssd_data = pd.get_dummies(ssd_data, columns = [\"type\", \"interface\", \"protocol\"])\n", "ssd_data[\"seq_read+seq_write\"] = ssd_data[\"seq_read\"] + ssd_data[\"seq_write\"]\n", "ssd_data = ssd_data.drop([\"seq_read\", \"seq_write\"], axis=1)\n", "ssd_data = ssd_data[[\"seq_read+seq_write\", *ssd_data.columns.tolist()[:-1]]]\n", "\n", "kf_gpu = KFold(n_splits=8, shuffle=True, random_state=42)\n", "kf_ssd = KFold(n_splits=5, shuffle=True, random_state=42)\n", "\n", "central_gpu = Pipeline([\n", " (\"standard_scaler\", StandardScaler()),\n", " (\"ridge\", Ridge())\n", "])\n", "central_ssd = Pipeline([\n", " (\"standard_scaler\", StandardScaler()),\n", " (\"ridge\", Ridge())\n", "])\n", "\n", "central_gpu_params = {\"ridge__alpha\" : np.linspace(0.001, 0.1, 10),\n", " \"ridge__max_iter\" : [5000, 10000]}\n", "central_cpu_params = {\"ridge__alpha\" : np.linspace(0.001, 0.1, 10),\n", " \"ridge__max_iter\" : [5000, 10000]}\n", "\n", "GPUxtrain, GPUxtest, GPUytrain, GPUytest = train_test_split(gpu_data.loc[:, \"transistors\":\"mem_type_GDDR7\"].drop(\"launch_price\", axis=1),\n", " gpu_data[\"launch_price\"],\n", " train_size=0.8,\n", " random_state=12,\n", " shuffle=True)\n", "SSDxtrain, SSDxtest, SSDytrain, SSDytest = train_test_split(ssd_data.loc[:, \"seq_read+seq_write\":\"protocol_NVMe 2.0\"].drop(\"launch_price\", axis=1),\n", " ssd_data[\"launch_price\"],\n", " train_size=0.8,\n", " random_state=12,\n", " shuffle = True)\n", "central_gpu_rs = RandomizedSearchCV(central_gpu, central_gpu_params, cv=kf_gpu, random_state=42, n_iter=150)\n", "central_ssd_rs = RandomizedSearchCV(central_ssd, central_cpu_params, cv=kf_ssd, random_state=42, n_iter=150)\n", "central_gpu_rs.fit(GPUxtrain, GPUytrain)\n", "central_ssd_rs.fit(SSDxtrain, SSDytrain)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "d5INN6Xf2wGe", "outputId": "5e4e11e3-10f4-4cba-8c63-046a93df1c03" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "------------evaluation field------------\n", "GPU model r² score : 0.6332012667256661\n", "SSD model r² score : 0.23117551100698042\n", "----------------------------------------\n" ] } ], "source": [ "print(f\"\"\"{'-' * 12}evaluation field{'-' * 12}\n", "GPU model r² score : {central_gpu_rs.score(GPUxtest, GPUytest)}\n", "SSD model r² score : {central_ssd_rs.score(SSDxtest, SSDytest)}\n", "{'-'*(24+len(\"evaluation field\"))}\"\"\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "o3WzaCm79LXr" }, "outputs": [], "source": [ "# the initializations needed in order to predict for specific GPU/SSD specs\n", "# this is the block you mainly need to use to predict using the two models\n", "# just give the properties of the GPU or SSD you like to predict the price of and then execute the next cell.\n", "\n", "gpu = NamedTuple(\"GPU_SPECS\", [\"transistors\",\n", " \"base_clock\",\n", " \"mem_clock\",\n", " \"mem_size\",\n", " \"shading_units\",\n", " \"TMUs\",\n", " \"ROPs\",\n", " \"mem_type_GDDR5\",\n", " \"mem_type_GDDR5X\",\n", " \"mem_type_GDDR6\",\n", " \"mem_type_GDDR6X\",\n", " \"mem_type_GDDR7\"])\n", "ssd = NamedTuple(\"SSD_SPECS\", [\n", " \"seq_read_write\",\n", " \"endurance\",\n", " \"type_MLC\",\n", " \"type_QLC\",\n", " \"type_SLC\",\n", " \"type_TLC\",\n", " \"interface_PCIe_3_0_x4\",\n", " \"interface_PCIe_4_0_x4\",\n", " \"interface_PCIe_5_0_x4\",\n", " \"interface_SATA_6_Gbps\",\n", " \"protocol_AHCI\",\n", " \"protocol_NVMe\",\n", " \"protocol_NVMe_1_2\",\n", " \"protocol_NVMe_1_3\",\n", " \"protocol_NVMe_1_4\",\n", " \"protocol_NVMe_2_0\"\n", "])\n", "GPU_SPECS = gpu(\n", " # memory and graphics related specs\n", " transistors = 21760, # milion transistor\n", " base_clock = 680, # in MHz\n", " mem_clock = 1750, # in MHz\n", " mem_size = 8 , # in GBs\n", " shading_units = 5888,\n", " TMUs = 184,\n", " ROPs = 96,\n", "\n", " # memory type\n", " mem_type_GDDR5 = 0,\n", " mem_type_GDDR5X = 0,\n", " mem_type_GDDR6 = 1,\n", " mem_type_GDDR6X = 0,\n", " mem_type_GDDR7 = 0\n", ")\n", "SSD_SPECS = ssd(\n", " seq_read_write = 7000+6500, # in MB/s (sequential read + sequential write)\n", " endurance = 750, # in TBW\n", "\n", " # SSD types\n", " type_MLC = 0,\n", " type_QLC = 0,\n", " type_SLC = 0,\n", " type_TLC = 1,\n", "\n", " # SSD interfaces\n", " interface_PCIe_3_0_x4 = 0,\n", " interface_PCIe_4_0_x4 = 1,\n", " interface_PCIe_5_0_x4 = 0,\n", " interface_SATA_6_Gbps = 0,\n", "\n", " # SSD protocols\n", " protocol_AHCI = 0,\n", " protocol_NVMe = 0,\n", " protocol_NVMe_1_2 = 0,\n", " protocol_NVMe_1_3 = 0,\n", " protocol_NVMe_1_4 = 1,\n", " protocol_NVMe_2_0 = 0\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "k4jnsIX5LBu3", "outputId": "ab428a4d-3bf9-41bc-8617-f8b7d068e6f9" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", " ----------predictions----------\n", " the GPU price : $490\n", " the SSD price : $123\n", " -------------------------------\n", " \n" ] } ], "source": [ "# the predictions cell\n", "print(\n", " f\"\"\"\n", " {\"-\"*10}predictions{\"-\"*10}\n", " the GPU price : ${floor(central_gpu_rs.predict([*[GPU_SPECS]]))}\n", " the SSD price : ${floor(central_ssd_rs.predict([*[SSD_SPECS]]))}\n", " {\"-\"*(20+len(\"predictions\"))}\n", " \"\"\"\n", ")" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }