{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "e752885f", "metadata": {}, "outputs": [], "source": [ "# Every dependency the notebook uses is imported here, up front.\n", "import pickle\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from joblib import dump\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.metrics import accuracy_score, mean_squared_error, r2_score\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler" ] }, { "cell_type": "code", "execution_count": 5, "id": "d10d05d0", "metadata": {}, "outputs": [], "source": [ "# Path is relative to the notebook's working directory.\n", "main_dataset = pd.read_csv(\"newfin.csv\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "43f81efa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1040, 3)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "main_dataset.shape" ] }, { "cell_type": "code", "execution_count": 7, "id": "8ade81ff", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RainfallRiverLabel
06.24.950
154.95.430
216.44.570
356.84.500
4167.44.280
\n", "
" ], "text/plain": [ " Rainfall River Label\n", "0 6.2 4.95 0\n", "1 54.9 5.43 0\n", "2 16.4 4.57 0\n", "3 56.8 4.50 0\n", "4 167.4 4.28 0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Peek at the first five rows of the loaded frame.\n", "main_dataset.head(n=5)" ] }, { "cell_type": "code", "execution_count": 8, "id": "f7a4f5ed", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RainfallRiverLabel
10350.00.9420
10360.00.8780
10370.00.8350
10380.01.9900
10390.02.0700
\n", "
" ], "text/plain": [ " Rainfall River Label\n", "1035 0.0 0.942 0\n", "1036 0.0 0.878 0\n", "1037 0.0 0.835 0\n", "1038 0.0 1.990 0\n", "1039 0.0 2.070 0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Peek at the last five rows of the loaded frame.\n", "main_dataset.tail(n=5)" ] }, { "cell_type": "code", "execution_count": 9, "id": "37519036", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RainfallRiverLabel
06.24.9500
154.95.4300
216.44.5700
356.84.5000
4167.44.2800
............
10350.00.9420
10360.00.8780
10370.00.8350
10380.01.9900
10390.02.0700
\n", "

1040 rows × 3 columns

\n", "
" ], "text/plain": [ " Rainfall River Label\n", "0 6.2 4.950 0\n", "1 54.9 5.430 0\n", "2 16.4 4.570 0\n", "3 56.8 4.500 0\n", "4 167.4 4.280 0\n", "... ... ... ...\n", "1035 0.0 0.942 0\n", "1036 0.0 0.878 0\n", "1037 0.0 0.835 0\n", "1038 0.0 1.990 0\n", "1039 0.0 2.070 0\n", "\n", "[1040 rows x 3 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop rows with missing values and keep the result so later cells see it.\n", "# Filling with '' would turn the numeric columns into object dtype and the\n", "# model could no longer be fitted on them.\n", "main_dataset = main_dataset.dropna().reset_index(drop=True)\n", "main_dataset" ] }, { "cell_type": "code", "execution_count": 10, "id": "775b1c8e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RainfallRiverLabel
count1040.0000001040.0000001040.000000
mean167.32625014.1828620.435577
std186.68795623.5847390.496071
min0.0000000.8350000.000000
25%6.5250002.8100000.000000
50%100.0000004.6400000.000000
75%265.57500017.7250001.000000
max2198.100000369.0000001.000000
\n", "
" ], "text/plain": [ " Rainfall River Label\n", "count 1040.000000 1040.000000 1040.000000\n", "mean 167.326250 14.182862 0.435577\n", "std 186.687956 23.584739 0.496071\n", "min 0.000000 0.835000 0.000000\n", "25% 6.525000 2.810000 0.000000\n", "50% 100.000000 4.640000 0.000000\n", "75% 265.575000 17.725000 1.000000\n", "max 2198.100000 369.000000 1.000000" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Summary statistics for every numeric column.\n", "main_dataset.describe()" ] }, { "cell_type": "code", "execution_count": 11, "id": "43db8205", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Label\n", "0 587\n", "1 453\n", "Name: count, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Class balance of the target column.\n", "main_dataset['Label'].value_counts()" ] }, { "cell_type": "code", "execution_count": 12, "id": "e32b80d9", "metadata": {}, "outputs": [], "source": [ "# Features are every column except the target; the target is Label.\n", "X = main_dataset.drop(columns='Label')\n", "Y = main_dataset['Label']" ] }, { "cell_type": "code", "execution_count": 13, "id": "054230d9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Rainfall River\n", "0 6.2 4.950\n", "1 54.9 5.430\n", "2 16.4 4.570\n", "3 56.8 4.500\n", "4 167.4 4.280\n", "... ... 
...\n", "1035 0.0 0.942\n", "1036 0.0 0.878\n", "1037 0.0 0.835\n", "1038 0.0 1.990\n", "1039 0.0 2.070\n", "\n", "[1040 rows x 2 columns]\n" ] } ], "source": [ "print(X)" ] }, { "cell_type": "code", "execution_count": 14, "id": "deb164b0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 0\n", "1 0\n", "2 0\n", "3 0\n", "4 0\n", " ..\n", "1035 0\n", "1036 0\n", "1037 0\n", "1038 0\n", "1039 0\n", "Name: Label, Length: 1040, dtype: int64\n" ] } ], "source": [ "print(Y)" ] }, { "cell_type": "code", "execution_count": 15, "id": "660e25c5", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "# 80/20 split, stratified on the label, with a fixed seed for repeatability.\n", "X_train, X_test, Y_train, Y_test = train_test_split(\n", "    X,\n", "    Y,\n", "    test_size=0.2,\n", "    stratify=Y,\n", "    random_state=42,\n", ")" ] }, { "cell_type": "code", "execution_count": 34, "id": "d0339123-cbf1-4cbc-9d10-641de826b559", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# Write the held-out rows (features plus label) to disk without the index.\n", "test_data = X_test.assign(Label=Y_test)\n", "test_data.to_csv('test_data.csv', index=False)" ] }, { "cell_type": "code", "execution_count": 16, "id": "729f6ae4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(832, 2) (832,) (208, 2) (208,)\n" ] } ], "source": [ "# Sanity check: row counts of the four split pieces.\n", "print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)" ] }, { "cell_type": "code", "execution_count": 18, "id": "5dc74a26", "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression" ] }, { "cell_type": "code", "execution_count": 20, "id": "9c24647f", "metadata": {}, "outputs": [], "source": [ "model = LinearRegression()" ] }, { "cell_type": "code", "execution_count": 21, "id": "17833f81", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "LinearRegression()" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(X_train, Y_train)" ] }, { "cell_type": "code", "execution_count": 23, "id": "ab5d6d4d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE: 0.16963572059262821\n", "R² Score: 0.310686595687098\n" ] } ], "source": [ "from sklearn.metrics import mean_squared_error, r2_score\n", "\n", "# Predictions for the held-out split; no other cell defines X_test_prediction,\n", "# so it is computed here to keep the notebook runnable on a fresh kernel.\n", "X_test_prediction = model.predict(X_test)\n", "\n", "mse = mean_squared_error(Y_test, X_test_prediction)\n", "r2 = r2_score(Y_test, X_test_prediction)\n", "\n", "print(\"MSE:\", mse)\n", "print(\"R² Score:\", r2)\n" ] }, { "cell_type": "code", "execution_count": 25, "id": "9c28442b", "metadata": {}, "outputs": [], "source": [ "import pickle" ] }, { "cell_type": "code", "execution_count": 26, "id": "d1f915f3", "metadata": {}, "outputs": [], "source": [ "# Context manager closes the file handle even if pickling fails.\n", "with open('mainmodel.pkl', 'wb') as model_file:\n", "    pickle.dump(model, model_file)" ] }, { "cell_type": "code", "execution_count": 27, "id": "ca1e7621", "metadata": {}, "outputs": [], "source": [ "# Round-trip check: reload the file that was just written.\n", "with open('mainmodel.pkl', 'rb') as model_file:\n", "    rslt = pickle.load(model_file)" ] }, { "cell_type": "code", "execution_count": 28, "id": "839a4832", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LinearRegression()\n" ] } ], "source": [ "print(rslt)" ] }, { "cell_type": "code", "execution_count": 53, "id": "49944ff7", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# Single ad-hoc sample; column names must match the training features.\n", "# NOTE(review): Label is 0/1 but LinearRegression returns an unbounded score,\n", "# not a class - threshold it or switch to a classifier if a label is needed.\n", "X_new = pd.DataFrame([[597,53]], columns=['Rainfall', 'River'])\n", "prediction = model.predict(X_new)" ] }, { "cell_type": "code", "execution_count": 55, "id": "1af2dbee", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[6.92990444]\n" ] } ], "source": [ "print(prediction)" ] }, { "cell_type": "code", "execution_count": 46, "id": "48d2f015", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['./model.joblib']" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "from joblib import dump\n", "\n", "# Second on-disk copy of the fitted model, this time in joblib format.\n", "dump(model, './model.joblib')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" }, "vscode": { "interpreter": { "hash": "76d7c06053c3456e5600312cec90888656fc0ed30c03d8425b9dac6e4fc8e014" } } }, "nbformat": 4, "nbformat_minor": 5 }