{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "e752885f",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d10d05d0",
"metadata": {},
"outputs": [],
"source": [
"# Load the raw dataset; the relative path is resolved against the working directory.\n",
"main_dataset = pd.read_csv(filepath_or_buffer=\"newfin.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "43f81efa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1040, 3)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"main_dataset.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8ade81ff",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Rainfall | \n",
" River | \n",
" Label | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 6.2 | \n",
" 4.95 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 54.9 | \n",
" 5.43 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 16.4 | \n",
" 4.57 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 56.8 | \n",
" 4.50 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 167.4 | \n",
" 4.28 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Rainfall River Label\n",
"0 6.2 4.95 0\n",
"1 54.9 5.43 0\n",
"2 16.4 4.57 0\n",
"3 56.8 4.50 0\n",
"4 167.4 4.28 0"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"main_dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "f7a4f5ed",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Rainfall | \n",
" River | \n",
" Label | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1035 | \n",
" 0.0 | \n",
" 0.942 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1036 | \n",
" 0.0 | \n",
" 0.878 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1037 | \n",
" 0.0 | \n",
" 0.835 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1038 | \n",
" 0.0 | \n",
" 1.990 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1039 | \n",
" 0.0 | \n",
" 2.070 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Rainfall River Label\n",
"1035 0.0 0.942 0\n",
"1036 0.0 0.878 0\n",
"1037 0.0 0.835 0\n",
"1038 0.0 1.990 0\n",
"1039 0.0 2.070 0"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"main_dataset.tail()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "37519036",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Rainfall | \n",
" River | \n",
" Label | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 6.2 | \n",
" 4.950 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 54.9 | \n",
" 5.430 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 16.4 | \n",
" 4.570 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 56.8 | \n",
" 4.500 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 167.4 | \n",
" 4.280 | \n",
" 0 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 1035 | \n",
" 0.0 | \n",
" 0.942 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1036 | \n",
" 0.0 | \n",
" 0.878 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1037 | \n",
" 0.0 | \n",
" 0.835 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1038 | \n",
" 0.0 | \n",
" 1.990 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1039 | \n",
" 0.0 | \n",
" 2.070 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
1040 rows × 3 columns
\n",
"
"
],
"text/plain": [
" Rainfall River Label\n",
"0 6.2 4.950 0\n",
"1 54.9 5.430 0\n",
"2 16.4 4.570 0\n",
"3 56.8 4.500 0\n",
"4 167.4 4.280 0\n",
"... ... ... ...\n",
"1035 0.0 0.942 0\n",
"1036 0.0 0.878 0\n",
"1037 0.0 0.835 0\n",
"1038 0.0 1.990 0\n",
"1039 0.0 2.070 0\n",
"\n",
"[1040 rows x 3 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# fillna('') only returns a copy (which was discarded) and an empty-string fill would\n",
"# coerce the numeric columns to object dtype. Drop incomplete rows instead and rebind\n",
"# the result so the features stay numeric for model fitting, then display the frame.\n",
"main_dataset = main_dataset.dropna()\n",
"main_dataset"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "775b1c8e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Rainfall | \n",
" River | \n",
" Label | \n",
"
\n",
" \n",
" \n",
" \n",
" | count | \n",
" 1040.000000 | \n",
" 1040.000000 | \n",
" 1040.000000 | \n",
"
\n",
" \n",
" | mean | \n",
" 167.326250 | \n",
" 14.182862 | \n",
" 0.435577 | \n",
"
\n",
" \n",
" | std | \n",
" 186.687956 | \n",
" 23.584739 | \n",
" 0.496071 | \n",
"
\n",
" \n",
" | min | \n",
" 0.000000 | \n",
" 0.835000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 25% | \n",
" 6.525000 | \n",
" 2.810000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 50% | \n",
" 100.000000 | \n",
" 4.640000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 75% | \n",
" 265.575000 | \n",
" 17.725000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" | max | \n",
" 2198.100000 | \n",
" 369.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Rainfall River Label\n",
"count 1040.000000 1040.000000 1040.000000\n",
"mean 167.326250 14.182862 0.435577\n",
"std 186.687956 23.584739 0.496071\n",
"min 0.000000 0.835000 0.000000\n",
"25% 6.525000 2.810000 0.000000\n",
"50% 100.000000 4.640000 0.000000\n",
"75% 265.575000 17.725000 1.000000\n",
"max 2198.100000 369.000000 1.000000"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"main_dataset.describe()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "43db8205",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Label\n",
"0 587\n",
"1 453\n",
"Name: count, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"main_dataset['Label'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e32b80d9",
"metadata": {},
"outputs": [],
"source": [
"# Separate the target column from the predictors.\n",
"Y = main_dataset['Label']\n",
"X = main_dataset.drop(columns=['Label'])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "054230d9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Rainfall River\n",
"0 6.2 4.950\n",
"1 54.9 5.430\n",
"2 16.4 4.570\n",
"3 56.8 4.500\n",
"4 167.4 4.280\n",
"... ... ...\n",
"1035 0.0 0.942\n",
"1036 0.0 0.878\n",
"1037 0.0 0.835\n",
"1038 0.0 1.990\n",
"1039 0.0 2.070\n",
"\n",
"[1040 rows x 2 columns]\n"
]
}
],
"source": [
"print(X)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "deb164b0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 0\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
" ..\n",
"1035 0\n",
"1036 0\n",
"1037 0\n",
"1038 0\n",
"1039 0\n",
"Name: Label, Length: 1040, dtype: int64\n"
]
}
],
"source": [
"print(Y)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "660e25c5",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# 80/20 split, stratified on the label so both parts keep the same class ratio;\n",
"# the fixed seed makes the partition reproducible.\n",
"X_train, X_test, Y_train, Y_test = train_test_split(\n",
"    X,\n",
"    Y,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
"    stratify=Y,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "d0339123-cbf1-4cbc-9d10-641de826b559",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Write the held-out features together with their labels to test_data.csv,\n",
"# without the index column.\n",
"test_data = X_test.assign(Label=Y_test)\n",
"test_data.to_csv('test_data.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "729f6ae4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(832, 2) (832,) (208, 2) (208,)\n"
]
}
],
"source": [
"# Shapes of the four split parts, in the order X_train, Y_train, X_test, Y_test.\n",
"print(*(part.shape for part in (X_train, Y_train, X_test, Y_test)))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "5dc74a26",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "9c24647f",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): Label is a 0/1 class column, but LinearRegression produces unbounded\n",
"# real values (the sample prediction later in this notebook is about 6.93); confirm a\n",
"# regressor is intended rather than a classifier such as LogisticRegression.\n",
"model = LinearRegression(fit_intercept=True)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "17833f81",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
],
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit on the training split; fit() returns the estimator, which is shown as the cell output.\n",
"model.fit(X=X_train, y=Y_train)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "ab5d6d4d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MSE: 0.16963572059262821\n",
"R² Score: 0.310686595687098\n"
]
}
],
"source": [
"from sklearn.metrics import mean_squared_error, r2_score\n",
"\n",
"# Compute the test-set predictions here so this cell does not depend on a variable\n",
"# left over from an earlier kernel session (a fresh Restart & Run All would otherwise\n",
"# fail with NameError on X_test_prediction).\n",
"X_test_prediction = model.predict(X_test)\n",
"\n",
"# Mean squared error and coefficient of determination on the held-out split.\n",
"mse = mean_squared_error(Y_test, X_test_prediction)\n",
"r2 = r2_score(Y_test, X_test_prediction)\n",
"\n",
"print(\"MSE:\", mse)\n",
"print(\"R² Score:\", r2)\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "9c28442b",
"metadata": {},
"outputs": [],
"source": [
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "d1f915f3",
"metadata": {},
"outputs": [],
"source": [
"# Serialize the fitted model. The context manager closes the file handle, which also\n",
"# flushes the pickle to disk, even if dump() raises.\n",
"with open('mainmodel.pkl', 'wb') as model_file:\n",
"    pickle.dump(model, model_file)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "ca1e7621",
"metadata": {},
"outputs": [],
"source": [
"# Read the model back from disk; the context manager closes the file handle.\n",
"# Only unpickle files you trust: pickle.load can execute arbitrary code.\n",
"with open('mainmodel.pkl', 'rb') as model_file:\n",
"    rslt = pickle.load(model_file)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "839a4832",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LinearRegression()\n"
]
}
],
"source": [
"print(rslt)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "49944ff7",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# One sample to score; the column names match those of the frame the model was fitted on.\n",
"X_new = pd.DataFrame({'Rainfall': [597], 'River': [53]})\n",
"prediction = model.predict(X_new)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "1af2dbee",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[6.92990444]\n"
]
}
],
"source": [
"print(prediction)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "48d2f015",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['./model.joblib']"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from joblib import dump\n",
"\n",
"# Also store the fitted model with joblib; dump() returns the list of written paths.\n",
"dump(value=model, filename='./model.joblib')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfc16ebc-191e-472a-9c77-1d6ee33c37a4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
},
"vscode": {
"interpreter": {
"hash": "76d7c06053c3456e5600312cec90888656fc0ed30c03d8425b9dac6e4fc8e014"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}