{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "68dfebbc",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "68dfebbc",
"outputId": "80579e75-dfb3-46ea-dfe9-16713cbf0ab8"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" v1 v2 Unnamed: 2 \\\n",
"0 ham Go until jurong point, crazy.. Available only ... NaN \n",
"1 ham Ok lar... Joking wif u oni... NaN \n",
"2 spam Free entry in 2 a wkly comp to win FA Cup fina... NaN \n",
"3 ham U dun say so early hor... U c already then say... NaN \n",
"4 ham Nah I don't think he goes to usf, he lives aro... NaN \n",
"\n",
" Unnamed: 3 Unnamed: 4 \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN "
],
"text/html": [
"\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
"
\n",
"
\n",
"
v1
\n",
"
v2
\n",
"
Unnamed: 2
\n",
"
Unnamed: 3
\n",
"
Unnamed: 4
\n",
"
\n",
" \n",
" \n",
"
\n",
"
0
\n",
"
ham
\n",
"
Go until jurong point, crazy.. Available only ...
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
"
\n",
"
\n",
"
1
\n",
"
ham
\n",
"
Ok lar... Joking wif u oni...
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
"
\n",
"
\n",
"
2
\n",
"
spam
\n",
"
Free entry in 2 a wkly comp to win FA Cup fina...
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
"
\n",
"
\n",
"
3
\n",
"
ham
\n",
"
U dun say so early hor... U c already then say...
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
"
\n",
"
\n",
"
4
\n",
"
ham
\n",
"
Nah I don't think he goes to usf, he lives aro...
\n",
"
NaN
\n",
"
NaN
\n",
"
NaN
\n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
"\n",
" \n",
"\n",
" \n",
"
\n",
"\n",
"\n",
"
\n",
" \n",
"\n",
"\n",
"\n",
" \n",
"
\n",
"\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5572,\n \"fields\": [\n {\n \"column\": \"v1\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"v2\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5169,\n \"samples\": [\n \"Did u download the fring app?\",\n \"Pass dis to all ur contacts n see wat u get! Red;i'm in luv wid u. Blue;u put a smile on my face. Purple;u r realy hot. Pink;u r so swt. Orange;i thnk i lyk u. Green;i realy wana go out wid u. Yelow;i wnt u bck. Black;i'm jealous of u. Brown;i miss you Nw plz giv me one color\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Unnamed: 2\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 43,\n \"samples\": [\n \" GOD said\",\n \" SHE SHUDVETOLD U. DID URGRAN KNOW?NEWAY\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Unnamed: 3\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \" \\\\\\\"OH No! COMPETITION\\\\\\\". Who knew\",\n \" why to miss them\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Unnamed: 4\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"GNT:-)\\\"\",\n \" one day these two will become FREINDS FOREVER!\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 4
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/DS25/spam.csv',encoding='latin1')\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XM9z3jeNj9tz",
"outputId": "d13928c6-3292-481f-9567-d0b9da377d10"
},
"id": "XM9z3jeNj9tz",
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a40ec53a",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "a40ec53a",
"outputId": "f7f07c22-1764-43a4-9e5e-3a113ae64c08"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" v2 v1\n",
"0 Go until jurong point, crazy.. Available only ... ham\n",
"1 Ok lar... Joking wif u oni... ham\n",
"2 Free entry in 2 a wkly comp to win FA Cup fina... spam\n",
"3 U dun say so early hor... U c already then say... ham\n",
"4 Nah I don't think he goes to usf, he lives aro... ham"
],
"text/html": [
"\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
"
\n",
"
\n",
"
v2
\n",
"
v1
\n",
"
\n",
" \n",
" \n",
"
\n",
"
0
\n",
"
Go until jurong point, crazy.. Available only ...
\n",
"
ham
\n",
"
\n",
"
\n",
"
1
\n",
"
Ok lar... Joking wif u oni...
\n",
"
ham
\n",
"
\n",
"
\n",
"
2
\n",
"
Free entry in 2 a wkly comp to win FA Cup fina...
\n",
"
spam
\n",
"
\n",
"
\n",
"
3
\n",
"
U dun say so early hor... U c already then say...
\n",
"
ham
\n",
"
\n",
"
\n",
"
4
\n",
"
Nah I don't think he goes to usf, he lives aro...
\n",
"
ham
\n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
"\n",
" \n",
"\n",
" \n",
"
\n",
"\n",
"\n",
"
\n",
" \n",
"\n",
"\n",
"\n",
" \n",
"
\n",
"\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5572,\n \"fields\": [\n {\n \"column\": \"v2\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5169,\n \"samples\": [\n \"Did u download the fring app?\",\n \"Pass dis to all ur contacts n see wat u get! Red;i'm in luv wid u. Blue;u put a smile on my face. Purple;u r realy hot. Pink;u r so swt. Orange;i thnk i lyk u. Green;i realy wana go out wid u. Yelow;i wnt u bck. Black;i'm jealous of u. Brown;i miss you Nw plz giv me one color\",\n \"Ok...\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"v1\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 6
}
],
"source": [
"df = df[['v2','v1']]\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "12ef474a",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 147
},
"id": "12ef474a",
"outputId": "d9b84ab1-f142-4d31-d334-0a20677f4c72"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"v2 0\n",
"v1 0\n",
"dtype: int64"
],
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
"
\n",
"
\n",
"
0
\n",
"
\n",
" \n",
" \n",
"
\n",
"
v2
\n",
"
0
\n",
"
\n",
"
\n",
"
v1
\n",
"
0
\n",
"
\n",
" \n",
"
\n",
"
"
]
},
"metadata": {},
"execution_count": 7
}
],
"source": [
"df.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "776032f0",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "776032f0",
"outputId": "ba137f5e-66e2-4d11-9888-5fe20073492b"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" v2 v1\n",
"0 Go until jurong point, crazy.. Available only ... 0\n",
"1 Ok lar... Joking wif u oni... 0\n",
"2 Free entry in 2 a wkly comp to win FA Cup fina... 1\n",
"3 U dun say so early hor... U c already then say... 0\n",
"4 Nah I don't think he goes to usf, he lives aro... 0"
],
"text/html": [
"\n",
"
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.