{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "939855ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fba51ea4",
   "metadata": {},
   "outputs": [],
   "source": [
    "df=pd.read_csv(\"spam2.csv\",encoding=\"latin-1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07c9eb3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head(6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8c8f1d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe0d6616",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5865574",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f96657e",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.rename(columns={'v1': 'class', 'v2': 'message'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d3cdb2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e196722",
   "metadata": {},
   "outputs": [],
   "source": [
    "df['class']=df['class'].map({'ham':0, 'spam':1})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7279b415",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06d05ab6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dcba8c4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "cv=CountVectorizer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb354066",
   "metadata": {},
   "outputs": [],
   "source": [
    "x=df['message']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a64ff973",
   "metadata": {},
   "outputs": [],
   "source": [
    "y=df['class']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea3ad8e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "x.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df685f7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed8a1948",
   "metadata": {},
   "outputs": [],
   "source": [
    "x=cv.fit_transform(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dca72cdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f96137d",
   "metadata": {},
   "source": [
    "1.The Cat\n",
    "2.The Dog \n",
    "3.The rat\n",
    "\n",
    "   The Cat Dog rat\n",
    "1.   1   1   0   0\n",
    "2.    1    0   1  0\n",
    "3.    1    0   0  1\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "877394e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection  import train_test_split\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "563bd95e",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train, x_test, y_train, y_test=train_test_split(x,y, test_size=0.2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81b47b81",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b33ace6",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from sklearn.naive_bayes import MultinomialNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "829d8553",
   "metadata": {},
   "outputs": [],
   "source": [
    "model=MultinomialNB()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e4e5310d",
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "295a5529",
   "metadata": {},
   "outputs": [],
   "source": [
    "result=model .score(x_train, y_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98b97f5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "result*100\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fa0997c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "920727b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "pickle.dump(model,open(\"spam2.pkl\",\"wb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e7e327a",
   "metadata": {},
   "outputs": [],
   "source": [
    "pickle.dump(cv,open(\"vectorizer.pkl\",\"wb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "512372fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "clf=pickle.load(open(\"spam2.pkl\",\"rb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4fae6f43",
   "metadata": {},
   "outputs": [],
   "source": [
    "clf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8a6869c",
   "metadata": {},
   "outputs": [],
   "source": [
    "msg =\"hi Sandeep Yu are a hero\"\n",
    "df=[msg]\n",
    "vect=cv.transform(df).toarray()\n",
    "result= model.predict(vect)\n",
    "print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}