{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-md",
   "metadata": {},
   "source": [
    "# SMS spam classifier\n",
    "\n",
    "Loads `spam2.csv`, turns each message into bag-of-words counts with `CountVectorizer`, trains a `MultinomialNB` classifier, reports accuracy on a held-out split, and saves the fitted model and vectorizer to `spam2.pkl` and `vectorizer.pkl`.\n",
    "\n",
    "Label encoding: `ham` -> 0, `spam` -> 1."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "939855ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.naive_bayes import MultinomialNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "DATA_PATH = \"spam2.csv\"\n",
    "MODEL_PATH = \"spam2.pkl\"\n",
    "VECTORIZER_PATH = \"vectorizer.pkl\"\n",
    "\n",
    "LABELS = {'ham': 0, 'spam': 1}\n",
    "TEST_SIZE = 0.2\n",
    "SEED = 42  # fixes the train/test split so the reported accuracy is reproducible"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "load-md",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fba51ea4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the raw frame untouched so every later cell can be re-run safely.\n",
    "raw_df = pd.read_csv(DATA_PATH, encoding=\"latin-1\")\n",
    "raw_df.head(6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8c8f1d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "clean-md",
   "metadata": {},
   "source": [
    "## Clean\n",
    "\n",
    "Drop the unused `Unnamed: *` columns, rename `v1`/`v2` to `class`/`message`, and encode the label as an integer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe0d6616",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Built from raw_df in a single cell, so re-running it never double-applies\n",
    "# the drop or the label mapping.\n",
    "df = (\n",
    "    raw_df\n",
    "    .drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'])\n",
    "    .rename(columns={'v1': 'class', 'v2': 'message'})\n",
    ")\n",
    "df['class'] = df['class'].map(LABELS)\n",
    "\n",
    "# .map() yields NaN for any label missing from LABELS; fail loudly instead\n",
    "# of training on corrupted targets.\n",
    "assert df['class'].notna().all(), \"unexpected label in 'class' column\"\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb354066",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = df['message']\n",
    "y = df['class']\n",
    "x.shape, y.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f96137d",
   "metadata": {},
   "source": [
    "## Bag-of-words encoding\n",
    "\n",
    "`CountVectorizer` turns each document into a row of word counts, one column per vocabulary word. Example documents:\n",
    "\n",
    "1. The Cat\n",
    "2. The Dog\n",
    "3. The rat\n",
    "\n",
    "| Doc | The | Cat | Dog | rat |\n",
    "|-----|-----|-----|-----|-----|\n",
    "| 1.  | 1   | 1   | 0   | 0   |\n",
    "| 2.  | 1   | 0   | 1   | 0   |\n",
    "| 3.  | 1   | 0   | 0   | 1   |\n",
    "\n",
    "(With its default settings `CountVectorizer` lowercases the text first, so the real columns would be the lowercased words.)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "split-md",
   "metadata": {},
   "source": [
    "## Split, then vectorize\n",
    "\n",
    "The raw messages are split *before* the vectorizer is fitted, so the vocabulary is learned from the training messages only and the test set stays unseen."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "563bd95e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# stratify=y keeps the ham/spam ratio the same in both parts.\n",
    "msg_train, msg_test, y_train, y_test = train_test_split(\n",
    "    x, y, test_size=TEST_SIZE, random_state=SEED, stratify=y\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed8a1948",
   "metadata": {},
   "outputs": [],
   "source": [
    "cv = CountVectorizer()\n",
    "x_train = cv.fit_transform(msg_train)  # learn the vocabulary on training text only\n",
    "x_test = cv.transform(msg_test)        # reuse that vocabulary for the test text\n",
    "\n",
    "assert x_train.shape[0] == len(y_train)\n",
    "assert x_test.shape[0] == len(y_test)\n",
    "\n",
    "x_train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "train-md",
   "metadata": {},
   "source": [
    "## Train and evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "829d8553",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = MultinomialNB()\n",
    "model.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "295a5529",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The held-out score is the number that matters; the training score is shown\n",
    "# next to it only to make over-fitting visible.\n",
    "train_accuracy = model.score(x_train, y_train)\n",
    "test_accuracy = model.score(x_test, y_test)\n",
    "pd.Series(\n",
    "    {'train': train_accuracy * 100, 'test': test_accuracy * 100},\n",
    "    name='accuracy (%)',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "persist-md",
   "metadata": {},
   "source": [
    "## Save and reload\n",
    "\n",
    "The model and the vectorizer must be saved together: the model only understands the column layout produced by this exact vectorizer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "920727b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `with` guarantees the files are flushed and closed.\n",
    "with open(MODEL_PATH, \"wb\") as model_file:\n",
    "    pickle.dump(model, model_file)\n",
    "\n",
    "with open(VECTORIZER_PATH, \"wb\") as vectorizer_file:\n",
    "    pickle.dump(cv, vectorizer_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "512372fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.\n",
    "with open(MODEL_PATH, \"rb\") as model_file:\n",
    "    clf = pickle.load(model_file)\n",
    "\n",
    "with open(VECTORIZER_PATH, \"rb\") as vectorizer_file:\n",
    "    vectorizer = pickle.load(vectorizer_file)\n",
    "\n",
    "clf"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "predict-md",
   "metadata": {},
   "source": [
    "## Predict with the reloaded artifacts\n",
    "\n",
    "Output `[0]` means ham, `[1]` means spam."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8a6869c",
   "metadata": {},
   "outputs": [],
   "source": [
    "msg = \"hi Sandeep Yu are a hero\"\n",
    "# Use the reloaded vectorizer and classifier so this cell really checks what\n",
    "# was written to disk. The sparse matrix is passed as-is; no .toarray() needed.\n",
    "vect = vectorizer.transform([msg])\n",
    "result = clf.predict(vect)\n",
    "print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}