Pushp123 committed on
Commit
ddc394e
·
verified ·
1 Parent(s): 0375059

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import numpy as np
2
  import pandas as pd
3
  from sklearn.model_selection import train_test_split
@@ -6,20 +8,23 @@ from sklearn.linear_model import LogisticRegression
6
  from sklearn.metrics import accuracy_score
7
  import gradio as gr
8
 
9
- df=pd.read_csv("mail_data.csv")
10
 
 
11
 
12
  df.loc[df["Category"]=="spam","Category",]=0
13
  df.loc[df["Category"]=="ham","Category",]=1
14
 
 
 
15
  x=df["Message"]
16
  y=df["Category"]
17
 
18
-
19
-
20
 
21
  x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)
22
 
 
23
  feature_extraction=TfidfVectorizer(min_df=1,stop_words="english",lowercase=True)
24
 
25
  x_train_features = feature_extraction.fit_transform(x_train)
@@ -31,24 +36,16 @@ y_test = y_test.astype("int")
31
 
32
 
33
  model=LogisticRegression()
34
-
35
  model.fit(x_train_features,y_train)
36
 
37
-
38
-
39
  x_predict=model.predict(x_train_features)
40
  x_accuracy=accuracy_score(x_predict,y_train)
41
 
42
-
43
  y_predict=model.predict(x_test_features)
44
  y_accuracy=accuracy_score(y_predict,y_test)
45
 
46
-
47
-
48
-
49
-
50
-
51
-
52
 
53
  # Function to predict whether the email is spam or ham
54
  def classify_email(email_text):
 
1
+ # 1. Importing libraries
2
+
3
  import numpy as np
4
  import pandas as pd
5
  from sklearn.model_selection import train_test_split
 
8
  from sklearn.metrics import accuracy_score
9
  import gradio as gr
10
 
11
+ #2. Data Preprocessing
12
 
13
+ df=pd.read_csv("mail_data.csv")
14
 
15
  df.loc[df["Category"]=="spam","Category",]=0
16
  df.loc[df["Category"]=="ham","Category",]=1
17
 
18
+ # Splitting data into x and y
19
+
20
  x=df["Message"]
21
  y=df["Category"]
22
 
23
+ #3. Modeling Part
 
24
 
25
  x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)
26
 
27
+ # Feature extraction using TfidfVectorizer
28
  feature_extraction=TfidfVectorizer(min_df=1,stop_words="english",lowercase=True)
29
 
30
  x_train_features = feature_extraction.fit_transform(x_train)
 
36
 
37
 
38
  model=LogisticRegression()
39
+ # Train the model on the training-data features only
40
  model.fit(x_train_features,y_train)
41
 
 
 
42
  x_predict=model.predict(x_train_features)
43
  x_accuracy=accuracy_score(x_predict,y_train)
44
 
 
45
  y_predict=model.predict(x_test_features)
46
  y_accuracy=accuracy_score(y_predict,y_test)
47
 
48
+ #4. UI For Model
 
 
 
 
 
49
 
50
  # Function to predict whether the email is spam or ham
51
  def classify_email(email_text):