File size: 1,152 Bytes
7830025
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from NoCodeTextClassifier.preprocessing import *
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from pathlib import Path
import joblib

# Classify a single email typed on stdin as "spam" or "not_spam".
#
# Flow: read the email, rebuild the TF-IDF vocabulary from the training CSV,
# vectorize the cleaned email, and run it through the saved model.

# Prompt for the email to classify.
text = input("Enter the Email: \n")

# Load the training data; here it is used only to fit the vectorizer below.
train_path = Path("./ML Engineer/train.csv")
df = pd.read_csv(train_path)

# Clean the training emails.
# TextCleaner is presumably provided by the wildcard import from
# NoCodeTextClassifier.preprocessing -- TODO confirm.
currency_symbols = r'[\$\£\€\¥\₹\¢\₽\₩\₪]'
text_cleaner = TextCleaner(currency_symbols)
df['clean_text'] = df['email'].apply(text_cleaner.clean_text)

# Fit the TfidfVectorizer on the cleaned training text.
# fit() returns the vectorizer itself, so its result is deliberately not
# bound to a separate name.
# NOTE(review): the vectorizer is refit on every run; this assumes the saved
# model was trained on features produced with the same cleaning step and the
# same TfidfVectorizer settings -- confirm, or persist the fitted vectorizer
# next to the model.
vectorizer = TfidfVectorizer(max_features=10000)
vectorizer.fit(df['clean_text'])

# Clean the input email the same way as the training text.
clean_text = str(text_cleaner.clean_text(text))
print(f"\nThe clean text is : {clean_text}")

# Vectorize the cleaned email; transform() expects an iterable of documents,
# hence the single-element list.
email_features = vectorizer.transform([clean_text])

# Load the trained model from disk.
# NOTE(security): joblib.load unpickles the file and can execute arbitrary
# code; only load model files from a trusted source.
loaded_model = joblib.load('email_detection_model.pkl')

# Predict the class of the single email and map it to a readable label
# (1 -> "spam", anything else -> "not_spam").
predicted_class = int(loaded_model.predict(email_features)[0])
predictions = "spam" if predicted_class == 1 else "not_spam"

# Print the prediction.
print(f"\nThe prediction is : {predictions}")