Spaces:

Alamgirapi
/

NoCodeTextClassifier

Sleeping

File size: 1,152 Bytes
from NoCodeTextClassifier.preprocessing import *
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from pathlib import Path
import joblib

# Classify one e-mail typed on stdin as "spam" or "not_spam".
#
# Flow: read the e-mail, rebuild the TF-IDF vocabulary from the training
# CSV, vectorize the cleaned e-mail, and score it with the saved model.

# Input the email
text = input("Enter the Email: \n")

# Load the training data; here it is only used to fit the vectorizer.
train_path = Path("./ML Engineer/train.csv")
df = pd.read_csv(train_path)

# Clean the training text. TextCleaner comes from the project's
# preprocessing module and is constructed with the currency-symbol pattern.
currency_symbols = r'[\$\£\€\¥\₹\¢\₽\₩\₪]'
text_cleaner = TextCleaner(currency_symbols)
df['clean_text'] = df['email'].apply(text_cleaner.clean_text)

# Fit the TfidfVectorizer on the training data. fit() returns the
# vectorizer itself (not a feature matrix), so its result is not kept.
# NOTE(review): this presumably mirrors the vectorizer settings used when
# the saved model was trained -- confirm, otherwise the feature columns
# will not line up with what the model expects.
vectorizer = TfidfVectorizer(max_features=10000)
vectorizer.fit(df['clean_text'])

# Clean the input email
clean_text = str(text_cleaner.clean_text(text))
print(f"\nThe clean text is : {clean_text}")

# Vectorize the cleaned email into a one-row feature matrix
features = vectorizer.transform([clean_text])

# Load the model from the file.
# SECURITY: joblib.load unpickles the file and can execute arbitrary code;
# only load model files that come from a trusted source.
loaded_model = joblib.load('email_detection_model.pkl')

# Predict the label for the single input row; label 1 is reported as spam
predicted_label = int(loaded_model.predict(features)[0])
predictions = "spam" if predicted_label == 1 else "not_spam"

# Print the prediction
print(f"\nThe prediction is : {predictions}")