File size: 952 Bytes
8350ed6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import re
from functools import lru_cache

from fastapi import FastAPI
from pydantic import BaseModel
import joblib


model = joblib.load("model.joblib")
tfidf_vectorizer = joblib.load("tfidf_vectorizer.joblib")


class TextInput(BaseModel):
    text: str


app = FastAPI()


@app.post("/predict")
def predict(input: TextInput):
   
    processed_text = preprocess_text(input.text)
    
    
    text_tfidf = tfidf_vectorizer.transform([processed_text])
   
    prediction = model.predict(text_tfidf)
    
    return {"prediction": "Spam" if int(prediction[0]) == 0 else "Ham"}

def preprocess_text(text):
    import re
    from nltk.stem import WordNetLemmatizer
    from nltk.corpus import stopwords
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = text.lower()
    words = text.split()
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return ' '.join(words)