# Anwaree's picture
# Create app.py
# 33690b7 verified
# -----------------------------
# app.py pour déploiement Spam Detector
# -----------------------------
import gradio as gr
import joblib
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk
# Fetch the NLTK stop-word corpus only when it is not already available
# locally, so that restarts do not depend on network access once the data
# has been cached.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
# -----------------------------
# 1️⃣ Prétraitement d'un message
# -----------------------------
# Shared preprocessing resources, built once when the module is loaded.
stemmer = PorterStemmer()
# Stored as a set so the per-token membership test in preprocess_message
# is a constant-time lookup.
stop_words = set(stopwords.words('english'))
def preprocess_message(text):
    """Normalize a raw message into the token format fed to the vectorizer.

    The text is lower-cased, then stripped (in this order) of URL-like
    tokens, e-mail-like tokens, phone-number-like digit runs, any remaining
    digits, and every character that is not a lowercase ASCII letter,
    whitespace, or one of ``! / + >``. The surviving tokens are filtered
    against the English stop-word set and stemmed.

    Args:
        text: The raw message. Any falsy value (``None``, ``""``) is accepted.

    Returns:
        The cleaned, stemmed tokens joined by single spaces, or ``""`` when
        ``text`` is falsy.
    """
    if not text:
        return ""

    cleaned = text.lower()

    # The order matters: phone numbers must be matched before the generic
    # digit removal, and the final pattern keeps the punctuation that is
    # useful for spotting spam.
    for pattern in (
        r'http\S+|www\S+',
        r'\S+@\S+',
        r'\+?\d[\d -]{8,}\d',
        r'\d+',
        r'[^a-z\s!/+>]',
    ):
        cleaned = re.sub(pattern, '', cleaned)

    stems = []
    for token in cleaned.split():
        # Stop words are dropped before stemming, on the unstemmed token.
        if token in stop_words:
            continue
        stems.append(stemmer.stem(token))
    return " ".join(stems)
# -----------------------------
# 2️⃣ Charger modèle et TF-IDF
# -----------------------------
# Load the serialized classifier and its TF-IDF vectorizer at import time.
# Both paths are relative, so they are resolved against the current working
# directory of the process.
# NOTE(review): joblib.load deserializes arbitrary Python objects; only load
# artifact files that come from a trusted source.
model = joblib.load("spam_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")
# -----------------------------
# 3️⃣ Fonction de prédiction
# -----------------------------
def predict_message(message):
    """Classify a message and report the model's spam probability.

    Args:
        message: The raw message text to classify.

    Returns:
        A dict with three keys:
        ``"Message"`` - the input, unchanged;
        ``"Prediction"`` - the label predicted by the model, converted to a
        built-in Python value when the model returns a NumPy scalar;
        ``"Spam Probability"`` - the second column of ``predict_proba``
        rounded to 4 decimals, or ``None`` when the model does not expose
        ``predict_proba``.
    """
    cleaned = preprocess_message(message)
    X = vectorizer.transform([cleaned])

    prediction = model.predict(X)[0]
    # The result is rendered as JSON. If the label is a NumPy scalar (anything
    # exposing ``.item()``), unwrap it to the equivalent built-in int/str so
    # serialization does not depend on the JSON encoder's NumPy support.
    if hasattr(prediction, "item"):
        prediction = prediction.item()

    probability = None
    if hasattr(model, 'predict_proba'):
        # NOTE(review): column 1 is assumed to be the spam class; confirm
        # against the class order the model was trained with.
        probability = round(float(model.predict_proba(X)[0][1]), 4)

    return {
        "Message": message,
        "Prediction": prediction,
        "Spam Probability": probability,
    }
# -----------------------------
# 4️⃣ Interface Gradio
# -----------------------------
# Single-textbox UI: the submitted text is passed to predict_message and the
# dict it returns is shown through the "json" output component.
iface = gr.Interface(
    title="📩 Spam Detector",
    description="Entrez un message pour savoir s'il s'agit de spam ou non.",
    fn=predict_message,
    inputs=gr.Textbox(
        placeholder="Entrez votre message...",
        lines=3,
    ),
    outputs="json",
)
# -----------------------------
# 5️⃣ Lancer l'application
# -----------------------------
# Launch the Gradio interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()