import re

import gradio as gr
import joblib
import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# -----------------------------
# 1️⃣ Preprocessing
# -----------------------------
# Only hit the network when the stopwords corpus is not already installed;
# nltk.data.find raises LookupError when the resource is missing.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

# Patterns applied, in this order, by preprocess_message.
_URL_RE = re.compile(r'http\S+|www\S+')
_EMAIL_RE = re.compile(r'\S+@\S+')
_PHONE_RE = re.compile(r'\+?\d[\d -]{8,}\d')  # phone-number-like digit runs
_DIGITS_RE = re.compile(r'\d+')
# Everything except lowercase ASCII letters, whitespace and the characters
# "!", "/", "+", ">" is removed (punctuation kept as a spam signal).
_DISALLOWED_RE = re.compile(r'[^a-z\s!/+>]')


def preprocess_message(text) -> str:
    """Clean a raw message before it is vectorized for prediction.

    The text is lowercased, then URLs, e-mail addresses, phone-number-like
    sequences and remaining digits are removed. Characters other than
    ``a-z``, whitespace and ``! / + >`` are dropped, English stopwords are
    filtered out and the remaining tokens are stemmed with a Porter stemmer.

    Args:
        text: The message to clean. Missing values (``None``, ``NaN``)
            yield an empty string; other non-string scalars are converted
            with ``str()``.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    if not isinstance(text, str):
        # A str is never NA, so the NA check is only needed for other types.
        if pd.isna(text):
            return ""
        text = str(text)

    text = text.lower()
    text = _URL_RE.sub('', text)
    text = _EMAIL_RE.sub('', text)
    text = _PHONE_RE.sub('', text)
    text = _DIGITS_RE.sub('', text)
    text = _DISALLOWED_RE.sub('', text)

    words = [
        stemmer.stem(word)
        for word in text.split()
        if word not in stop_words
    ]
    return " ".join(words)


# -----------------------------
# 2️⃣ Model loading
# -----------------------------
# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# loading; only load artifacts that come from a trusted source.
model = joblib.load("spam_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")


# -----------------------------
# 3️⃣ Prediction function
# -----------------------------
def predict_message(message) -> dict:
    """Classify a single message with the loaded model.

    Args:
        message: The raw message text entered by the user.

    Returns:
        A dict with the keys ``"Message"`` (the input, unchanged),
        ``"Prediction"`` (the label returned by ``model.predict``) and
        ``"Spam Probability"`` (second column of ``model.predict_proba``
        rounded to 4 decimals, or ``None`` when the model does not expose
        ``predict_proba``).
    """
    cleaned = preprocess_message(message)
    X = vectorizer.transform([cleaned])

    prediction = model.predict(X)[0]
    # Presumably a NumPy scalar (model type not visible here); convert it to
    # a native Python value so the JSON output can always serialize it.
    if hasattr(prediction, "item"):
        prediction = prediction.item()

    # NOTE(review): column 1 is assumed to be the spam class — confirm
    # against model.classes_.
    probability = (
        model.predict_proba(X)[0][1]
        if hasattr(model, 'predict_proba')
        else None
    )

    return {
        "Message": message,
        "Prediction": prediction,
        "Spam Probability": (
            round(float(probability), 4) if probability is not None else None
        ),
    }


# -----------------------------
# 4️⃣ Gradio interface
# -----------------------------
iface = gr.Interface(
    fn=predict_message,
    inputs=gr.Textbox(lines=3, placeholder="Entrez votre message ici..."),
    outputs="json",
    title="📩 Spam Detector",
    description="Un modèle ML qui détecte si un message est SPAM ou HAM.",
)

if __name__ == "__main__":
    iface.launch()