Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import streamlit as st | |
| from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.pipeline import Pipeline | |
| # Load dataset | |
def load_data(path="spam.csv"):
    """Load the spam dataset and normalize it to ``text`` / ``label`` columns.

    The CSV is read as latin-1, columns ``v1``/``v2`` are renamed to
    ``label``/``text``, all other columns are discarded, and the labels are
    mapped so that ``spam`` stays ``spam`` and ``ham`` becomes ``legit``.

    Args:
        path: Path or URL of the CSV file. Defaults to ``"spam.csv"``, the
            location the script has always read from.

    Returns:
        A DataFrame with exactly the columns ``text`` and ``label``. Rows with
        a missing text or a label other than spam/ham are dropped, and the
        index is renumbered from 0.

    Raises:
        KeyError: If the file has no ``v1``/``v2`` (or ``label``/``text``)
            columns.
    """
    raw = pd.read_csv(path, encoding="latin-1")
    renamed = raw.rename(columns={"v1": "label", "v2": "text"})

    # Take an explicit copy so the label assignment below writes to an
    # independent frame rather than to a slice of `renamed` (avoids pandas'
    # SettingWithCopyWarning).
    df = renamed[["text", "label"]].copy()

    # Normalize case/whitespace before mapping so "Spam" or " ham " still match.
    normalized = df["label"].astype(str).str.strip().str.lower()
    df["label"] = normalized.map({"spam": "spam", "ham": "legit"})

    # Unmapped labels become NaN; the classifier cannot be fitted on NaN
    # targets or NaN documents, so drop such rows here.
    df = df.dropna(subset=["text", "label"]).reset_index(drop=True)
    return df
# Load the bundled dataset and train the default model.
#
# Streamlit re-executes this whole script on every widget interaction, so the
# load / split / fit work lives in a cached function: it runs once and later
# reruns reuse the stored result. st.cache_data hands back a fresh copy on
# each call, so code further down that refits `model` in place cannot alter
# the cached default.
# NOTE: because the result is cached, edits to the dataset file on disk are
# only picked up after the Streamlit cache is cleared.
@st.cache_data
def _train_default_model():
    """Load the default dataset, split it, and fit the spam classifier.

    Returns:
        A ``(data, splits, pipeline)`` tuple where ``data`` is the frame
        returned by ``load_data()``, ``splits`` is the 4-item list
        ``[X_train, X_test, y_train, y_test]`` from ``train_test_split``
        (80/20 split, ``random_state=42``), and ``pipeline`` is the fitted
        CountVectorizer -> TfidfTransformer -> MultinomialNB pipeline.
    """
    data = load_data()
    splits = train_test_split(
        data['text'], data['label'], test_size=0.2, random_state=42
    )
    pipeline = Pipeline([
        ('vectorizer', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('classifier', MultinomialNB()),
    ])
    # splits[0] is the training text, splits[2] the training labels.
    pipeline.fit(splits[0], splits[2])
    return data, splits, pipeline


# Expose the same module-level names the rest of the script relies on.
df, (X_train, X_test, y_train, y_test), model = _train_default_model()
# Streamlit UI
st.title("Spam Filter Email Classifier")
st.write("This app classifies emails as **spam** or **legit** based on trained data.")

# Optional: retrain the model on a user-supplied CSV with 'text' and 'label'
# columns.
uploaded_file = st.file_uploader("Upload your own spam dataset (CSV format)", type=["csv"])
if uploaded_file is not None:
    # The script reruns on every interaction while the upload stays attached;
    # rewind so the CSV is always parsed from the start of the buffer.
    uploaded_file.seek(0)
    try:
        uploaded_df = pd.read_csv(uploaded_file)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
        # Show a readable message instead of a raw traceback; the previously
        # trained model stays in use.
        st.error(f"Could not read the uploaded CSV: {exc}")
    else:
        if "text" in uploaded_df.columns and "label" in uploaded_df.columns:
            # Rows without text or label cannot be used for training.
            uploaded_df = uploaded_df.dropna(subset=["text", "label"])
            # The vectorizer needs strings; purely numeric cells are parsed
            # as numbers by read_csv.
            uploaded_df["text"] = uploaded_df["text"].astype(str)
            if len(uploaded_df) < 2:
                # An 80/20 split needs at least one training and one test row.
                st.error("CSV file must contain at least two rows with both 'text' and 'label' values.")
            else:
                df = uploaded_df
                X_train, X_test, y_train, y_test = train_test_split(
                    df['text'], df['label'], test_size=0.2, random_state=42
                )
                model.fit(X_train, y_train)
                st.success("Custom dataset loaded and model retrained!")
        else:
            st.error("CSV file must contain 'text' and 'label' columns.")
# Text input for email classification
email_input = st.text_area("Enter email content:")
if st.button("Classify Email"):
    # Treat whitespace-only input as empty: there is nothing to classify, and
    # the model would otherwise return a label for a blank document.
    if email_input.strip():
        prediction = model.predict([email_input])[0]
        st.subheader(f"The email is classified as: **{prediction}**")
    else:
        st.write("Please enter an email to classify.")