import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Load dataset
@st.cache_data
def load_data():
    """Load the bundled spam dataset and normalise it for training.

    Reads ``spam.csv`` as Latin-1, renames the raw ``v1``/``v2`` columns to
    ``label``/``text``, and rewrites the labels so that ``ham`` becomes
    ``legit`` while ``spam`` stays ``spam``.

    Returns:
        A DataFrame with exactly two columns, ``text`` and ``label``. Rows
        with a missing text or a label other than ``spam``/``ham`` are
        dropped so they never reach training.
    """
    # Replace with your dataset path or URL
    url = "spam.csv"

    raw = pd.read_csv(url, encoding="latin-1")

    # Take an explicit copy of the two needed columns so the label rewrite
    # below operates on an independent frame (no SettingWithCopyWarning).
    data = raw.rename(columns={"v1": "label", "v2": "text"})[["text", "label"]].copy()

    # Standardize labels; any value not in the mapping becomes NaN here.
    data["label"] = data["label"].map({"spam": "spam", "ham": "legit"})

    # Discard rows whose text is missing or whose label was not recognised.
    return data.dropna(subset=["text", "label"])

# Fetch the labelled messages (load_data is wrapped in st.cache_data)
df = load_data()

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# Spam classifier: raw token counts -> TF-IDF weighting -> multinomial Naive Bayes.
# fit() returns the pipeline itself, so construction and training are chained.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# training step runs again on every rerun.
model = Pipeline(
    steps=[
        ('vectorizer', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('classifier', MultinomialNB()),
    ]
).fit(X_train, y_train)

# Streamlit UI
st.title("Spam Filter Email Classifier")
st.write("This app classifies emails as **spam** or **legit** based on trained data.")

# File uploader for a custom dataset
uploaded_file = st.file_uploader("Upload your own spam dataset (CSV format)", type=["csv"])


def _load_custom_dataset(file):
    """Parse and validate an uploaded CSV of labelled messages.

    Args:
        file: Seekable binary file-like object holding the CSV data.

    Returns:
        A ``(frame, error)`` pair. On success ``frame`` is a DataFrame whose
        ``text`` column holds strings and whose ``text``/``label`` columns
        contain no missing values, and ``error`` is ``None``. On failure
        ``frame`` is ``None`` and ``error`` is a message suitable for display.
    """
    try:
        try:
            frame = pd.read_csv(file)
        except UnicodeDecodeError:
            # Not valid UTF-8: rewind and retry with the encoding that the
            # bundled dataset is read with.
            file.seek(0)
            frame = pd.read_csv(file, encoding="latin-1")
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        return None, f"Could not read the uploaded CSV file: {exc}"

    if not {"text", "label"}.issubset(frame.columns):
        return None, "CSV file must contain 'text' and 'label' columns."

    # Rows without a text or a label cannot be used for training.
    frame = frame.dropna(subset=["text", "label"])
    if len(frame) < 2:
        return None, "CSV file must contain at least two rows with both 'text' and 'label' values."

    # The vectorizer expects string documents; coerce e.g. numeric cells.
    frame = frame.assign(text=frame["text"].astype(str))
    return frame, None


if uploaded_file is not None:
    custom_df, upload_error = _load_custom_dataset(uploaded_file)
    if upload_error is not None:
        st.error(upload_error)
    else:
        try:
            custom_split = train_test_split(
                custom_df['text'], custom_df['label'], test_size=0.2, random_state=42
            )
            # custom_split is (X_train, X_test, y_train, y_test)
            model.fit(custom_split[0], custom_split[2])
        except ValueError as exc:
            # Raised by scikit-learn when the data cannot be split or fitted.
            st.error(f"Could not train on the uploaded dataset: {exc}")
        else:
            # Only replace the module-level data once retraining succeeded.
            df = custom_df
            X_train, X_test, y_train, y_test = custom_split
            st.success("Custom dataset loaded and model retrained!")

# Text input for email classification
email_input = st.text_area("Enter email content:")
if st.button("Classify Email"):
    # Treat whitespace-only input the same as empty input: it contains
    # nothing to classify.
    email_text = email_input.strip() if email_input else ""
    if email_text:
        # predict() takes a sequence of documents and returns one label each.
        prediction = model.predict([email_text])[0]
        st.subheader(f"The email is classified as: **{prediction}**")
    else:
        st.write("Please enter an email to classify.")