Spaces:

Tzetha
/

MachineLearning

Sleeping

App Files Files Community

Tzetha committed on Mar 4, 2025

Commit

04da40f

1 Parent(s): 4d3c78f

added database

Browse files

Files changed (2) hide show

app.py +36 -35
spam.csv +0 -0

app.py CHANGED Viewed

@@ -1,43 +1,30 @@
 import pandas as pd
-import numpy as np
 import streamlit as st
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
 from sklearn.model_selection import train_test_split
 from sklearn.naive_bayes import MultinomialNB
 from sklearn.pipeline import Pipeline
-from sklearn.metrics import accuracy_score, classification_report
-# Sample dataset (email, label)
-data = {'text': [
-    'Congratulations! You have won a free lottery ticket.',
-    'Important meeting scheduled for tomorrow.',
-    'Limited-time offer! Get a discount now!',
-    'Your bank account needs urgent verification.',
-    'Lunch meeting at 1 PM.',
-    'Win a free trip to the Bahamas!',
-    'Project deadline extended to next week.',
-    'Exclusive deal just for you! Buy now!',
-    'Reminder: Your doctor appointment is at 10 AM tomorrow.',
-    'Earn money fast with this simple trick!',
-    'Meeting rescheduled to 3 PM.',
-    'Verify your email to secure your account.',
-    'Huge discount on your favorite products!',
-    'Team outing planned for this weekend.',
-    'Act now! Limited seats available for the webinar.',
-    'Your order has been shipped successfully.',
-    'Congratulations! You have been selected for a special reward.',
-    'Last chance to claim your exclusive offer!',
-    'Monthly budget report attached.',
-    'Reminder: Submit your timesheet by Friday.'
-],
-'label': ['spam', 'legit', 'spam', 'spam', 'legit', 'spam', 'legit', 'spam', 'legit', 'spam', 'legit', 'spam', 'spam', 'legit', 'spam', 'legit', 'spam', 'spam', 'legit', 'legit']}
-df = pd.DataFrame(data)
-# Splitting data into training and testing sets
 X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)
-# Building the spam filter model using a pipeline
 model = Pipeline([
     ('vectorizer', CountVectorizer()),
     ('tfidf', TfidfTransformer()),
@@ -47,13 +34,27 @@ model = Pipeline([
 # Train the model
 model.fit(X_train, y_train)
-# Streamlit App
 st.title("Spam Filter Email Classifier")
 email_input = st.text_area("Enter email content:")
 if st.button("Classify Email"):
     if email_input:
         prediction = model.predict([email_input])[0]
-        st.write(f"The email is classified as: {prediction}")
     else:
-        st.write("Please enter an email to classify.")

 import pandas as pd
 import streamlit as st
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
 from sklearn.model_selection import train_test_split
 from sklearn.naive_bayes import MultinomialNB
 from sklearn.pipeline import Pipeline
+# Load dataset
+@st.cache_data  # Streamlit caches the returned DataFrame so script reruns skip re-reading the CSV
+def load_data():
+    # Replace with your dataset path or URL
+    url = "spam.csv"  # relative path to the dataset file
+    df = pd.read_csv(url, encoding="latin-1")  # decoded as latin-1 instead of pandas' default UTF-8
+    df = df.rename(columns={"v1": "label", "v2": "text"})  # Rename raw columns v1/v2 to label/text
+    df = df[['text', 'label']]  # Keep only the two columns used for training
+    df['label'] = df['label'].map({'spam': 'spam', 'ham': 'legit'})  # 'ham' becomes 'legit'; NOTE(review): values outside this dict would map to NaN - confirm none exist
+    return df
+# Load data
+df = load_data()
+# Split dataset into training and testing sets
 X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)
+# Build spam classifier model
 model = Pipeline([
     ('vectorizer', CountVectorizer()),
     ('tfidf', TfidfTransformer()),
 # Train the model
 model.fit(X_train, y_train)
+# Streamlit UI
 st.title("Spam Filter Email Classifier")
+st.write("This app classifies emails as **spam** or **legit** based on trained data.")
+# File uploader for a custom dataset
+uploaded_file = st.file_uploader("Upload your own spam dataset (CSV format)", type=["csv"])  # widget accepts .csv files only
+if uploaded_file:
+    df = pd.read_csv(uploaded_file)  # rebinds the module-level df; NOTE(review): default encoding here, unlike latin-1 in load_data - confirm intended
+    if "text" in df.columns and "label" in df.columns:  # require the same column names the bundled dataset is renamed to
+        X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)
+        model.fit(X_train, y_train)  # refit the existing pipeline on the uploaded data
+        st.success("Custom dataset loaded and model retrained!")
+    else:
+        st.error("CSV file must contain 'text' and 'label' columns.")
+# Text input for email classification
 email_input = st.text_area("Enter email content:")
 if st.button("Classify Email"):
     if email_input:
         prediction = model.predict([email_input])[0]
+        st.subheader(f"The email is classified as: **{prediction}**")
     else:
+        st.write("Please enter an email to classify.")

spam.csv ADDED Viewed

The diff for this file is too large to render. See raw diff