Tzetha committed on
Commit
04da40f
·
1 Parent(s): 4d3c78f

added database

Browse files
Files changed (2) hide show
  1. app.py +36 -35
  2. spam.csv +0 -0
app.py CHANGED
@@ -1,43 +1,30 @@
1
  import pandas as pd
2
- import numpy as np
3
  import streamlit as st
4
  from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
5
  from sklearn.model_selection import train_test_split
6
  from sklearn.naive_bayes import MultinomialNB
7
  from sklearn.pipeline import Pipeline
8
- from sklearn.metrics import accuracy_score, classification_report
9
-
10
- # Sample dataset (email, label)
11
- data = {'text': [
12
- 'Congratulations! You have won a free lottery ticket.',
13
- 'Important meeting scheduled for tomorrow.',
14
- 'Limited-time offer! Get a discount now!',
15
- 'Your bank account needs urgent verification.',
16
- 'Lunch meeting at 1 PM.',
17
- 'Win a free trip to the Bahamas!',
18
- 'Project deadline extended to next week.',
19
- 'Exclusive deal just for you! Buy now!',
20
- 'Reminder: Your doctor appointment is at 10 AM tomorrow.',
21
- 'Earn money fast with this simple trick!',
22
- 'Meeting rescheduled to 3 PM.',
23
- 'Verify your email to secure your account.',
24
- 'Huge discount on your favorite products!',
25
- 'Team outing planned for this weekend.',
26
- 'Act now! Limited seats available for the webinar.',
27
- 'Your order has been shipped successfully.',
28
- 'Congratulations! You have been selected for a special reward.',
29
- 'Last chance to claim your exclusive offer!',
30
- 'Monthly budget report attached.',
31
- 'Reminder: Submit your timesheet by Friday.'
32
- ],
33
- 'label': ['spam', 'legit', 'spam', 'spam', 'legit', 'spam', 'legit', 'spam', 'legit', 'spam', 'legit', 'spam', 'spam', 'legit', 'spam', 'legit', 'spam', 'spam', 'legit', 'legit']}
34
-
35
- df = pd.DataFrame(data)
36
-
37
- # Splitting data into training and testing sets
38
  X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)
39
 
40
- # Building the spam filter model using a pipeline
41
  model = Pipeline([
42
  ('vectorizer', CountVectorizer()),
43
  ('tfidf', TfidfTransformer()),
@@ -47,13 +34,27 @@ model = Pipeline([
47
  # Train the model
48
  model.fit(X_train, y_train)
49
 
50
- # Streamlit App
51
  st.title("Spam Filter Email Classifier")
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
 
53
  email_input = st.text_area("Enter email content:")
54
  if st.button("Classify Email"):
55
  if email_input:
56
  prediction = model.predict([email_input])[0]
57
- st.write(f"The email is classified as: {prediction}")
58
  else:
59
- st.write("Please enter an email to classify.")
 
1
  import pandas as pd
 
2
  import streamlit as st
3
  from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
4
  from sklearn.model_selection import train_test_split
5
  from sklearn.naive_bayes import MultinomialNB
6
  from sklearn.pipeline import Pipeline
7
+
8
+ # Load dataset
9
+ @st.cache_data
10
+ def load_data():
11
+ # Replace with your dataset path or URL
12
+ url = "spam.csv"
13
+
14
+ df = pd.read_csv(url, encoding="latin-1")
15
+ df = df.rename(columns={"v1": "label", "v2": "text"}) # Rename columns
16
+ df = df[['text', 'label']] # Keep only necessary columns
17
+ df['label'] = df['label'].map({'spam': 'spam', 'ham': 'legit'}) # Standardize labels
18
+
19
+ return df
20
+
21
+ # Load data
22
+ df = load_data()
23
+
24
+ # Split dataset into training and testing sets
 
 
 
 
 
 
 
 
 
 
 
 
25
  X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)
26
 
27
+ # Build spam classifier model
28
  model = Pipeline([
29
  ('vectorizer', CountVectorizer()),
30
  ('tfidf', TfidfTransformer()),
 
34
  # Train the model
35
  model.fit(X_train, y_train)
36
 
37
+ # Streamlit UI
38
  st.title("Spam Filter Email Classifier")
39
+ st.write("This app classifies emails as **spam** or **legit** based on trained data.")
40
+
41
+ # File uploader for a custom dataset
42
+ uploaded_file = st.file_uploader("Upload your own spam dataset (CSV format)", type=["csv"])
43
+
44
+ if uploaded_file:
45
+ df = pd.read_csv(uploaded_file)
46
+ if "text" in df.columns and "label" in df.columns:
47
+ X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)
48
+ model.fit(X_train, y_train)
49
+ st.success("Custom dataset loaded and model retrained!")
50
+ else:
51
+ st.error("CSV file must contain 'text' and 'label' columns.")
52
 
53
+ # Text input for email classification
54
  email_input = st.text_area("Enter email content:")
55
  if st.button("Classify Email"):
56
  if email_input:
57
  prediction = model.predict([email_input])[0]
58
+ st.subheader(f"The email is classified as: **{prediction}**")
59
  else:
60
+ st.write("Please enter an email to classify.")
spam.csv ADDED
The diff for this file is too large to render. See raw diff