kaisarhossain committed on
Commit
6ae5e44
·
verified ·
1 Parent(s): 8bb4891

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -63
app.py CHANGED
@@ -3,58 +3,24 @@ import torch
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  from dotenv import load_dotenv
5
  import os
6
- from huggingface_hub import login
7
  import numpy as np
 
 
8
 
 
 
 
9
  st.set_page_config(
10
- page_title="Smart Email Classification App",
11
  page_icon="📧",
12
  layout="wide",
13
- initial_sidebar_state="collapsed"
14
  )
15
 
16
- st.title("Smart Email Classification App")
17
-
18
- # 1. Load Environment Variables
19
- load_dotenv()
20
- HF_TOKEN = os.getenv("HF_TOKEN")
21
- MODEL_REPO = os.getenv("MODEL_REPO", "kaisarhossain/email_classifier_model") # default fallback
22
-
23
- # Login to Hugging Face (optional if model is public)
24
- if HF_TOKEN:
25
- login(token=HF_TOKEN)
26
-
27
- # 2. Load Model & Tokenizer
28
- @st.cache_resource(show_spinner=True)
29
- def load_model():
30
- tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
31
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)
32
- return tokenizer, model
33
-
34
- tokenizer, model = load_model()
35
-
36
- # Define the label schema
37
- LABELS = [
38
- "Promotions",
39
- "Spam",
40
- "Social Media Updates",
41
- "Forum Updates",
42
- "Code Verification",
43
- "Work Updates"
44
- ]
45
-
46
- # 3. Streamlit App Configuration
47
-
48
-
49
- # st.set_page_config(
50
- # page_title="Smart Email Classification App",
51
- # page_icon="📧",
52
- # layout="wide",
53
- # initial_sidebar_state="collapsed"
54
- # )
55
-
56
- # st.title("Smart Email Classification App")
57
-
58
  st.markdown("""
59
  <style>
60
  body {
@@ -70,30 +36,136 @@ st.markdown("""
70
  text-align: center;
71
  color: #444;
72
  font-size: 1.1rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  }
74
  .prediction-card {
75
  background-color: white;
76
  border-radius: 12px;
77
  padding: 20px;
78
  box-shadow: 0 4px 20px rgba(0,0,0,0.1);
79
- margin-top: 20px;
80
  }
81
  </style>
82
  """, unsafe_allow_html=True)
83
 
 
 
 
84
  st.markdown("<h1 class='main-title'>📧 Smart Email Classifier</h1>", unsafe_allow_html=True)
85
- st.markdown("<p class='sub-title'>The Smart Email Classifier application is capable of classifying emails of different types based on email subject or body using advanced Natural Language Processing (NLP) techniques and using a fine-tuned NLP model.</p>", unsafe_allow_html=True)
86
-
87
- # 4. User Input Section
88
- with st.container():
89
- st.subheader("✉️ Enter email text (subject/body) for classification:")
90
- email_text = st.text_area(
91
- "Enter Email Text Below:",
92
- placeholder="Example: Your code for verification is 123456 or Meeting scheduled for 3 PM today.",
93
- height=150
94
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
- # 5. Prediction Logic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def classify_email(text):
98
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256)
99
  with torch.no_grad():
@@ -103,7 +175,55 @@ def classify_email(text):
103
  confidence = probs[0][predicted_idx].item()
104
  return LABELS[predicted_idx], confidence, probs[0].numpy()
105
 
106
- # 6. Predict Button
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  if st.button("🔍 Classify Email"):
108
  if not email_text.strip():
109
  st.warning("⚠️ Please enter email text before classifying.")
@@ -116,18 +236,18 @@ if st.button("🔍 Classify Email"):
116
  st.markdown(f"**Confidence:** {confidence * 100:.2f}%")
117
  st.progress(confidence)
118
 
119
- # Display class probabilities in a bar chart
120
- st.markdown("#### 📊 Category Probabilities:")
121
  prob_dict = {LABELS[i]: float(all_probs[i]) for i in range(len(LABELS))}
 
122
  st.bar_chart(prob_dict)
123
  st.markdown("</div>", unsafe_allow_html=True)
124
 
125
- # 7. Footer
 
 
126
  st.markdown("---")
127
  st.markdown("""
128
  <p style='text-align: left; color: gray; font-size: 0.9rem'>
129
  Built for CSC-546: Natural Language Processing (Smart Email Classification Project) |
130
- Built by: Mohammed Golam Kaisar Hossain Bhuyan (hossainbhuyan@cua.edu)
131
  </p>
132
-
133
  """, unsafe_allow_html=True)
 
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  from dotenv import load_dotenv
5
  import os
6
+ import random
7
  import numpy as np
8
+ import pandas as pd
9
+ from huggingface_hub import login
10
 
11
+ # ----------------------------
12
+ # 1. Streamlit App Configuration
13
+ # ----------------------------
14
  st.set_page_config(
15
+ page_title="Email Classifier using NLP",
16
  page_icon="📧",
17
  layout="wide",
18
+ initial_sidebar_state="expanded"
19
  )
20
 
21
+ # ----------------------------
22
+ # 2. Custom Styling
23
+ # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  st.markdown("""
25
  <style>
26
  body {
 
36
  text-align: center;
37
  color: #444;
38
  font-size: 1.1rem;
39
+ margin-bottom: 40px;
40
+ }
41
+ .email-list {
42
+ background-color: white;
43
+ border-radius: 10px;
44
+ padding: 10px;
45
+ height: 500px;
46
+ overflow-y: auto;
47
+ box-shadow: 0 4px 20px rgba(0,0,0,0.1);
48
+ }
49
+ .email-item {
50
+ padding: 10px;
51
+ border-bottom: 1px solid #eee;
52
+ cursor: pointer;
53
+ }
54
+ .email-item:hover {
55
+ background-color: #f5f5f5;
56
+ }
57
+ .email-content {
58
+ background-color: white;
59
+ border-radius: 10px;
60
+ padding: 20px;
61
+ box-shadow: 0 4px 20px rgba(0,0,0,0.1);
62
+ height: 500px;
63
+ overflow-y: auto;
64
  }
65
  .prediction-card {
66
  background-color: white;
67
  border-radius: 12px;
68
  padding: 20px;
69
  box-shadow: 0 4px 20px rgba(0,0,0,0.1);
 
70
  }
71
  </style>
72
  """, unsafe_allow_html=True)
73
 
74
+ # ----------------------------
75
+ # 3. App Header
76
+ # ----------------------------
77
  st.markdown("<h1 class='main-title'>📧 Smart Email Classifier</h1>", unsafe_allow_html=True)
78
+ st.markdown("<p class='sub-title'>Smart Email Classification App is an advanced Natural Language Processing (NLP) and Deep Learning project designed to automate email intent classification. The application is capable of categorizing emails into six widely-used categories: Promotions, Spam, Social Media Updates, Forum Updates, Code Verification, and Work Updates.</p>", unsafe_allow_html=True)
79
+
80
+ # ----------------------------
81
+ # 4. Sidebar: Model Selection
82
+ # ----------------------------
83
+ st.sidebar.header("⚙️ Model Configuration")
84
+
85
+ # Define model options
86
+ model_options = {
87
+ "DistilBERT (Fine-tuned) 1": "kaisarhossain/email-classifier-distilbert-finetuned-kaisar",
88
+ "DistilBERT (Fine-tuned) 2": "kaisarhossain/email_classifier_model"
89
+ }
90
+
91
+ model_choice = st.sidebar.selectbox("Select Model", list(model_options.keys()))
92
+ MODEL_REPO = model_options[model_choice]
93
+ st.sidebar.info(f"Using model: {MODEL_REPO}")
94
+
95
+ # ----------------------------
96
+ # 5. Environment Variables & Authentication
97
+ # ----------------------------
98
+ load_dotenv()
99
+ HF_TOKEN = os.getenv("HF_TOKEN")
100
+
101
+ if HF_TOKEN:
102
+ try:
103
+ login(token=HF_TOKEN)
104
+ except Exception as e:
105
+ st.sidebar.warning("⚠️ Unable to authenticate with Hugging Face token.")
106
+ st.sidebar.write(e)
107
+
108
+ # ----------------------------
109
+ # 6. Load Model Dynamically
110
+ # ----------------------------
111
+ @st.cache_resource(show_spinner=True)
112
+ def load_model(model_repo):
113
+ tokenizer = AutoTokenizer.from_pretrained(model_repo)
114
+ model = AutoModelForSequenceClassification.from_pretrained(model_repo)
115
+ return tokenizer, model
116
+
117
+ try:
118
+ tokenizer, model = load_model(MODEL_REPO)
119
+ except Exception as e:
120
+ st.error(f"❌ Failed to load model from {MODEL_REPO}")
121
+ st.exception(e)
122
+ st.stop()
123
 
124
+ # ----------------------------
125
+ # 7. Labels and Dummy Inbox
126
+ # ----------------------------
127
+ LABELS = [
128
+ "Promotions",
129
+ "Spam",
130
+ "Social Media Updates",
131
+ "Forum Updates",
132
+ "Code Verification",
133
+ "Work Updates"
134
+ ]
135
+
136
+ dummy_subjects = {
137
+ "Promotions": ["50% OFF Today Only!", "Your Exclusive Coupon Awaits", "Flash Sale on Electronics"],
138
+ "Spam": ["Claim your free reward!", "Win an iPhone 15 now!", "You’ve been selected!"],
139
+ "Social Media Updates": ["New friend request on Facebook", "Someone mentioned you on Twitter", "New followers on Instagram"],
140
+ "Forum Updates": ["Your Stack Overflow answer received upvotes", "New discussion thread in Data Science Forum", "Python 3.12 update discussion"],
141
+ "Code Verification": ["Your verification code is 482915", "Confirm login attempt", "Verify your new device"],
142
+ "Work Updates": ["Meeting rescheduled for 3 PM", "Project deadline extended", "Client feedback received"]
143
+ }
144
+
145
+ dummy_bodies = {
146
+ "Promotions": "Get up to 70% off on your favorite brands. Offer valid for a limited time only!",
147
+ "Spam": "Click this link to win cash prizes. Limited slots available!",
148
+ "Social Media Updates": "You have new notifications and updates from your social media network.",
149
+ "Forum Updates": "A new reply has been posted to a thread you are following.",
150
+ "Code Verification": "Enter this code in the app to verify your login session.",
151
+ "Work Updates": "Please find attached the meeting notes and next steps for the team."
152
+ }
153
+
154
+ # Generate dummy Gmail inbox
155
+ random.seed(42)
156
+ inbox_data = []
157
+ for _ in range(100):
158
+ label = random.choice(LABELS)
159
+ inbox_data.append({
160
+ "Category": label,
161
+ "Subject": random.choice(dummy_subjects[label]),
162
+ "Body": dummy_bodies[label]
163
+ })
164
+ inbox_df = pd.DataFrame(inbox_data)
165
+
166
+ # ----------------------------
167
+ # 8. Classification Function
168
+ # ----------------------------
169
  def classify_email(text):
170
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256)
171
  with torch.no_grad():
 
175
  confidence = probs[0][predicted_idx].item()
176
  return LABELS[predicted_idx], confidence, probs[0].numpy()
177
 
178
+ # ----------------------------
179
+ # 9. Gmail-like Layout
180
+ # ----------------------------
181
+ st.markdown("## 📥 Inbox")
182
+ st.markdown("---")
183
+ col1, col2, col3 = st.columns([2, 3, 2])
184
+
185
+ with col1:
186
+ st.markdown("#### 📩 Inbox")
187
+ selected_email = st.radio(
188
+ "Choose an email to view:",
189
+ range(len(inbox_df)),
190
+ format_func=lambda i: inbox_df.iloc[i]["Subject"],
191
+ label_visibility="collapsed",
192
+ index=None
193
+ )
194
+
195
+ with col2:
196
+ st.markdown("#### ✉️ Email Details")
197
+ if selected_email is not None:
198
+ selected_row = inbox_df.iloc[selected_email]
199
+ st.markdown(f"**Subject:** {selected_row['Subject']}")
200
+ st.markdown(f"**Body:** {selected_row['Body']}")
201
+ else:
202
+ st.info("📩 Select an email from the inbox to view details.")
203
+
204
+ with col3:
205
+ st.markdown("#### 📊 Classification Result")
206
+ if selected_email is not None:
207
+ text = inbox_df.iloc[selected_email]["Subject"] + " " + inbox_df.iloc[selected_email]["Body"]
208
+ predicted_label, confidence, all_probs = classify_email(text)
209
+ st.markdown(f"**Predicted Category:** {predicted_label}")
210
+ st.markdown(f"**Confidence:** {confidence * 100:.2f}%")
211
+ prob_dict = {LABELS[i]: float(all_probs[i]) for i in range(len(LABELS))}
212
+ st.bar_chart(prob_dict)
213
+ else:
214
+ st.warning("Select an email to see classification results.")
215
+
216
+ # ----------------------------
217
+ # 10. Manual Custom Email Input
218
+ # ----------------------------
219
+ st.markdown("---")
220
+ st.subheader("✉️ Enter email text (subject/body) for classification:")
221
+ email_text = st.text_area(
222
+ "Enter Email Text Below:",
223
+ placeholder="Example: Your code for verification is 123456 or Meeting scheduled for 3 PM today.",
224
+ height=150
225
+ )
226
+
227
  if st.button("🔍 Classify Email"):
228
  if not email_text.strip():
229
  st.warning("⚠️ Please enter email text before classifying.")
 
236
  st.markdown(f"**Confidence:** {confidence * 100:.2f}%")
237
  st.progress(confidence)
238
 
 
 
239
  prob_dict = {LABELS[i]: float(all_probs[i]) for i in range(len(LABELS))}
240
+ st.markdown("#### 📊 Category Probabilities:")
241
  st.bar_chart(prob_dict)
242
  st.markdown("</div>", unsafe_allow_html=True)
243
 
244
+ # ----------------------------
245
+ # 11. Footer
246
+ # ----------------------------
247
  st.markdown("---")
248
  st.markdown("""
249
  <p style='text-align: left; color: gray; font-size: 0.9rem'>
250
  Built for CSC-546: Natural Language Processing (Smart Email Classification Project) |
251
+ Developed by: Mohammed Golam Kaisar Hossain Bhuyan (hossainbhuyan@cua.edu)
252
  </p>
 
253
  """, unsafe_allow_html=True)