Hameed13 committed on
Commit
bec097b
·
1 Parent(s): 81af064

Added app files

Browse files
Files changed (5) hide show
  1. app.py +70 -0
  2. count_vectorizer.pkl +3 -0
  3. naive_bayes_model.pkl +3 -0
  4. requirements.txt +5 -0
  5. vscode.txt +2 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that labels text as 'Spam' or 'Not Spam'.

Two modes are offered: classifying a single piece of text typed by the
user, and bulk classification of an uploaded CSV file that has a 'Text'
column. Both modes use a pickled model and vectorizer that are loaded from
the working directory.
"""
import joblib
import pandas as pd
import streamlit as st

# Name of the CSV column that holds the text to classify.
TEXT_COLUMN = 'Text'


@st.cache_resource
def load_artifacts():
    """Load the pickled model and vectorizer, cached across script reruns.

    Streamlit re-executes the whole script on every widget interaction;
    caching avoids unpickling both files each time.

    NOTE(review): joblib.load unpickles arbitrary objects. These files ship
    with the app; never point this at user-supplied files.

    Returns:
        A ``(model, vectorizer)`` tuple.
    """
    return (
        joblib.load('naive_bayes_model.pkl'),
        joblib.load('count_vectorizer.pkl'),
    )


def read_uploaded_csv(file):
    """Read an uploaded CSV, preferring UTF-8 and falling back to latin1.

    Decoding everything as latin1 never fails, but it silently garbles
    UTF-8 input, so UTF-8 is attempted first.

    Args:
        file: Seekable binary file-like object returned by the uploader.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    try:
        return pd.read_csv(file, encoding='utf-8')
    except UnicodeDecodeError:
        # Rewind: the failed attempt may have consumed part of the stream.
        file.seek(0)
        return pd.read_csv(file, encoding='latin1')


model, vectorizer = load_artifacts()

# Streamlit App Title
st.title("Text Classification App")
st.write("This app classifies text into 'Spam' or 'Not Spam' categories. You can input text or upload a CSV file for bulk classification.")

# Section: Single Text Input
st.header("Single Text Classification")
user_input = st.text_area("Enter your text here:")

if st.button("Classify"):
    if user_input.strip():
        # Vectorize the single input and score it.
        input_vector = vectorizer.transform([user_input])
        prediction = model.predict(input_vector)
        # Confidence is the highest class probability, as a percentage.
        confidence = model.predict_proba(input_vector).max() * 100

        # Display results (label 1 is reported as 'Spam').
        st.write(f"Prediction: {'Spam' if prediction[0] == 1 else 'Not Spam'}")
        st.write(f"Confidence Score: {confidence:.2f}%")
    else:
        st.warning("Please enter some text for classification.")

# Section: Bulk File Classification
st.header("Bulk Classification from CSV")
uploaded_file = st.file_uploader("Upload a CSV file for bulk classification", type=["csv"])

if uploaded_file:
    # Top-level UI boundary: report any processing failure to the user
    # instead of crashing the app.
    try:
        df = read_uploaded_csv(uploaded_file)
        if TEXT_COLUMN not in df.columns:
            st.error(f"The uploaded file must contain a '{TEXT_COLUMN}' column.")
        elif df.empty:
            st.warning("The uploaded file contains no rows to classify.")
        else:
            # Missing or non-string cells would make the vectorizer fail;
            # treat them as empty / stringified text instead.
            texts = df[TEXT_COLUMN].fillna('').astype(str)
            input_vectors = vectorizer.transform(texts)
            df['Prediction'] = model.predict(input_vectors)
            df['Confidence Score'] = model.predict_proba(input_vectors).max(axis=1) * 100

            # Display first few predictions
            st.write("First few predictions:")
            st.write(df.head())

            # Visualization: Prediction distribution
            st.subheader("Prediction Distribution")
            st.bar_chart(df['Prediction'].value_counts())

            # Confidence threshold filter
            st.subheader("Filter Results by Confidence Score")
            threshold = st.slider("Confidence Threshold", 0, 100, 50)
            filtered_df = df[df['Confidence Score'] >= threshold]
            st.write("Filtered Results:")
            st.write(filtered_df)

            # Download filtered predictions
            st.subheader("Download Filtered Predictions")
            csv = filtered_df.to_csv(index=False)
            st.download_button(
                label="Download Filtered Predictions",
                data=csv,
                file_name="filtered_predictions.csv",
                mime="text/csv",
            )
    except Exception as e:
        st.error(f"An error occurred while processing the file: {e}")
count_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a3641d27bf9a3dc8ac9ca3cea1a7019f934c47f9dbf0717ae1772280a4b3e1
3
+ size 84564
naive_bayes_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78c7ff428289a9c05b5bdbcec751090bd51b85bd6c93bc6bb6f7eed3fe7137df
3
+ size 223063
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ scikit-learn
3
+ joblib
4
+ pandas
5
+ numpy
vscode.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pip install pandas
2
+