Spaces:
Sleeping
Sleeping
Added app files
Browse files
- app.py +70 -0
- count_vectorizer.pkl +3 -0
- naive_bayes_model.pkl +3 -0
- requirements.txt +5 -0
- vscode.txt +2 -0
app.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import joblib
|
| 4 |
+
|
| 5 |
+
# Load the saved model and vectorizer.
@st.cache_resource
def _load_artifacts():
    """Load the pickled classifier and vectorizer, cached across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, both pickle files would be read and
    deserialized again on each rerun.

    Returns:
        tuple: ``(model, vectorizer)`` exactly as returned by ``joblib.load``.
    """
    # NOTE(security): joblib.load unpickles arbitrary Python objects. Only
    # load artifacts that ship with the app, never user-supplied files.
    return (
        joblib.load('naive_bayes_model.pkl'),
        joblib.load('count_vectorizer.pkl'),
    )


# Module-level names used by the rest of the script.
model, vectorizer = _load_artifacts()
|
| 8 |
+
|
| 9 |
+
# Page heading followed by a short description of what the app offers.
_page_title = "Text Classification App"
_page_intro = (
    "This app classifies text into 'Spam' or 'Not Spam' categories. "
    "You can input text or upload a CSV file for bulk classification."
)
st.title(_page_title)
st.write(_page_intro)
|
| 12 |
+
|
| 13 |
+
# Section: Single Text Input
# Classifies one free-text message when the user presses the button.
st.header("Single Text Classification")
user_input = st.text_area("Enter your text here:")

classify_clicked = st.button("Classify")
if classify_clicked:
    if not user_input.strip():
        # Nothing but whitespace was entered; ask for real input.
        st.warning("Please enter some text for classification.")
    else:
        # Turn the raw text into the feature representation the model expects.
        features = vectorizer.transform([user_input])
        predicted = model.predict(features)
        # Highest class probability, expressed as a percentage.
        top_probability = model.predict_proba(features).max() * 100

        # Label 1 is displayed as 'Spam'; any other label as 'Not Spam'.
        label = 'Spam' if predicted[0] == 1 else 'Not Spam'
        st.write(f"Prediction: {label}")
        st.write(f"Confidence Score: {top_probability:.2f}%")
|
| 29 |
+
|
| 30 |
+
# Section: Bulk File Classification
# Classifies every row of an uploaded CSV (column 'Text'), shows a summary,
# lets the user filter by confidence, and offers the filtered rows as a download.
st.header("Bulk Classification from CSV")
uploaded_file = st.file_uploader("Upload a CSV file for bulk classification", type=["csv"])


def _read_uploaded_csv(file_obj):
    """Parse an uploaded CSV, preferring UTF-8 and falling back to latin1.

    Decoding everything as latin1 never fails, but it silently mis-decodes
    UTF-8 input. UTF-8 (with an optional BOM) is therefore tried first, and
    latin1 is used only when the bytes are not valid UTF-8.

    Args:
        file_obj: Seekable binary file-like object holding the CSV data.

    Returns:
        pandas.DataFrame: The parsed contents of the file.
    """
    try:
        return pd.read_csv(file_obj, encoding='utf-8-sig')
    except UnicodeDecodeError:
        # Rewind: the failed attempt may have consumed part of the stream.
        file_obj.seek(0)
        return pd.read_csv(file_obj, encoding='latin1')


if uploaded_file is not None:
    try:
        df = _read_uploaded_csv(uploaded_file)
        if 'Text' in df.columns:
            # Blank cells are parsed as NaN and numeric-looking cells as
            # numbers; the vectorizer only accepts strings, so normalise first.
            texts = df['Text'].fillna('').astype(str)
            input_vectors = vectorizer.transform(texts)
            df['Prediction'] = model.predict(input_vectors)
            # Highest class probability per row, expressed as a percentage.
            df['Confidence Score'] = model.predict_proba(input_vectors).max(axis=1) * 100

            # Display first few predictions
            st.write("First few predictions:")
            st.write(df.head())

            # Visualization: Prediction distribution
            st.subheader("Prediction Distribution")
            st.bar_chart(df['Prediction'].value_counts())

            # Confidence threshold filter
            st.subheader("Filter Results by Confidence Score")
            threshold = st.slider("Confidence Threshold", 0, 100, 50)
            filtered_df = df[df['Confidence Score'] >= threshold]
            st.write("Filtered Results:")
            st.write(filtered_df)

            # Download filtered predictions
            st.subheader("Download Filtered Predictions")
            csv = filtered_df.to_csv(index=False)
            st.download_button(
                label="Download Filtered Predictions",
                data=csv,
                file_name="filtered_predictions.csv",
                mime="text/csv",
            )
        else:
            # The column check above is case-sensitive, so name it exactly.
            st.error("The uploaded file must contain a 'Text' column.")
    except Exception as e:
        # UI boundary: surface any parsing or prediction failure to the user
        # instead of letting the script crash.
        st.error(f"An error occurred while processing the file: {e}")
|
count_vectorizer.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0a3641d27bf9a3dc8ac9ca3cea1a7019f934c47f9dbf0717ae1772280a4b3e1
|
| 3 |
+
size 84564
|
naive_bayes_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78c7ff428289a9c05b5bdbcec751090bd51b85bd6c93bc6bb6f7eed3fe7137df
|
| 3 |
+
size 223063
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
scikit-learn
|
| 3 |
+
joblib
|
| 4 |
+
pandas
|
| 5 |
+
numpy
|
vscode.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pip install pandas
|
| 2 |
+
|