"""Streamlit app that labels text as 'Spam' or 'Not Spam'.

Two workflows are offered on a single page:

* Single text: the user types a message and gets a label plus the highest
  class probability reported by the model.
* Bulk CSV: the user uploads a CSV with a ``Text`` column; every row is
  classified, the results can be filtered by confidence, and the filtered
  table can be downloaded.

The model and vectorizer are loaded from ``naive_bayes_model.pkl`` and
``count_vectorizer.pkl`` in the working directory.
"""

import joblib
import pandas as pd
import streamlit as st

# Load your saved model and vectorizer
model = joblib.load('naive_bayes_model.pkl')
vectorizer = joblib.load('count_vectorizer.pkl')

# Streamlit App Title
st.title("Text Classification App")
st.write(
    "This app classifies text into 'Spam' or 'Not Spam' categories. "
    "You can input text or upload a CSV file for bulk classification."
)

# Section: Single Text Input
st.header("Single Text Classification")
user_input = st.text_area("Enter your text here:")

if st.button("Classify"):
    if user_input.strip():
        # Preprocess the input
        input_vector = vectorizer.transform([user_input])
        prediction = model.predict(input_vector)
        # Confidence is the largest class probability, as a percentage.
        confidence = model.predict_proba(input_vector).max() * 100

        # Display results
        # NOTE(review): treats label 1 as spam and anything else as not
        # spam -- confirm this matches the labels the model was trained on.
        st.write(f"Prediction: {'Spam' if prediction[0] == 1 else 'Not Spam'}")
        st.write(f"Confidence Score: {confidence:.2f}%")
    else:
        st.warning("Please enter some text for classification.")

# Section: Bulk File Classification
st.header("Bulk Classification from CSV")
uploaded_file = st.file_uploader(
    "Upload a CSV file for bulk classification", type=["csv"]
)

if uploaded_file:
    try:
        # 'latin1' maps every byte to a character, so files that are not
        # valid UTF-8 still load instead of raising a decode error.
        df = pd.read_csv(uploaded_file, encoding='latin1')

        if 'Text' not in df.columns:
            # The lookup is case-sensitive, so name the column exactly.
            st.error("The uploaded file must contain a 'Text' column.")
        elif df.empty:
            # A header-only file has nothing to vectorize or predict on.
            st.warning("The uploaded file contains no rows to classify.")
        else:
            # Blank cells are read as NaN and numeric cells as numbers; the
            # vectorizer only accepts strings, so normalise before
            # transforming. The original column is left untouched.
            texts = df['Text'].fillna('').astype(str)
            input_vectors = vectorizer.transform(texts)

            df['Prediction'] = model.predict(input_vectors)
            # Per-row confidence: highest class probability, as a percentage.
            df['Confidence Score'] = (
                model.predict_proba(input_vectors).max(axis=1) * 100
            )

            # Display first few predictions
            st.write("First few predictions:")
            st.write(df.head())

            # Visualization: Prediction distribution
            st.subheader("Prediction Distribution")
            st.bar_chart(df['Prediction'].value_counts())

            # Confidence threshold filter
            st.subheader("Filter Results by Confidence Score")
            threshold = st.slider("Confidence Threshold", 0, 100, 50)
            filtered_df = df[df['Confidence Score'] >= threshold]
            st.write("Filtered Results:")
            st.write(filtered_df)

            # Download filtered predictions
            st.subheader("Download Filtered Predictions")
            csv = filtered_df.to_csv(index=False)
            st.download_button(
                label="Download Filtered Predictions",
                data=csv,
                file_name="filtered_predictions.csv",
                mime="text/csv",
            )
    except Exception as e:
        # Top-level UI boundary: report any parsing or prediction failure to
        # the user rather than letting the page crash.
        st.error(f"An error occurred while processing the file: {e}")