"""Streamlit app that classifies a news headline with a pre-trained model.

Loads a BBC news dataset plus a pickled model/vectorizer pair, preprocesses
the user's headline the same way the training data was preprocessed, and
shows the predicted category.
"""
import re

import joblib
import nltk
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch required NLTK corpora; quiet=True avoids re-printing download status
# on every Streamlit rerun (the whole script re-executes on each interaction).
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)


@st.cache_data
def _load_dataset(path: str = "./bbc_data.csv") -> pd.DataFrame:
    """Read the BBC dataset CSV; cached so reruns don't hit the disk again."""
    return pd.read_csv(path)


@st.cache_resource
def _load_artifacts():
    """Load the pre-trained model and vectorizer once, not on every rerun."""
    return joblib.load('model.pkl'), joblib.load('vectorizer.pkl')


df = _load_dataset()
model, vectorizer = _load_artifacts()
X = df['data']      # raw article text (kept for parity with original script)
y = df['labels']    # category labels

# Built once at import time instead of on every preprocess_text() call.
_STOP_WORDS = frozenset(stopwords.words('english'))


def preprocess_text(text: str) -> str:
    """Lowercase *text*, strip punctuation, tokenize, and drop stopwords.

    Returns the surviving tokens joined by single spaces — the same
    normalization the vectorizer expects at prediction time.
    """
    text = re.sub(r'[^\w\s]', '', text.lower())  # remove punctuation
    tokens = word_tokenize(text)
    return ' '.join(word for word in tokens if word not in _STOP_WORDS)


st.title('News Classification App')

user_input = st.text_area('Enter a headline')

if st.button('Classify'):
    if user_input:
        # Mirror training-time preprocessing before vectorizing.
        preprocessed_input = preprocess_text(user_input)
        input_vector = vectorizer.transform([preprocessed_input])
        prediction = model.predict(input_vector)
        st.write(f'Predicted Category: {prediction[0]}')
    else:
        st.write('Please enter a headline')