Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import joblib | |
| import re | |
| from nltk.tokenize import word_tokenize | |
| from nltk.corpus import stopwords | |
| import nltk | |
# Ensure the NLTK data used by preprocess_text is available.
# 'stopwords' backs stopwords.words('english'); 'punkt' and 'punkt_tab' back
# word_tokenize. Newer NLTK releases load the tokenizer tables from
# 'punkt_tab' instead of the pickled 'punkt' package, so both are requested
# to keep tokenization working regardless of the installed NLTK version.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

# Load the dataset and the pre-trained artifacts.
# NOTE(review): joblib.load unpickles the file contents, so these artifacts
# must come from a trusted source.
df = pd.read_csv("./bbc_data.csv")
model = joblib.load('model.pkl')  # Pre-trained classifier
vectorizer = joblib.load('vectorizer.pkl')  # Pre-trained text vectorizer

# Text and label columns of the dataset.
# NOTE(review): X and y are not referenced in the visible part of this
# script; kept in case other code relies on them.
X = df['data']
y = df['labels']
| # Preprocessing function | |
def preprocess_text(text):
    """Normalize raw text into a space-separated string of kept tokens.

    The text is lower-cased, every character that is neither a word
    character nor whitespace is removed, the result is tokenized with
    NLTK's ``word_tokenize``, and English stopwords are dropped.

    Args:
        text: The raw input string.

    Returns:
        The remaining tokens joined by single spaces (an empty string when
        no token survives).
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)

    # Tokenize before loading the stopword list, mirroring the order in
    # which the two NLTK resources are accessed.
    words = word_tokenize(without_punctuation)
    english_stopwords = set(stopwords.words('english'))

    kept = []
    for word in words:
        if word in english_stopwords:
            continue
        kept.append(word)
    return ' '.join(kept)
# Title of the app
st.title('News Classification App')

# User input
user_input = st.text_area('Enter a headline')

if st.button('Classify'):
    # Treat whitespace-only input the same as empty input so that a blank
    # headline is never sent to the model.
    if not user_input or not user_input.strip():
        st.write('Please enter a headline')
    else:
        # Preprocess the input text
        preprocessed_input = preprocess_text(user_input)
        # Convert the preprocessed text to numerical features using the
        # loaded vectorizer (transform expects an iterable of documents)
        input_vector = vectorizer.transform([preprocessed_input])
        # Make prediction
        prediction = model.predict(input_vector)
        # Display the result (first element: a single document was passed)
        st.write(f'Predicted Category: {prediction[0]}')