Adityaganesh's picture
Update app.py
294f97f verified
raw
history blame
2.1 kB
import re
import emoji
import nltk
import numpy as np
import streamlit as st
import pickle # To load the tokenizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Ensure the NLTK data used by word_tokenize and WordNetLemmatizer is present.
# "punkt_tab" is the tokenizer data that word_tokenize loads on recent NLTK
# releases (3.9 and later); "punkt" is kept so older releases keep working.
for _resource in (
    "punkt",
    "punkt_tab",
    "wordnet",
    "omw-1.4",
    "averaged_perceptron_tagger",
):
    nltk.download(_resource)

# Shared lemmatizer instance used by pre_process.
lemmatizer = WordNetLemmatizer()
# Function to preprocess text
def pre_process(x):
    """Clean raw text and return it as space-joined, lemmatized tokens.

    Steps, in order: lowercase, regex clean-up, convert emoji to ``:name:``
    codes and strip them, drop remaining punctuation, tokenize, lemmatize.

    NOTE(review): presumably this mirrors the preprocessing used when the
    model was trained, so the patterns are kept exactly as they are even
    where they look odd -- confirm against the training code before changing.

    Args:
        x: The raw input string.

    Returns:
        The cleaned text as a single string of lemmatized tokens separated
        by single spaces.
    """
    # Substitutions applied before emoji are converted to text codes.
    before_demojize = (
        # Shortest "<...>" spans, i.e. HTML-like tags.
        ("<.*?>", ""),
        # URLs starting with http:// or https://.
        ("http[s]?://.+?\\S+", ""),
        # "@" or "#" followed by lazily-matched characters and one
        # non-whitespace character. NOTE(review): being lazy, this typically
        # removes only the marker plus two characters, not the whole
        # mention/hashtag.
        ("[@#].+?\\S", ""),
        # As written, matches a literal backslash followed by underscores.
        (r"\\_+", " "),
        # From the start of the text through the last " - " on the first
        # line, when the text begins with a letter or a dot.
        ("^[A-Za-z.].*\\s-\\s", ""),
    )
    # Substitutions applied after emoji have become ":name:" codes.
    after_demojize = (
        # Anything between two colons, which includes the emoji codes.
        (":.*?:", ""),
        # Everything except ASCII letters, digits, whitespace and "_".
        ("[^a-zA-Z0-9\\s_]", ""),
    )

    text = x.lower()
    for pattern, replacement in before_demojize:
        text = re.sub(pattern, replacement, text)

    text = emoji.demojize(text)
    for pattern, replacement in after_demojize:
        text = re.sub(pattern, replacement, text)

    lemmas = map(lemmatizer.lemmatize, word_tokenize(text))
    return " ".join(lemmas)
# Load the trained model and its tokenizer once per server process.
# Streamlit re-executes this script on every widget interaction; caching the
# loader avoids re-reading the model file and the pickle on each button click.
@st.cache_resource(show_spinner=False)
def _load_artifacts():
    """Return ``(model, tokenizer)`` loaded from files in the working directory."""
    loaded_model = load_model("best_rnn_model.h5")
    # Load the same tokenizer used during training.
    # NOTE: unpickling can execute arbitrary code; only deploy a
    # tokenizer.pickle that comes from a trusted source.
    with open("tokenizer.pickle", "rb") as handle:
        loaded_tokenizer = pickle.load(handle)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_artifacts()

# Maximum length (must match training settings)
MAX_LENGTH = 100

# Class labels, looked up by the index of the highest model output.
# NOTE(review): the order presumably matches the label encoding used in
# training -- confirm.
class_labels = ['Sports', 'Business', 'SciTech', 'World']
# Function to predict category
def predict_category(text):
    """Return the predicted news category label for ``text``.

    The text is cleaned with ``pre_process``, converted to a sequence of
    token ids with the loaded tokenizer, post-padded to ``MAX_LENGTH`` and
    passed to the model. The label at the index of the largest model output
    is returned.

    Args:
        text: Raw headline or article snippet.

    Returns:
        One of the strings in ``class_labels``.
    """
    cleaned = pre_process(text)
    token_ids = tokenizer.texts_to_sequences([cleaned])
    model_input = pad_sequences(token_ids, maxlen=MAX_LENGTH, padding="post")
    scores = model.predict(model_input)
    best_index = np.argmax(scores)
    return class_labels[best_index]
# Streamlit UI
# Emoji in the labels are literal UTF-8 characters; keep this file saved as
# UTF-8 so they are not turned into mojibake.
st.title("📰 News Category Classifier")
st.write("Enter a news headline or article snippet, and the model will predict its category.")
user_input = st.text_area("✍ Enter text here:")

# Classify only when the button is pressed and the input is not blank.
if st.button("🔍 Predict"):
    if user_input.strip():
        prediction = predict_category(user_input)
        st.success(f"📌 Predicted Category: **{prediction}**")
    else:
        st.warning("⚠️ Please enter some text to classify.")