| import re |
| import emoji |
| import nltk |
| import numpy as np |
| import streamlit as st |
| import pickle |
|
|
| from nltk.tokenize import word_tokenize |
| from nltk.stem import WordNetLemmatizer |
| from tensorflow.keras.models import load_model |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
|
# Fetch the NLTK corpora/models needed by pre_process (tokenizer + lemmatizer).
# NOTE(review): recent NLTK releases also need "punkt_tab" for word_tokenize —
# confirm against the installed nltk version.
for _resource in ("punkt", "wordnet", "omw-1.4", "averaged_perceptron_tagger"):
    nltk.download(_resource)


# Single shared lemmatizer instance, reused across all pre_process calls.
lemmatizer = WordNetLemmatizer()
|
|
| |
def pre_process(x):
    """Normalize raw news text for the classifier.

    Lowercases the text, strips HTML tags, URLs, @mentions/#hashtags, a
    leading news-source prefix (e.g. "reuters - "), emoji and punctuation,
    then tokenizes and lemmatizes the remaining words.

    Args:
        x: Raw input text.

    Returns:
        A single whitespace-joined string of lemmatized tokens.
    """
    x = x.lower()
    x = re.sub(r"<.*?>", "", x)          # drop HTML tags
    x = re.sub(r"https?://\S+", "", x)   # drop URLs up to the next whitespace
    # BUG FIX: the original pattern "[@#].+?\S" only consumed three characters,
    # leaving the tail of each mention/hashtag in the text; remove the whole token.
    x = re.sub(r"[@#]\S+", "", x)
    # BUG FIX: the original r"\\_+" matched a literal backslash before the
    # underscores, so plain underscore runs were never converted to spaces.
    x = re.sub(r"_+", " ", x)
    # Strip a leading source prefix such as "ap - "; non-greedy so a later
    # " - " inside the body text is not swallowed (the original greedy ".*"
    # deleted everything up to the LAST " - ").
    x = re.sub(r"^[A-Za-z.].*?\s-\s", "", x)
    x = emoji.demojize(x)                # emoji -> ":name:" placeholders
    x = re.sub(r":.*?:", "", x)          # drop the demojized placeholders
    x = re.sub(r"[^a-zA-Z0-9\s_]", "", x)  # keep alphanumerics/underscore/space
    words = word_tokenize(x)
    return " ".join(lemmatizer.lemmatize(word) for word in words)
|
|
| |
# Load the trained RNN classifier saved in Keras HDF5 format.
model = load_model("best_rnn_model.h5")


# Restore the Keras tokenizer fitted at training time, so inference uses the
# same word-index mapping as training.
# NOTE(review): pickle.load is unsafe on untrusted files — acceptable only
# because this tokenizer ships alongside the app.
with open("tokenizer.pickle", "rb") as handle:
    tokenizer = pickle.load(handle)


# Sequence length the model was trained with; inputs are padded/truncated to it.
MAX_LENGTH = 100


# Maps the model's output index to a human-readable category name.
# NOTE(review): this order must match the label encoding used during
# training — confirm against the training script.
class_labels = ['Sports', 'Business', 'SciTech', 'World']
|
|
| |
def predict_category(text):
    """Classify a piece of news text into one of the known categories.

    Args:
        text: Raw user-supplied news text.

    Returns:
        The predicted category name from ``class_labels``.
    """
    cleaned = pre_process(text)
    encoded = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(encoded, maxlen=MAX_LENGTH, padding='post')
    scores = model.predict(padded)
    return class_labels[int(np.argmax(scores))]
|
|
| |
# --- Streamlit UI -------------------------------------------------------
st.title("π° News Category Classifier")
st.write("Enter a news headline or article snippet, and the model will predict its category.")


user_input = st.text_area("β Enter text here:")


if st.button("π Predict"):
    # Guard clause: reject whitespace-only input before running the model.
    if not user_input.strip():
        st.warning("β οΈ Please enter some text to classify.")
    else:
        prediction = predict_category(user_input)
        st.success(f"π Predicted Category: **{prediction}**")
|
|