|
|
import streamlit as st |
|
|
import pickle |
|
|
import tensorflow as tf |
|
|
import numpy as np |
|
|
import re |
|
|
import emoji |
|
|
import nltk |
|
|
from nltk.tokenize import word_tokenize |
|
|
from nltk.stem import WordNetLemmatizer |
|
|
from nltk.corpus import stopwords |
|
|
|
|
|
|
|
|
# One-time NLTK data setup. nltk.download is a no-op once the corpora are
# cached locally; quiet=True stops it from spamming logs on every Streamlit
# rerun (the whole script re-executes on each widget interaction).
nltk.download("punkt", quiet=True)
# NLTK >= 3.8.2 word_tokenize needs "punkt_tab" as well; on older NLTK this
# simply returns False without raising.
nltk.download("punkt_tab", quiet=True)
nltk.download("wordnet", quiet=True)
nltk.download("stopwords", quiet=True)


lemmatizer = WordNetLemmatizer()

# Build the stopword set once at import time; set membership is O(1) per token.
stop_words = set(stopwords.words("english"))
|
|
|
|
|
def pre_process(text):
    """Normalize raw news text into a cleaned, lemmatized token string.

    Lowercases, strips HTML tags, URLs, @mentions/#hashtags, a leading
    "Source - " style attribution, emoji, and punctuation, then removes
    English stopwords and lemmatizes the remaining tokens.

    Args:
        text: Raw input string.

    Returns:
        Space-joined string of cleaned, lemmatized, non-stopword tokens.
    """
    text = text.lower()
    # Drop HTML tags, URLs, and @mentions / #hashtags.
    text = re.sub(r"<.*?>", "", text)
    text = re.sub(r"http[s]?://\S+", "", text)
    text = re.sub(r"[@#]\S+", "", text)
    # Collapse runs of underscores to a single space.
    # BUG FIX: the original pattern r"\\_+" (raw string) matched a literal
    # backslash followed by underscores, so plain underscores were never
    # replaced.
    text = re.sub(r"_+", " ", text)
    # Strip a leading attribution prefix such as "Reuters - ".
    text = re.sub(r"^[A-Za-z.].*\s-\s", "", text)
    # Convert emoji to :name: placeholders, then remove those placeholders.
    text = emoji.demojize(text)
    text = re.sub(r":.*?:", "", text)
    # Keep only alphanumerics, whitespace and underscores.
    text = re.sub(r"[^a-zA-Z0-9\s_]", "", text)
    words = word_tokenize(text)
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return " ".join(words)
|
|
|
|
|
|
|
|
@st.cache_resource
def _load_artifacts():
    """Load the label encoder and both Keras models once per server process.

    Streamlit re-executes this script on every widget interaction;
    st.cache_resource ensures the pickle and the two model files are read
    from disk only once instead of on every rerun.

    Returns:
        Tuple of (label_encoder, text_vectorizer, news_model).
    """
    # NOTE: pickle.load must only ever be used on trusted local artifacts —
    # never on data received from users.
    with open("label_encoder.pkl", "rb") as f:
        encoder = pickle.load(f)
    vectorizer = tf.keras.models.load_model("news_tv_model.keras")
    model = tf.keras.models.load_model("news_model.keras")
    return encoder, vectorizer, model


# Keep the original module-level names so downstream code (predict_category)
# continues to work unchanged.
label_encoder, text_vectorizer, news_model = _load_artifacts()
|
|
|
|
|
def predict_category(text):
    """Classify a piece of news text and return its category label.

    Args:
        text: Raw news text from the user.

    Returns:
        The category name decoded via the fitted label encoder.
    """
    cleaned = pre_process(text)
    # The vectorizer model expects a batch, so wrap the single document.
    vectors = text_vectorizer([cleaned])
    probabilities = news_model.predict(vectors)
    # Pick the highest-scoring class for the first (only) item in the batch.
    best_index = np.argmax(probabilities, axis=1)[0]
    return label_encoder.inverse_transform([best_index])[0]
|
|
|
|
|
|
|
|
# --- Streamlit UI ---------------------------------------------------------
st.title("News Classification App")

user_text = st.text_area("Enter your news content for classification.")

clicked = st.button("Predict Category")
if clicked:
    # Reject whitespace-only input; otherwise classify the raw text.
    if not user_text.strip():
        st.warning("Please enter some text to classify.")
    else:
        category = predict_category(user_text)
        st.success(f"Predicted Category: {category}")
|
|
|