# app.py — Streamlit news-category classification app
# (Hugging Face Spaces file; viewer metadata removed so the file is valid Python.)
import streamlit as st
import pickle
import tensorflow as tf
import numpy as np
import re
import emoji
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
# Fetch the NLTK data packages required below (tokenizer, lemmatizer data,
# stopword list); nltk.download is a no-op when the resource is already cached.
for _resource in ("punkt", "wordnet", "stopwords"):
    nltk.download(_resource)

# Shared text-normalization helpers used by pre_process().
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))
def pre_process(text):
    """Normalize raw news text for vectorization.

    Steps: lowercase, strip HTML tags, URLs, @mentions/#hashtags,
    collapse underscores to spaces, drop a leading "Source - " style
    prefix, remove emojis, strip remaining punctuation, then tokenize,
    drop English stopwords, and lemmatize.

    Parameters
    ----------
    text : str
        Raw article/headline text.

    Returns
    -------
    str
        Space-joined, cleaned and lemmatized tokens.
    """
    text = text.lower()
    text = re.sub(r"<.*?>", "", text)                  # HTML tags
    text = re.sub(r"http[s]?://\S+", "", text)         # URLs
    text = re.sub(r"[@#]\S+", "", text)                # mentions / hashtags
    # BUG FIX: the original pattern r"\_+" is a raw string, so the regex was
    # a literal backslash followed by underscores and plain "_" runs were
    # never replaced. r"_+" matches runs of underscores as intended.
    text = re.sub(r"_+", " ", text)
    text = re.sub(r"^[A-Za-z.].*\s-\s", "", text)      # leading "Source - " prefix
    text = emoji.demojize(text)
    text = re.sub(r":.*?:", "", text)                  # drop demojized :emoji_names:
    text = re.sub(r"[^a-zA-Z0-9\s_]", "", text)        # remaining punctuation
    words = word_tokenize(text)
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return " ".join(words)
# Load the label encoder (maps class indices back to category names).
# NOTE(review): pickle.load executes arbitrary code from the file — safe only
# because label_encoder.pkl ships with this app, never from user input.
with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)
# Load the text vectorization model (Keras TextVectorization layer saved as a model).
text_vectorizer = tf.keras.models.load_model("news_tv_model.keras")
# Load the news classification model (consumes the vectorizer's output).
news_model = tf.keras.models.load_model("news_model.keras")
def predict_category(text):
    """Classify a piece of news text and return its category name.

    Pipeline: clean the text with pre_process(), vectorize it, run the
    classifier, take the argmax class index, and decode it back to a
    human-readable label via the fitted label encoder.
    """
    cleaned = pre_process(text)
    vectors = text_vectorizer([cleaned])
    probabilities = news_model.predict(vectors)
    best_index = int(np.argmax(probabilities, axis=1)[0])
    decoded = label_encoder.inverse_transform([best_index])
    return decoded[0]
# ---- Streamlit UI ----
st.title("News Classification App")

# Text input for the article/headline to classify.
user_text = st.text_area("Enter your news content for classification.")
clicked = st.button("Predict Category")

if clicked:
    # Guard: reject whitespace-only input before running the model.
    if not user_text.strip():
        st.warning("Please enter some text to classify.")
    else:
        category = predict_category(user_text)
        st.success(f"Predicted Category: {category}")