Update app.py
Browse files
app.py
CHANGED
|
@@ -11,21 +11,20 @@ from nltk.corpus import stopwords
|
|
| 11 |
from nltk.stem import WordNetLemmatizer
|
| 12 |
|
| 13 |
# Download necessary resources
|
| 14 |
-
nltk.download('
|
| 15 |
nltk.download('stopwords')
|
| 16 |
nltk.download('wordnet')
|
| 17 |
|
| 18 |
-
import tensorflow
|
| 19 |
import keras
|
| 20 |
-
from keras.
|
| 21 |
|
| 22 |
import pickle
|
| 23 |
|
| 24 |
# Set Streamlit page configuration
|
| 25 |
st.set_page_config(page_title="News Category Classifier", page_icon="π°", layout="centered")
|
| 26 |
|
| 27 |
-
|
| 28 |
-
# Function to set background image
|
| 29 |
def set_background(image_path):
|
| 30 |
if not os.path.exists(image_path):
|
| 31 |
st.error(f"β Background image not found: {image_path}")
|
|
@@ -36,12 +35,9 @@ def set_background(image_path):
|
|
| 36 |
|
| 37 |
bg_image_style = f"""
|
| 38 |
<style>
|
| 39 |
-
|
| 40 |
-
background
|
| 41 |
background-size: cover;
|
| 42 |
-
background-repeat: no-repeat;
|
| 43 |
-
background-position: center;
|
| 44 |
-
background-attachment: fixed;
|
| 45 |
}}
|
| 46 |
</style>
|
| 47 |
"""
|
|
@@ -55,23 +51,21 @@ set_background("Images/News image.jpg")
|
|
| 55 |
stop_words = set(stopwords.words('english')).union({"pm"})
|
| 56 |
lemmatizer = WordNetLemmatizer()
|
| 57 |
|
| 58 |
-
|
| 59 |
# Preprocessing function
|
| 60 |
def pre_process(x):
|
| 61 |
x = x.lower()
|
| 62 |
x = re.sub("<.*?>", "", x) # Remove HTML tags
|
| 63 |
-
x = re.sub("http[s]?://\S+", "", x) # Remove URLs
|
| 64 |
-
x = re.sub("[@#]\S+", "", x) # Remove mentions and hashtags
|
| 65 |
x = re.sub(r"\_+", " ", x) # Replace underscores with space
|
| 66 |
x = emoji.demojize(x) # Convert emojis to text
|
| 67 |
-
x = re.sub(":.*?:", "", x) # Remove emoji text
|
| 68 |
-
x = re.sub("[^a-zA-Z0-9\s_]", "", x) # Remove special characters
|
| 69 |
words = word_tokenize(x)
|
| 70 |
words = [word for word in words if word not in stop_words]
|
| 71 |
x = " ".join([lemmatizer.lemmatize(word) for word in words])
|
| 72 |
return x
|
| 73 |
|
| 74 |
-
|
| 75 |
# Cache model loading to improve performance
|
| 76 |
@st.cache_resource
|
| 77 |
def load_model():
|
|
@@ -87,11 +81,9 @@ def load_model():
|
|
| 87 |
|
| 88 |
return model, vectorizer, label_encoder
|
| 89 |
|
| 90 |
-
|
| 91 |
# Load the models
|
| 92 |
model, vectorizer, label_encoder = load_model()
|
| 93 |
|
| 94 |
-
|
| 95 |
# Prediction function
|
| 96 |
def predict_category(text):
|
| 97 |
processed_text = [pre_process(text)]
|
|
@@ -100,7 +92,6 @@ def predict_category(text):
|
|
| 100 |
category_idx = np.argmax(prediction, axis=1)[0]
|
| 101 |
return label_encoder.inverse_transform([category_idx])[0]
|
| 102 |
|
| 103 |
-
|
| 104 |
# Streamlit UI
|
| 105 |
st.markdown(
|
| 106 |
"""
|
|
|
|
| 11 |
from nltk.stem import WordNetLemmatizer
|
| 12 |
|
| 13 |
# Download necessary resources
|
| 14 |
+
nltk.download('punkt')
|
| 15 |
nltk.download('stopwords')
|
| 16 |
nltk.download('wordnet')
|
| 17 |
|
| 18 |
+
import tensorflow as tf
|
| 19 |
import keras
|
| 20 |
+
from keras.preprocessing.sequence import pad_sequences
|
| 21 |
|
| 22 |
import pickle
|
| 23 |
|
| 24 |
# Set Streamlit page configuration
|
| 25 |
st.set_page_config(page_title="News Category Classifier", page_icon="π°", layout="centered")
|
| 26 |
|
| 27 |
+
# Function to set background image correctly
|
|
|
|
| 28 |
def set_background(image_path):
|
| 29 |
if not os.path.exists(image_path):
|
| 30 |
st.error(f"β Background image not found: {image_path}")
|
|
|
|
| 35 |
|
| 36 |
bg_image_style = f"""
|
| 37 |
<style>
|
| 38 |
+
.stApp {{
|
| 39 |
+
background: url("data:image/jpg;base64,{encoded_img}") no-repeat center center fixed;
|
| 40 |
background-size: cover;
|
|
|
|
|
|
|
|
|
|
| 41 |
}}
|
| 42 |
</style>
|
| 43 |
"""
|
|
|
|
| 51 |
stop_words = set(stopwords.words('english')).union({"pm"})
|
| 52 |
lemmatizer = WordNetLemmatizer()
|
| 53 |
|
|
|
|
| 54 |
# Preprocessing function
|
| 55 |
def pre_process(x):
    """Normalize one piece of raw article text for classification.

    Pipeline: lowercase -> strip HTML/URLs/mentions -> demojize and drop
    emoji placeholders -> remove special characters -> tokenize, drop
    stop words, lemmatize -> rejoin into a single space-separated string.
    """
    text = x.lower()
    # Strip markup and web artifacts first, before emoji handling.
    for pattern in ("<.*?>", r"http[s]?://\S+", r"[@#]\S+"):
        text = re.sub(pattern, "", text)
    text = re.sub(r"\_+", " ", text)              # underscores -> spaces
    text = emoji.demojize(text)                   # emoji -> ":name:" placeholders
    text = re.sub(r":.*?:", "", text)             # drop those placeholders
    text = re.sub(r"[^a-zA-Z0-9\s_]", "", text)   # keep only alphanumerics/space/_
    tokens = word_tokenize(text)
    kept = (tok for tok in tokens if tok not in stop_words)
    return " ".join(lemmatizer.lemmatize(tok) for tok in kept)
|
| 68 |
|
|
|
|
| 69 |
# Cache model loading to improve performance
|
| 70 |
@st.cache_resource
|
| 71 |
def load_model():
|
|
|
|
| 81 |
|
| 82 |
return model, vectorizer, label_encoder
|
| 83 |
|
|
|
|
| 84 |
# Load the models
|
| 85 |
model, vectorizer, label_encoder = load_model()
|
| 86 |
|
|
|
|
| 87 |
# Prediction function
|
| 88 |
def predict_category(text):
|
| 89 |
processed_text = [pre_process(text)]
|
|
|
|
| 92 |
category_idx = np.argmax(prediction, axis=1)[0]
|
| 93 |
return label_encoder.inverse_transform([category_idx])[0]
|
| 94 |
|
|
|
|
| 95 |
# Streamlit UI
|
| 96 |
st.markdown(
|
| 97 |
"""
|