Adityaganesh committed on
Commit
4b40003
·
verified ·
1 Parent(s): cc31e38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -48
app.py CHANGED
@@ -1,78 +1,155 @@
1
  import streamlit as st
2
- import pickle
3
- import tensorflow as tf
4
  import numpy as np
5
  import re
6
  import emoji
 
7
  import nltk
8
  from nltk.tokenize import word_tokenize
 
9
  from nltk.stem import WordNetLemmatizer
10
- from nltk.corpus import stopwords
11
- from tensorflow.keras.preprocessing.sequence import pad_sequences
12
-
13
- # Ensure necessary downloads
14
  nltk.download('punkt')
15
  nltk.download('stopwords')
16
  nltk.download('wordnet')
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  lemmatizer = WordNetLemmatizer()
19
- stop_words = set(stopwords.words('english'))
20
-
21
- @st.cache_data
22
- def pre_process(text):
23
- text = text.lower()
24
- text = re.sub("<.*?>", "", text)
25
- text = re.sub("http[s]?://\\S+", "", text)
26
- text = re.sub("[@#]\\S+", "", text)
27
- text = re.sub(r"\\_+", " ", text)
28
- text = re.sub("^[A-Za-z.].*\\s-\\s", "", text)
29
- text = emoji.demojize(text)
30
- text = re.sub(":.*?:", "", text)
31
- text = re.sub("[^a-zA-Z0-9\\s_]", "", text)
32
- words = word_tokenize(text)
33
- words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
34
- return " ".join(words)
35
 
36
  @st.cache_resource
37
  def load_model():
38
- model = tf.keras.models.load_model("news_model.keras")
39
- vectorizer = tf.keras.models.load_model("news_tv_model.keras")
40
- with open("label_encoder.pkl", 'rb') as file:
 
 
 
 
41
  label_encoder = pickle.load(file)
42
  return model, vectorizer, label_encoder
43
 
44
- # Load models
45
  model, vectorizer, label_encoder = load_model()
46
 
47
  def predict_category(text):
48
  processed_text = [pre_process(text)]
49
- text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=128)
50
  prediction = model.predict(text_vectorized)
51
  category_idx = np.argmax(prediction, axis=1)[0]
52
  return label_encoder.inverse_transform([category_idx])[0]
53
 
54
- # ✅ Set Background Image
55
- image_path = "News image 2.png" # Ensure this image is inside the app directory
56
-
57
- page_bg_img = f"""
58
- <style>
59
- .stApp {{
60
- background: url('{image_path}') no-repeat center center fixed;
61
- background-size: cover;
62
- }}
63
- </style>
64
- """
65
-
66
- st.markdown(page_bg_img, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- st.title("News Classification App")
 
69
 
70
- # User input
71
- user_text = st.text_area("Enter your news content for classification.")
72
 
73
- if st.button("Predict Category"):
74
- if user_text.strip():
75
- category = predict_category(user_text)
76
- st.success(f"Predicted Category: {category}")
77
  else:
78
- st.warning("Please enter some text to classify.")
 
1
  import streamlit as st
 
 
2
  import numpy as np
3
  import re
4
  import emoji
5
+
6
  import nltk
7
  from nltk.tokenize import word_tokenize
8
+ from nltk.corpus import stopwords
9
  from nltk.stem import WordNetLemmatizer
10
+ # Download necessary resources
 
 
 
11
  nltk.download('punkt')
12
  nltk.download('stopwords')
13
  nltk.download('wordnet')
14
 
15
+ import tensorflow
16
+ import keras
17
+ from keras.utils import pad_sequences
18
+
19
+ import pickle
20
+
21
# Streamlit UI
st.set_page_config(page_title="News Category Classifier", page_icon="📰", layout="centered")


def set_background(image_path):
    """Use an image as the full-page background of the Streamlit app.

    The browser cannot fetch a file that merely sits next to ``app.py``
    (Streamlit only serves files through its opt-in static-file feature),
    so a local image is embedded directly into the CSS as a base64
    ``data:`` URI. If ``image_path`` is not an existing local file it is
    used verbatim, which keeps http(s) URLs working.

    Args:
        image_path: Path to a local image file, or a URL the browser can
            load on its own.
    """
    # Function-scope imports keep this block self-contained.
    import base64
    import mimetypes
    import os

    if os.path.isfile(image_path):
        # Fall back to PNG when the type cannot be guessed from the extension.
        mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
        with open(image_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("ascii")
        image_url = f"data:{mime_type};base64,{encoded}"
    else:
        image_url = image_path

    bg_image_style = f"""
    <style>
    .stApp {{
        background: url('{image_url}') no-repeat center center fixed;
        background-size: cover;
    }}
    </style>
    """
    st.markdown(bg_image_style, unsafe_allow_html=True)


# Call this function with the path to your image
set_background("News image 2.png")  # Ensure the image is in the same directory
37
+
38
# Stop-word set (NLTK English list plus "pm") and the shared lemmatizer
stop_words = set(stopwords.words('english')) | {"pm"}
lemmatizer = WordNetLemmatizer()


def pre_process(x):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps, in order: lowercase, regex clean-up, emoji names expanded by
    ``emoji.demojize`` and then removed, remaining punctuation removed,
    tokenization, stop-word filtering, lemmatization.

    Args:
        x: Raw input text.

    Returns:
        The cleaned text as a single string.
    """
    x = x.lower()

    # NOTE(review): these patterns are kept exactly as-is; presumably they
    # mirror the preprocessing used when the model was trained - confirm
    # before "fixing" any of them.
    before_demojize = (
        ("<.*?>", ""),                # HTML-like tags
        ("http[s]?://.+?\\S+", ""),   # URLs
        # Lazy quantifier: only a few characters after '@'/'#' are removed
        # (e.g. "@user" -> "er"), not the whole mention/hashtag.
        ("[@#].+?\\S", ""),
        (r"\\_+", " "),               # a literal backslash followed by underscores
        ("^[A-Za-z.].*\\s-\\s", ""),  # leading text up to the last " - "
    )
    for pattern, replacement in before_demojize:
        x = re.sub(pattern, replacement, x)

    # Emojis become ":name:" tokens, which the next pattern strips.
    x = emoji.demojize(x)
    for pattern in (":.*?:", "[^a-zA-Z0-9\\s_]"):
        x = re.sub(pattern, "", x)

    kept_tokens = (token for token in word_tokenize(x) if token not in stop_words)
    return " ".join(lemmatizer.lemmatize(token) for token in kept_tokens)
 
56
 
57
@st.cache_resource
def load_model():
    """Load the classifier, the text vectorizer and the label encoder.

    Decorated with ``st.cache_resource`` so the artifacts are loaded once
    and reused across Streamlit reruns.

    Returns:
        A ``(model, vectorizer, label_encoder)`` tuple.
    """
    model = keras.models.load_model("news_model.keras")
    vectorizer = keras.models.load_model("news_tv_model.keras")

    # NOTE: unpickling runs arbitrary code; only ship a label_encoder.pkl
    # that was produced by a trusted training run.
    with open("label_encoder.pkl", 'rb') as encoder_file:
        label_encoder = pickle.load(encoder_file)

    return model, vectorizer, label_encoder


model, vectorizer, label_encoder = load_model()
70
 
71
def predict_category(text):
    """Predict the news category label for a piece of text.

    Args:
        text: Raw user-supplied text.

    Returns:
        The label that ``label_encoder`` maps the highest-scoring class
        index back to.
    """
    cleaned_batch = [pre_process(text)]
    token_ids = vectorizer(cleaned_batch).numpy().tolist()
    # maxlen=82 presumably matches the sequence length used in training -
    # TODO confirm against the training code.
    padded = pad_sequences(token_ids, padding='pre', maxlen=82)
    scores = model.predict(padded)
    best_index = np.argmax(scores, axis=1)[0]
    decoded = label_encoder.inverse_transform([best_index])
    return decoded[0]
77
 
78
# UI
import html  # used to escape the predicted label before embedding it in HTML

# Page-wide CSS for the title, subtitle and result card.
# NOTE: no element created in this script carries the .classify-button
# class (st.button renders its own markup), so that rule currently has
# no visible effect; it is kept unchanged.
st.markdown(
    """
    <style>
    .title {
        color: #ffffff;
        font-size: 2.4em;
        text-align: center;
        font-weight: 700;
        text-transform: uppercase;
        text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
        padding: 10px;
    }
    .subtitle {
        color: #ffff;
        font-size: 1.3em;
        text-align: center;
        font-weight: 600;
        text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
        padding: 5px;
    }
    .classify-button {
        background-color: #3498db;
        color: white;
        font-size: 1.2em;
        padding: 12px 24px;
        border: none;
        border-radius: 8px;
        cursor: pointer;
        display: block;
        margin: 20px auto;
        transition: 0.3s;
    }
    .classify-button:hover {
        background-color: #2980b9;
    }
    .result-box {
        background: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
        padding: 20px;
        border-radius: 10px;
        text-align: center;
        margin-top: 30px;
        position: relative;
        overflow: hidden;
        border: 2px solid transparent;
        background-clip: padding-box, border-box;
        border-image: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
        border-image-slice: 0;
        transition: transform 0.3s ease-in-out, box-shadow 0.3s ease-in-out;
    }
    .result-box:hover {
        transform: scale(1.05);
        box-shadow: 0px 10px 30px rgba(98, 132, 255, 0.8),
                    0px 10px 30px rgba(255, 0, 0, 0.8);
    }
    .result-text {
        font-size: 1.8em;
        color: #ffffff;
        font-weight: 900;
        text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
        animation: fadeIn 0.8s ease-in-out;
    }
    @keyframes fadeIn {
        from { opacity: 0; }
        to { opacity: 1; }
    }
    </style>
    """,
    unsafe_allow_html=True
)

st.markdown("<div class='title'>📰 News Classifier</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>", unsafe_allow_html=True)

user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")

if st.button("Analyze 🏷️"):
    if user_input.strip():
        category = predict_category(user_input)
        # Escape the label so characters such as '&' or '<' in a category
        # name cannot break the surrounding markup.
        safe_category = html.escape(str(category))
        st.markdown(f"<div class='result-box'><span class='result-text'>🗂️ Predicted Category: <strong>{safe_category}</strong></span></div>", unsafe_allow_html=True)
    else:
        st.warning("⚠️ Please enter some text to analyze.")