Spaces:

Mpavan45
/

News_Classifier

Sleeping

App Files Files Community

Mpavan45 committed on Mar 19, 2025

Commit

d15065c

verified ·

1 Parent(s): e18387b

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -66

app.py CHANGED Viewed

@@ -10,73 +10,12 @@ import tensorflow as tf
 import keras
 from keras.utils import pad_sequences
 import pickle
 # Streamlit UI
 st.set_page_config(page_title="PressGuard", page_icon="🛡️")
-# Radium color effect for the title
-st.markdown("""
-    <style>
-    .radium {
-        font-size: 60px;
-        font-weight: bold;
-        color: #f4ff81;  /* Radium-like light greenish-yellow color */
-        text-shadow: 0 0 5px #f4ff81, 0 0 10px #f4ff81, 0 0 20px #f4ff81, 0 0 30px #f4ff81;
-        text-align: center;
-    }
-    .tagline {
-        font-size: 20px;
-        color: #ffffff;
-        text-align: center;
-        margin-bottom: 30px;
-    }
-    </style>
-    <div class='radium'>🛡️ PressGuard</div>
-    <div class='tagline'>Classify and Filter Trustworthy News</div>
-""", unsafe_allow_html=True)
-# Download necessary resources
-nltk.download('punkt')
-nltk.download('stopwords')
-nltk.download('wordnet')
-# Initialize stopwords and lemmatizer
-stop_words = set(stopwords.words('english')).union({"pm"})
-lemmatizer = WordNetLemmatizer()
-def pre_process(x):
-    x = x.lower()
-    x = re.sub("<.*?>", "", x)
-    x = re.sub("http[s]?://.+?\\S+", "", x)
-    x = re.sub("[@#].+?\\S", "", x)
-    x = re.sub(r"\\_+", " ", x)
-    x = re.sub("^[A-Za-z.].*\\s-\\s", "", x)
-    x = emoji.demojize(x)
-    x = re.sub(":.*?:", "", x)
-    x = re.sub("[^a-zA-Z0-9\\s_]", "", x)
-    words = word_tokenize(x)
-    words = [word for word in words if word not in stop_words]
-    x = " ".join([lemmatizer.lemmatize(word) for word in words])
-    return x
-@st.cache_resource
-def load_model():
-    model = keras.models.load_model("model_m3_new.keras")
-    vectorizer = keras.models.load_model("vec_text_m3_new.keras")
-    with open("label_encoder_m5.pkl", 'rb') as file:
-        label_encoder = pickle.load(file)
-    return model, vectorizer, label_encoder
-model, vectorizer, label_encoder = load_model()
-def predict_category(text):
-    processed_text = [pre_process(text)]
-    text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=128)
-    prediction = model.predict(text_vectorized)
-    category_idx = np.argmax(prediction, axis=1)[0]
-    return label_encoder.inverse_transform([category_idx])[0]
-# Custom CSS with Radium Color Effect for the Prompt
 st.markdown(
     """
     <style>
@@ -86,9 +25,11 @@ st.markdown(
             background-repeat: no-repeat;
             background-attachment: fixed;
         }
         .centered-container {
             text-align: center;
         }
         .title {
             font-size: 60px;
             font-weight: bold;
@@ -104,7 +45,6 @@ st.markdown(
             animation: elegantFadeSlide 1.5s ease-out forwards;
         }
-        /* Radium Effect for the Prompt */
         .prompt-box {
             font-size: 22px;
             font-weight: bold;
@@ -177,11 +117,71 @@ st.markdown(
     """,
     unsafe_allow_html=True
 )
-# Option 3: Using single string (Best for simplicity)
 st.markdown("<div class='centered-container'><h1 class='title'>PressGuard</h1></div>", unsafe_allow_html=True)
 st.markdown("<div class='prompt-box'>Paste the article content below to analyze its category with Newsense AI</div>", unsafe_allow_html=True)
-# User input
 input_text = st.text_area("Enter News Article:", height=200)
 if st.button("Analyze", key="analyze-btn", help="Click to classify the news article"):

 import keras
 from keras.utils import pad_sequences
 import pickle
+import os
 # Streamlit UI
 st.set_page_config(page_title="PressGuard", page_icon="🛡️")
+# Background Image and Enhanced Styling
 st.markdown(
     """
     <style>
             background-repeat: no-repeat;
             background-attachment: fixed;
         }
         .centered-container {
             text-align: center;
         }
         .title {
             font-size: 60px;
             font-weight: bold;
             animation: elegantFadeSlide 1.5s ease-out forwards;
         }
         .prompt-box {
             font-size: 22px;
             font-weight: bold;
     """,
     unsafe_allow_html=True
 )
+# Title and Prompt
 st.markdown("<div class='centered-container'><h1 class='title'>PressGuard</h1></div>", unsafe_allow_html=True)
 st.markdown("<div class='prompt-box'>Paste the article content below to analyze its category with Newsense AI</div>", unsafe_allow_html=True)
+# Ensure the NLTK resources needed below are available: look each one up and download it into ~/nltk_data only when the lookup fails
+nltk_data_path = os.path.expanduser('~/nltk_data')  # per-user directory passed as download_dir below
+if not os.path.exists(nltk_data_path):
+    os.makedirs(nltk_data_path)  # NOTE(review): os.makedirs(..., exist_ok=True) would make the separate existence check unnecessary
+try:
+    nltk.data.find('tokenizers/punkt')  # NOTE(review): newer NLTK releases look up 'punkt_tab' for word_tokenize — confirm against the installed NLTK version
+except LookupError:  # raised by the lookup when the resource is not installed
+    nltk.download('punkt', download_dir=nltk_data_path)
+try:
+    nltk.data.find('corpora/stopwords')  # needed by stopwords.words('english') below
+except LookupError:
+    nltk.download('stopwords', download_dir=nltk_data_path)
+try:
+    nltk.data.find('corpora/wordnet')  # presumably backs WordNetLemmatizer — confirm
+except LookupError:
+    nltk.download('wordnet', download_dir=nltk_data_path)
+# Initialize stopwords and lemmatizer (module-level, shared by every pre_process call)
+stop_words = set(stopwords.words('english')).union({"pm"})  # English stop words plus the extra token "pm"; a set for fast membership tests
+lemmatizer = WordNetLemmatizer()  # created once and reused for every token
+# Preprocessing Function: turn raw article text into a space-joined string of cleaned, lemmatized tokens (step order matters)
+def pre_process(x):  # x: raw text (str); returns the cleaned text (str)
+    x = x.lower()  # lower-case first so every later pattern and the stop-word test see lower-case text
+    x = re.sub("<.*?>", "", x)  # drop HTML-style tags (shortest "<...>" spans)
+    x = re.sub("http[s]?://.+?\\S+", "", x)  # drop http:// and https:// URLs
+    x = re.sub("[@#].+?\\S", "", x)  # NOTE(review): lazy ".+?" plus a single "\S" usually removes only '@'/'#' and the next two characters, not the whole mention/hashtag — confirm intended
+    x = re.sub(r"\\_+", " ", x)  # raw string: matches a literal backslash followed by one or more underscores, replaced by a space
+    x = re.sub("^[A-Za-z.].*\\s-\\s", "", x)  # strip a leading prefix that ends in whitespace-hyphen-whitespace (greedy); presumably a dateline/source tag — TODO confirm
+    x = emoji.demojize(x)  # replace emoji with their ":name:" text codes
+    x = re.sub(":.*?:", "", x)  # remove ":...:" spans, which includes the emoji codes produced on the previous line
+    x = re.sub("[^a-zA-Z0-9\\s_]", "", x)  # keep only letters, digits, whitespace and underscores
+    words = word_tokenize(x)  # word_tokenize is imported outside this view (presumably nltk.tokenize)
+    words = [word for word in words if word not in stop_words]  # drop stop words (module-level stop_words set)
+    x = " ".join([lemmatizer.lemmatize(word) for word in words])  # lemmatize each remaining token and re-join with single spaces
+    return x
+# Load Model: read the classifier, the text vectorizer and the label encoder from files given by relative path
+@st.cache_resource  # Streamlit caches the returned objects so they are not reloaded on every script rerun
+def load_model():  # takes no arguments; returns a (model, vectorizer, label_encoder) tuple
+    model = keras.models.load_model("model_m3_new.keras")  # the classifier used by model.predict in predict_category
+    vectorizer = keras.models.load_model("vec_text_m3_new.keras")  # loaded as a Keras model; called directly on a list of strings in predict_category
+    with open("label_encoder_m5.pkl", 'rb') as file:  # binary mode, as pickle requires
+        label_encoder = pickle.load(file)  # NOTE(review): pickle.load can execute arbitrary code — acceptable only if this file is a trusted app artefact
+    return model, vectorizer, label_encoder
+model, vectorizer, label_encoder = load_model()  # module-level handles read by predict_category
+# Prediction Function: clean one article, vectorize and pad it, run the model, and return the decoded label of the top-scoring class
+def predict_category(text):  # text: raw article text (str)
+    processed_text = [pre_process(text)]  # wrapped in a list: a batch containing the single cleaned article
+    text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=128)  # left-pad to width 128; NOTE(review): longer inputs are cut by pad_sequences' default truncating mode — confirm it matches training
+    prediction = model.predict(text_vectorized)  # assumes output is (1, n_classes) scores — TODO confirm
+    category_idx = np.argmax(prediction, axis=1)[0]  # index of the highest score for the only row in the batch
+    return label_encoder.inverse_transform([category_idx])[0]  # map the class index back to its original label
+# User Input
 input_text = st.text_area("Enter News Article:", height=200)
 if st.button("Analyze", key="analyze-btn", help="Click to classify the news article"):