Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import numpy as np | |
| import re | |
| import emoji | |
| import nltk | |
| from nltk.tokenize import word_tokenize | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
| import tensorflow as tf | |
| import keras | |
| from keras.utils import pad_sequences | |
| import pickle | |
| import os | |
# Streamlit page configuration (browser-tab title + icon). Must run before
# any other st.* call.
st.set_page_config(page_title="PressGuard", page_icon="🛡️")

# Glowing "radium" title banner and tagline.
st.markdown("""
<style>
.radium {
    font-size: 60px;
    font-weight: bold;
    color: #f4ff81; /* Radium-like light greenish-yellow color */
    text-shadow: 0 0 5px #f4ff81, 0 0 10px #f4ff81, 0 0 20px #f4ff81, 0 0 30px #ccff66;
    text-align: center;
}
.tagline {
    font-size: 20px;
    color: #ffffff;
    text-align: center;
    margin-bottom: 30px;
}
</style>
<div class='radium'>🛡️ PressGuard</div>
<div class='tagline'>Classify and Filter Trustworthy News</div>
""", unsafe_allow_html=True)

# NOTE(review): a second st.markdown call that injected the body background
# CSS was removed here — it duplicated, rule for rule, the background CSS
# already injected by the styling block below, so the extra <style> tag had
# no visual effect.
# Background Image and Enhanced Styling
# Injects the page-wide CSS used by the rest of the app: a fixed background
# image on <body>, plus the classes referenced later by st.markdown calls —
# .prompt-box (the animated instruction banner), .analyze-button (styling for
# the Analyze control), and .result-box (the prediction output card).
# unsafe_allow_html is required because Streamlit escapes HTML by default.
st.markdown(
    """
    <style>
    body {
        background-image: url('https://cdn-uploads.huggingface.co/production/uploads/675fab3a2d0851e23d23cad3/ut9wBSlRR2CCpw95V5ej8.jpeg');
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }
    .centered-container {
        text-align: center;
    }
    .title {
        font-size: 60px;
        font-weight: bold;
        color: white;
        background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
        padding: 20px;
        border-radius: 20px;
        box-shadow: 0 8px 25px rgba(136, 14, 79, 0.5),
                    0 4px 15px rgba(74, 35, 90, 0.6),
                    inset 0 2px 10px rgba(49, 27, 146, 0.4);
        display: inline-block;
        margin-bottom: 20px;
        animation: elegantFadeSlide 1.5s ease-out forwards;
    }
    .prompt-box {
        font-size: 22px;
        font-weight: bold;
        color: white;
        text-align: center;
        background: linear-gradient(135deg, #33ccff, #ff99cc, #33ff99, #ffcc00);
        background-size: 400% 400%;
        animation: gradientAnimation 8s ease infinite;
        padding: 15px;
        border-radius: 15px;
        box-shadow: 0 0 15px rgba(255, 255, 255, 0.7),
                    0 0 25px rgba(136, 14, 79, 0.7),
                    0 0 35px rgba(49, 27, 146, 0.7);
        transition: all 0.4s ease-in-out;
    }
    .prompt-box:hover {
        transform: scale(1.05) rotate(1deg);
        box-shadow: 0 0 25px rgba(255, 255, 255, 0.9),
                    0 0 35px rgba(136, 14, 79, 0.9),
                    0 0 45px rgba(49, 27, 146, 0.9);
    }
    @keyframes gradientAnimation {
        0% { background-position: 0% 50%; }
        50% { background-position: 100% 50%; }
        100% { background-position: 0% 50%; }
    }
    .analyze-button {
        width: 180px;
        height: 60px;
        border-radius: 50px;
        background: linear-gradient(45deg, #880E4F, #4A235A, #311B92, #000000);
        font-size: 20px;
        font-weight: bold;
        color: white;
        border: none;
        box-shadow: 0 8px 25px rgba(136, 14, 79, 0.5),
                    0 4px 15px rgba(74, 35, 90, 0.6),
                    0 2px 10px rgba(49, 27, 146, 0.7),
                    inset 0 1px 5px rgba(0, 0, 0, 0.4);
        cursor: pointer;
        transition: all 0.4s ease-in-out;
    }
    .analyze-button:hover {
        transform: scale(1.1);
        background: linear-gradient(225deg, #880E4F, #4A235A, #311B92, #000000);
        box-shadow: 0 12px 35px rgba(49, 27, 146, 0.8),
                    0 8px 25px rgba(74, 35, 90, 0.7),
                    0 4px 15px rgba(136, 14, 79, 0.6);
    }
    .result-box {
        text-align: center;
        font-size: 28px;
        font-weight: bold;
        background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
        color: white;
        padding: 30px;
        border-radius: 20px;
        display: inline-block;
        margin-top: 30px;
        box-shadow: 0 6px 20px rgba(74, 35, 90, 0.5),
                    0 3px 15px rgba(136, 14, 79, 0.4),
                    inset 0 2px 10px rgba(49, 27, 146, 0.3);
    }
    </style>
    """,
    unsafe_allow_html=True
)
# Instruction banner (styled by .prompt-box from the CSS injected above).
st.markdown("<div class='prompt-box'>Paste the article content below to analyze its category with PressGuard🛡️</div>", unsafe_allow_html=True)

# Ensure the required NLTK resources exist under the default search path
# (~/nltk_data); download each one only when nltk.data.find raises
# LookupError, so repeated Streamlit reruns skip the network entirely.
# NOTE(review): the original code additionally re-imported nltk and called
# nltk.download('punkt'/'stopwords'/'wordnet') unconditionally after these
# checks, re-triggering the downloader on every rerun — that redundancy is
# removed here.
nltk_data_path = os.path.expanduser('~/nltk_data')
if not os.path.exists(nltk_data_path):
    os.makedirs(nltk_data_path)
for resource, locator in (
    ('punkt', 'tokenizers/punkt'),
    ('stopwords', 'corpora/stopwords'),
    ('wordnet', 'corpora/wordnet'),
):
    try:
        nltk.data.find(locator)
    except LookupError:
        nltk.download(resource, download_dir=nltk_data_path)

# Token filters shared by pre_process(): English stopwords plus "pm"
# (presumably to drop time-of-day tokens in articles — confirm) and a
# WordNet lemmatizer.
stop_words = set(stopwords.words('english')).union({"pm"})
lemmatizer = WordNetLemmatizer()
# Preprocessing Function
def pre_process(x):
    """Normalize raw article text for the model.

    Lowercases, strips HTML tags / URLs / @mentions and #hashtags /
    emoji, keeps only alphanumerics and underscores, then drops
    stopwords and lemmatizes the surviving tokens.
    """
    cleaned = x.lower()
    cleaned = re.sub(r"<.*?>", "", cleaned)                # HTML tags
    cleaned = re.sub(r"http[s]?://.+?\S+", "", cleaned)    # URLs
    cleaned = re.sub(r"[@#].+?\S", "", cleaned)            # @mentions / #hashtags
    cleaned = re.sub(r"\\_+", " ", cleaned)                # backslash-escaped underscores (e.g. markdown "\_")
    cleaned = re.sub(r"^[A-Za-z.].*\s-\s", "", cleaned)    # leading "SOURCE - " dateline
    cleaned = emoji.demojize(cleaned)                      # emoji -> :name: codes
    cleaned = re.sub(r":.*?:", "", cleaned)                # drop the :name: codes
    cleaned = re.sub(r"[^a-zA-Z0-9\s_]", "", cleaned)      # remaining punctuation
    tokens = word_tokenize(cleaned)
    return " ".join(
        lemmatizer.lemmatize(tok) for tok in tokens if tok not in stop_words
    )
# Load Model
@st.cache_resource
def load_model():
    """Load the classifier, text vectorizer, and label encoder from disk.

    Decorated with st.cache_resource so Streamlit reruns (triggered by
    every widget interaction) reuse the loaded artifacts instead of
    re-reading the model files each time.

    Returns:
        (model, vectorizer, label_encoder) tuple.
    """
    model = keras.models.load_model("model_m3_new.keras")
    vectorizer = keras.models.load_model("vec_text_m3_new.keras")
    # NOTE(review): pickle.load is only safe because this artifact ships
    # with the app; never point it at untrusted input.
    with open("label_encoder_m5.pkl", 'rb') as file:
        label_encoder = pickle.load(file)
    return model, vectorizer, label_encoder

model, vectorizer, label_encoder = load_model()
# Prediction Function
def predict_category(text):
    """Classify one article: preprocess, vectorize, pad, predict, decode.

    Returns the human-readable category label for *text*.
    """
    cleaned_batch = [pre_process(text)]
    token_ids = vectorizer(cleaned_batch).numpy().tolist()
    # Model was trained on fixed-length, pre-padded sequences of 128 tokens.
    padded = pad_sequences(token_ids, padding='pre', maxlen=128)
    probabilities = model.predict(padded)
    best_idx = np.argmax(probabilities, axis=1)[0]
    return label_encoder.inverse_transform([best_idx])[0]
# User Input
input_text = st.text_area("Enter News Article:", height=200)

# Classify on demand; warn instead when the text area is empty.
if st.button("Analyze", key="analyze-btn", help="Click to classify the news article"):
    if not input_text:
        st.warning("Please enter some text to analyze.")
    else:
        category = predict_category(input_text)
        st.markdown(f"<div class='result-box'>Predicted Category: {category}</div>", unsafe_allow_html=True)