Update app.py
app.py CHANGED
@@ -3,51 +3,17 @@ import pandas as pd
 import numpy as np
 import re
 import nltk
-import os
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
-from nltk.tokenize import word_tokenize
 from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
 import io
 
-st.set_page_config(
-    page_title="News Analysis Dashboard",
-    page_icon="📰",
-    layout="wide",
-    initial_sidebar_state="expanded"
-)
-
-st.cache_resource
-def setup_nltk():
-    # Set NLTK data path
-    nltk_data_path = os.path.join(os.getcwd(), 'nltk_data')
-    os.makedirs(nltk_data_path, exist_ok=True)
-    nltk.data.path.append(nltk_data_path)
-
-    # Download required resources with retries
-    required = ['punkt', 'stopwords', 'wordnet', 'omw-1.4']
-    for resource in required:
-        max_retries = 3
-        for attempt in range(max_retries):
-            try:
-                nltk.data.find(f'tokenizers/punkt/PY3/english.pickle')
-                break
-            except LookupError:
-                try:
-                    nltk.download(resource, download_dir=nltk_data_path)
-                    # Special handling for punkt
-                    if resource == 'punkt':
-                        nltk.download('punkt_tab', download_dir=nltk_data_path)
-                except Exception as e:
-                    if attempt == max_retries - 1:
-                        st.error(f"Failed to download NLTK resource {resource} after {max_retries} attempts")
-                        raise
-                    continue
-
-# Run initialization before anything else
-setup_nltk()
+# Download NLTK resources
+nltk.download('punkt')
+nltk.download('stopwords')
+nltk.download('wordnet')
 
 # Initialize lemmatizer
 lemmatizer = WordNetLemmatizer()
@@ -91,7 +57,13 @@ def generate_wordcloud(text, title=None):
     plt.title(title, fontsize=20)
     st.pyplot(plt)
 
-
+# Set page config
+st.set_page_config(
+    page_title="News Analysis Dashboard",
+    page_icon="📰",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
 
 # Custom CSS
 st.markdown("""
@@ -143,11 +115,9 @@ with tab1:
 
     # Load the fine-tuned news classifier
    classifier = pipeline("text-classification", model="Prageeth-1/News_classification.2")
-
-    df['preprocessed_text'] = df['content'].apply(preprocess_text)
 
     # Classify each article and store the predictions
-    df["predicted_category"] = df["
+    df["predicted_category"] = df["content"].apply(lambda text: classifier(text)[0]["label"])
 
     # Preprocess and classify
 
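
A note on the key replacement in the last hunk: a Transformers text-classification pipeline returns a list with one dict per input, each carrying "label" and "score" keys, which is why the added line extracts the prediction with [0]["label"]. Below is a minimal standalone sketch of the same pattern, assuming the model repo named in the diff is reachable; the sample headline is illustrative and the label set depends on the fine-tuned model:

    from transformers import pipeline

    # Load the same fine-tuned news classifier used in app.py
    classifier = pipeline("text-classification", model="Prageeth-1/News_classification.2")

    # The pipeline returns one dict per input, e.g. [{"label": "business", "score": 0.97}]
    headline = "Stocks rallied after the central bank held rates steady."  # illustrative input
    result = classifier(headline)

    # Indexing [0]["label"] mirrors the lambda in the diff's new classification line
    print(result[0]["label"], round(result[0]["score"], 3))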