dinusha11 commited on
Commit
2d580de
·
verified ·
1 Parent(s): 4b61c3a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +198 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import pipeline, AutoTokenizer
5
+ import matplotlib.pyplot as plt
6
+ from wordcloud import WordCloud
7
+
8
+ # Load the fine-tuned DistilBERT model from Hugging Face
9
+ MODEL_NAME = "dinusha11/finetuned-distilbert-news"
10
+
11
+
12
# Build the news-classification pipeline once and reuse it across reruns.
@st.cache_resource
def load_model():
    """Return a cached text-classification pipeline for the fine-tuned model."""
    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Prefer the first CUDA device when available; -1 selects CPU.
    dev = 0 if torch.cuda.is_available() else -1
    return pipeline("text-classification", model=MODEL_NAME, tokenizer=tok, device=dev)


classifier = load_model()
22
+
23
+
24
# Extractive question-answering pipeline (default model), cached by Streamlit.
@st.cache_resource
def load_qa_pipeline():
    """Return a cached question-answering pipeline."""
    qa = pipeline("question-answering")
    return qa


qa_pipeline = load_qa_pipeline()
31
+
32
+
33
# Sentiment-analysis pipeline (default model), cached by Streamlit.
@st.cache_resource
def load_sentiment_pipeline():
    """Return a cached sentiment-analysis pipeline."""
    sa = pipeline("sentiment-analysis")
    return sa


sentiment_pipeline = load_sentiment_pipeline()
40
+
41
+
42
# Function to preprocess text
def preprocess_text(text):
    """Return *text* with surrounding whitespace removed.

    Robustness fix: the CSV 'content' column may contain missing values,
    which pandas represents as float NaN; calling .strip() on a non-string
    raised AttributeError. Non-string input now maps to the empty string.
    """
    if not isinstance(text, str):
        return ""
    return text.strip()
45
+
46
+
47
# Run extractive QA and keep only the answer span.
def get_answer(question, context):
    """Return the answer string extracted from *context* for *question*."""
    result = qa_pipeline(question=question, context=context)
    return result["answer"]
50
+
51
+
52
# Render a word cloud image object from raw text.
def generate_wordcloud(text):
    """Return an 800x400 white-background WordCloud built from *text*."""
    cloud = WordCloud(width=800, height=400, background_color="white")
    return cloud.generate(text)
56
+
57
+
58
# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the sentiment label (e.g. POSITIVE/NEGATIVE) for *text*.

    Fix: the original relied solely on a 512-*character* slice, which does
    not guarantee the model's 512-*token* limit (dense scripts/emoji can
    tokenize to more tokens than characters). The slice is kept as a cheap
    cost guard, and truncation=True enforces the token limit in the
    tokenizer itself.
    """
    return sentiment_pipeline(text[:512], truncation=True)[0]['label']
61
+
62
+
63
# Custom CSS Styling
# Injected as raw HTML: unsafe_allow_html=True is required for the <style>
# tag to be rendered instead of escaped.
# NOTE(review): .css-1aumxhk is an auto-generated Streamlit class name that
# changes between Streamlit versions — confirm it still matches the element
# this rule intends to hide.
st.markdown("""
    <style>
    body {
        font-family: Arial, sans-serif;
        background-color: #f8f9fa;
    }
    .css-1aumxhk {
        display: none;
    }
    .main-title {
        text-align: center;
        font-size: 36px;
        color: #2b2d42;
    }
    .stButton>button {
        width: 100%;
        border-radius: 10px;
    }
    </style>
""", unsafe_allow_html=True)
84
+
85
# Sidebar Navigation — `page` drives the if/elif page dispatch below.
st.sidebar.title("Navigation")
_PAGES = ["Home", "News Classification", "Q&A", "Word Cloud", "Sentiment Analysis"]
page = st.sidebar.radio("Go to:", _PAGES)
89
+
90
# Home Page — static landing content describing the app's features.
if page == "Home":
    st.title("📰 News Classification & Analysis App")
    st.write("Welcome to the AI-powered news classification and analysis platform.")

    # Feature overview rendered as a markdown bullet list.
    st.write("""
    - 📌 **Upload a CSV** containing news articles.
    - 🔍 **Get Classification** into Business, Opinion, Political Gossip, Sports, or World News.
    - 🧠 **Ask AI Questions** on news content.
    - ☁ **Visualize Data** with a Word Cloud.
    - 📊 **Analyze Sentiment** of news articles.
    """)
    st.success("Get started by navigating to 'News Classification' from the sidebar!")
103
+
104
+ # News Classification Page
105
+ elif page == "News Classification":
106
+ st.title("πŸ“ Classify News Articles")
107
+ uploaded_file = st.file_uploader("πŸ“‚ Upload a CSV file", type=["csv"], key="file_uploader")
108
+
109
+ if uploaded_file:
110
+ df = pd.read_csv(uploaded_file)
111
+ if 'content' not in df.columns:
112
+ st.error("The CSV file must contain a 'content' column.")
113
+ else:
114
+ df['processed_content'] = df['content'].apply(preprocess_text)
115
+ df['class'] = df['processed_content'].apply(lambda x: classifier(x[:512])[0]['label'])
116
+ st.success("βœ… Classification completed!")
117
+
118
+ with st.expander("πŸ“‹ View Classified News"):
119
+ st.dataframe(df[['content', 'class']])
120
+
121
+ # Download button
122
+ output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
123
+ st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")
124
+
125
# Q&A Section — pick one uploaded article and ask free-form questions
# answered by the extractive QA pipeline.
elif page == "Q&A":
    st.title("🧠 Ask Questions About News Content")
    uploaded_file_qa = st.file_uploader("📂 Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")

    if uploaded_file_qa:
        df_qa = pd.read_csv(uploaded_file_qa)
        if 'content' not in df_qa.columns:
            st.error("The CSV file must contain a 'content' column.")
        else:
            st.write("📰 **Available News Articles:**")
            # The full article text doubles as the selectbox option label.
            selected_article = st.selectbox("Select an article", df_qa['content'])

            question = st.text_input("🔍 Ask a question about this article:")

            # Only query the model once both a question and a non-blank
            # article are present.
            # NOTE(review): assumes 'content' cells are strings — a NaN cell
            # would make .strip() raise; confirm upstream data is clean.
            if question and selected_article.strip():
                try:
                    answer = get_answer(question, selected_article)
                    st.success(f"**Answer:** {answer}")
                except Exception as e:
                    # Surface pipeline failures to the user instead of a traceback.
                    st.error(f"Error processing question: {str(e)}")
146
+
147
# Word Cloud Section — concatenate all article texts and render one cloud.
elif page == "Word Cloud":
    st.title("☁ Word Cloud Visualization")

    uploaded_file_wc = st.file_uploader("📂 Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")

    if uploaded_file_wc:
        df_wc = pd.read_csv(uploaded_file_wc)
        if 'content' not in df_wc.columns:
            st.error("The CSV file must contain a 'content' column.")
        else:
            # dropna + astype(str) guards against NaN and non-string cells
            # before joining everything into a single text blob.
            all_text = " ".join(df_wc['content'].dropna().astype(str))  # Ensure no NaN values
            if all_text:
                wordcloud = generate_wordcloud(all_text)
                # Draw the cloud as an image on a matplotlib axis with the
                # axis frame hidden.
                fig, ax = plt.subplots(figsize=(10, 5))
                ax.imshow(wordcloud, interpolation="bilinear")
                ax.axis("off")
                st.pyplot(fig)
            else:
                st.error("The 'content' column is empty or contains invalid data.")
167
+
168
+ # Sentiment Analysis Section
169
+ elif page == "Sentiment Analysis":
170
+ st.title("πŸ“Š Sentiment Analysis")
171
+
172
+ uploaded_file_sentiment = st.file_uploader("πŸ“‚ Upload CSV for Sentiment Analysis", type=["csv"], key="sentiment_file_uploader")
173
+
174
+ if uploaded_file_sentiment:
175
+ df_sentiment = pd.read_csv(uploaded_file_sentiment)
176
+ if 'content' not in df_sentiment.columns:
177
+ st.error("The CSV file must contain a 'content' column.")
178
+ else:
179
+ df_sentiment['sentiment'] = df_sentiment['content'].apply(lambda x: analyze_sentiment(x[:512]))
180
+ st.success("βœ… Sentiment Analysis Completed!")
181
+
182
+ with st.expander("πŸ“‹ View Sentiment Results"):
183
+ st.dataframe(df_sentiment[['content', 'sentiment']])
184
+
185
+ # Sentiment distribution visualization
186
+ sentiment_counts = df_sentiment['sentiment'].value_counts()
187
+ fig, ax = plt.subplots()
188
+ sentiment_counts.plot(kind='bar', color=['green', 'red', 'gray'], ax=ax)
189
+ ax.set_title("Sentiment Distribution")
190
+ ax.set_xlabel("Sentiment")
191
+ ax.set_ylabel("Count")
192
+ st.pyplot(fig)
193
+
194
+ # Download button
195
+ output_csv_sentiment = df_sentiment[['content', 'sentiment']].to_csv(index=False).encode('utf-8')
196
+ st.download_button("⬇ Download Sentiment Data", data=output_csv_sentiment, file_name="sentiment_output.csv", mime="text/csv")
197
+
198
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ pandas
4
+ wordcloud
5
+ matplotlib
6
+ torch
7
+ sentencepiece
8
+ huggingface_hub
9
+