# Hugging Face Space: dinusha11/News_Classification_Analysis_App (status: Sleeping)
# File size: 6,850 Bytes - revision cd4fcbb

import streamlit as st
import pandas as pd
import torch
from transformers import pipeline, AutoTokenizer
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Hugging Face Hub id of the fine-tuned DistilBERT news classifier
MODEL_NAME = "dinusha11/finetuned-distilbert-news"

# Translate the classifier's "LABEL_<index>" outputs into readable category
# names; a category's position in the tuple is its label index.
label_mapping = {
    f"LABEL_{index}": category
    for index, category in enumerate(
        ("Business", "Opinion", "Sports", "Political_gossip", "World_news")
    )
}

# Build the news classifier (tokenizer + text-classification pipeline)
@st.cache_resource
def load_model():
    """Create the text-classification pipeline for ``MODEL_NAME``.

    The pipeline is placed on CUDA device 0 when a GPU is available and on
    the CPU (device ``-1``) otherwise. The result is cached through
    ``st.cache_resource``.
    """
    news_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    if torch.cuda.is_available():
        device_index = 0
    else:
        device_index = -1
    return pipeline(
        "text-classification",
        model=MODEL_NAME,
        tokenizer=news_tokenizer,
        device=device_index,
    )

classifier = load_model()

# Load QA pipeline
@st.cache_resource
def load_qa_pipeline():
    """Create the extractive question-answering pipeline.

    The checkpoint is named explicitly so the app does not depend on the
    library's implicit per-task default (which Transformers warns against in
    production). The id below is the checkpoint Transformers is expected to
    fall back to for this task - NOTE(review): confirm against the installed
    version if exact parity with the previous behaviour matters.

    Device selection mirrors the classifier loader: CUDA device 0 when
    available, otherwise CPU.
    """
    return pipeline(
        "question-answering",
        model="distilbert/distilbert-base-cased-distilled-squad",
        device=0 if torch.cuda.is_available() else -1,
    )

qa_pipeline = load_qa_pipeline()

# Load Sentiment Analysis pipeline
@st.cache_resource
def load_sentiment_pipeline():
    """Create the sentiment-analysis pipeline.

    The checkpoint is named explicitly so the app does not depend on the
    library's implicit per-task default (which Transformers warns against in
    production). The id below is the checkpoint Transformers is expected to
    fall back to for this task - NOTE(review): confirm against the installed
    version if exact parity with the previous behaviour matters.

    Device selection mirrors the classifier loader: CUDA device 0 when
    available, otherwise CPU.
    """
    return pipeline(
        "sentiment-analysis",
        model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
        device=0 if torch.cuda.is_available() else -1,
    )

sentiment_pipeline = load_sentiment_pipeline()

# Function to preprocess text
def preprocess_text(text):
    """Return ``text`` with leading and trailing whitespace removed.

    Args:
        text: A scalar cell value. Strings are stripped; missing values
            (``None``, NaN, ``pd.NA``) become ``""``; any other scalar is
            converted with ``str()`` first.

    Returns:
        The cleaned string.
    """
    if isinstance(text, str):
        return text.strip()
    # pandas reads empty CSV cells as NaN (a float), which has no .strip().
    if pd.isna(text):
        return ""
    return str(text).strip()

# Function for Q&A
def get_answer(question, context):
    """Run the QA pipeline on ``context`` and return its ``'answer'`` field."""
    qa_result = qa_pipeline(question=question, context=context)
    return qa_result['answer']

# Function to generate word cloud
def generate_wordcloud(text):
    """Build an 800x400 word cloud on a white background from ``text``."""
    cloud_options = {"width": 800, "height": 400, "background_color": 'white'}
    return WordCloud(**cloud_options).generate(text)

# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the sentiment label the pipeline assigns to ``text``.

    Args:
        text: The text to score. Missing values (``None``, NaN) are treated
            as an empty string and other non-string scalars are converted
            with ``str()``, so a sparse DataFrame column does not crash.

    Returns:
        The ``'label'`` of the pipeline's first prediction.
    """
    if not isinstance(text, str):
        text = "" if pd.isna(text) else str(text)
    # Only the first 512 characters are scored (unchanged). The character cap
    # does not bound the token count, so tokenizer truncation is also enabled
    # to keep token-dense input within the model's maximum length.
    return sentiment_pipeline(text[:512], truncation=True)[0]['label']

# Custom CSS Styling: kept in a named constant and injected as raw HTML
_CUSTOM_CSS = """

    <style>

        body {

            font-family: Arial, sans-serif;

            background-color: #f8f9fa;

        }

        .css-1aumxhk {

            display: none;

        }

        .main-title {

            text-align: center;

            font-size: 36px;

            color: #2b2d42;

        }

        .stButton>button {

            width: 100%;

            border-radius: 10px;

        }

    </style>

"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Sidebar Navigation: one radio entry per page handled below
st.sidebar.title("Navigation")
_page_names = ["Home", "News Classification", "Q&A", "Word Cloud", "Sentiment Analysis"]
page = st.sidebar.radio("Go to:", _page_names)

# Home Page
if page == "Home":
    # Static landing page: headline, welcome line, feature overview and a
    # pointer to the classification page.
    welcome_text = "Welcome to the AI-powered news classification and analysis platform."
    feature_overview = """

        - 📌 **Upload a CSV** containing news articles.

        - 🔍 **Get Classification** into Business, Opinion, Political Gossip, Sports, or World News.

        - 🧠 **Ask AI Questions** on news content.

        - ☁ **Visualize Data** with a Word Cloud.

        - 📊 **Analyze Sentiment** of news articles.

    """
    st.title("📰 News Classification & Analysis App")
    st.write(welcome_text)
    st.write(feature_overview)
    st.success("Get started by navigating to 'News Classification' from the sidebar!")

# News Classification Page
elif page == "News Classification":
    st.title("📝 Classify News Articles")
    uploaded_file = st.file_uploader("📂 Upload a CSV file", type=["csv"], key="file_uploader")

    if uploaded_file:
        df = pd.read_csv(uploaded_file)
        if 'content' not in df.columns:
            st.error("The CSV file must contain a 'content' column.")
        else:
            df['processed_content'] = df['content'].apply(preprocess_text)
            df['class'] = df['processed_content'].apply(lambda x: label_mapping[classifier(x[:512])[0]['label']])
            st.success("✅ Classification completed!")

            with st.expander("📋 View Classified News"):
                st.dataframe(df[['content', 'class']])

            # Download button
            output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
            st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")

# Q&A Section
elif page == "Q&A":
    st.title("🧠 Ask Questions About News Content")
    uploaded_file_qa = st.file_uploader("📂 Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")

    if uploaded_file_qa:
        df_qa = pd.read_csv(uploaded_file_qa)
        if 'content' not in df_qa.columns:
            st.error("The CSV file must contain a 'content' column.")
        else:
            st.write("📰 **Available News Articles:**")
            selected_article = st.selectbox("Select an article", df_qa['content'])

            question = st.text_input("🔍 Ask a question about this article:")

            if question and selected_article.strip():
                try:
                    answer = get_answer(question, selected_article)
                    st.success(f"**Answer:** {answer}")
                except Exception as e:
                    st.error(f"Error processing question: {str(e)}")

# Word Cloud Section
elif page == "Word Cloud":
    st.title("☁ Word Cloud Visualization")
    uploaded_file_wc = st.file_uploader("📂 Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")

    if uploaded_file_wc:
        df_wc = pd.read_csv(uploaded_file_wc)
        if 'content' not in df_wc.columns:
            st.error("The CSV file must contain a 'content' column.")
        else:
            all_text = " ".join(df_wc['content'].dropna().astype(str))
            if all_text:
                wordcloud = generate_wordcloud(all_text)
                fig, ax = plt.subplots(figsize=(10, 5))
                ax.imshow(wordcloud, interpolation="bilinear")
                ax.axis("off")
                st.pyplot(fig)
            else:
                st.error("The 'content' column is empty or contains invalid data.")

# Sentiment Analysis Section
elif page == "Sentiment Analysis":
    st.title("📊 Sentiment Analysis")
    uploaded_file_sentiment = st.file_uploader("📂 Upload CSV for Sentiment Analysis", type=["csv"], key="sentiment_file_uploader")

    if uploaded_file_sentiment:
        df_sentiment = pd.read_csv(uploaded_file_sentiment)
        if 'content' not in df_sentiment.columns:
            st.error("The CSV file must contain a 'content' column.")
        else:
            df_sentiment['sentiment'] = df_sentiment['content'].apply(lambda x: analyze_sentiment(x[:512]))
            st.success("✅ Sentiment Analysis Completed!")

            with st.expander("📋 View Sentiment Results"):
                st.dataframe(df_sentiment[['content', 'sentiment']])

            # Download button
            output_csv_sentiment = df_sentiment[['content', 'sentiment']].to_csv(index=False).encode('utf-8')
            st.download_button("⬇ Download Sentiment Data", data=output_csv_sentiment, file_name="sentiment_output.csv", mime="text/csv")