# File size: 6,850 Bytes
# cd4fcbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import streamlit as st
import pandas as pd
import torch
from transformers import pipeline, AutoTokenizer
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Hugging Face Hub id of the fine-tuned DistilBERT news classifier.
MODEL_NAME = "dinusha11/finetuned-distilbert-news"

# Translates the generic "LABEL_<index>" keys into readable news categories;
# the position of each category in the tuple below is its class index.
label_mapping = {
    f"LABEL_{index}": category
    for index, category in enumerate(
        ("Business", "Opinion", "Sports", "Political_gossip", "World_news")
    )
}

# Build the news classifier (tokenizer + text-classification pipeline)
@st.cache_resource
def load_model():
    """Return a text-classification pipeline for the ``MODEL_NAME`` checkpoint.

    The tokenizer is loaded from the same checkpoint. The pipeline runs on
    CUDA device 0 when torch reports a GPU, otherwise on the CPU (-1).
    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    pipeline instead of rebuilding it on every script rerun.
    """
    news_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    target_device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "text-classification",
        model=MODEL_NAME,
        tokenizer=news_tokenizer,
        device=target_device,
    )

classifier = load_model()

# Load QA pipeline
@st.cache_resource
def load_qa_pipeline():
    """Return the extractive question-answering pipeline used by the Q&A page.

    The checkpoint is named explicitly instead of relying on the library's
    implicit task default: that default can change between transformers
    releases and triggers a "no model was supplied" warning at load time.
    The model named here is the one transformers selects by default for
    this task, so answers are unchanged.

    The pipeline is placed on CUDA device 0 when available (CPU otherwise),
    matching how the news classifier is loaded.
    """
    return pipeline(
        "question-answering",
        model="distilbert/distilbert-base-cased-distilled-squad",
        device=0 if torch.cuda.is_available() else -1,
    )

qa_pipeline = load_qa_pipeline()

# Load Sentiment Analysis pipeline
@st.cache_resource
def load_sentiment_pipeline():
    """Return the sentiment-analysis pipeline used by the Sentiment page.

    The checkpoint is named explicitly instead of relying on the library's
    implicit task default: that default can change between transformers
    releases and triggers a "no model was supplied" warning at load time.
    The model named here is the one transformers selects by default for
    this task, so the predicted labels are unchanged.

    The pipeline is placed on CUDA device 0 when available (CPU otherwise),
    matching how the news classifier is loaded.
    """
    return pipeline(
        "sentiment-analysis",
        model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
        device=0 if torch.cuda.is_available() else -1,
    )

sentiment_pipeline = load_sentiment_pipeline()

# Function to preprocess text
def preprocess_text(text):
    """Return *text* with leading and trailing whitespace removed.

    Missing values (``None`` or a float NaN, which is how pandas represents
    an empty CSV cell) become an empty string, and any other non-string
    value is converted with ``str()`` first. This lets the function be
    applied to a whole DataFrame column without raising ``AttributeError``
    on the first non-string cell.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    return str(text).strip()

# Answer a question from a passage of text
def get_answer(question, context):
    """Run the QA pipeline on *context* and return the answer text for *question*."""
    qa_result = qa_pipeline(question=question, context=context)
    answer_text = qa_result['answer']
    return answer_text

# Build a word cloud from raw text
def generate_wordcloud(text):
    """Return the result of generating an 800x400, white-background WordCloud from *text*."""
    cloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
    )
    return cloud.generate(text)

# Classify the sentiment of a piece of text
def analyze_sentiment(text):
    """Return the sentiment label predicted for the first 512 characters of *text*."""
    snippet = text[:512]  # only the leading 512 characters are scored
    top_prediction = sentiment_pipeline(snippet)[0]
    return top_prediction['label']

# Custom CSS Styling
# The stylesheet is kept in a named constant and injected as raw HTML below;
# unsafe_allow_html is required for the <style> tag to be passed through.
_CUSTOM_CSS = """

    <style>

        body {

            font-family: Arial, sans-serif;

            background-color: #f8f9fa;

        }

        .css-1aumxhk {

            display: none;

        }

        .main-title {

            text-align: center;

            font-size: 36px;

            color: #2b2d42;

        }

        .stButton>button {

            width: 100%;

            border-radius: 10px;

        }

    </style>

"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Sidebar Navigation
# `page` holds the label of the radio option currently selected in the sidebar.
_PAGE_CHOICES = [
    "Home",
    "News Classification",
    "Q&A",
    "Word Cloud",
    "Sentiment Analysis",
]
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to:", _PAGE_CHOICES)

# Shared CSV loader for every page that works on uploaded articles
def _load_articles(uploaded_file):
    """Read an uploaded CSV and return its rows that have usable ``content``.

    Shows a Streamlit error and returns ``None`` when the file cannot be
    parsed, has no ``content`` column, or has no non-missing content at all.
    Rows whose ``content`` cell is missing are dropped (with a warning),
    because pandas represents them as float NaN and the string operations
    downstream (``.strip()``, slicing) would raise on them. The remaining
    ``content`` values are converted to ``str``.
    """
    try:
        frame = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None

    if 'content' not in frame.columns:
        st.error("The CSV file must contain a 'content' column.")
        return None

    usable = frame.dropna(subset=['content']).copy()
    skipped = len(frame) - len(usable)
    if skipped:
        st.warning(f"Skipped {skipped} row(s) with no content.")
    if usable.empty:
        st.error("The 'content' column is empty or contains invalid data.")
        return None

    usable['content'] = usable['content'].astype(str)
    return usable


# Home Page
if page == "Home":
    st.title("📰 News Classification & Analysis App")
    st.write("Welcome to the AI-powered news classification and analysis platform.")
    st.write("""

        - 📌 **Upload a CSV** containing news articles.

        - 🔍 **Get Classification** into Business, Opinion, Political Gossip, Sports, or World News.

        - 🧠 **Ask AI Questions** on news content.

        - ☁ **Visualize Data** with a Word Cloud.

        - 📊 **Analyze Sentiment** of news articles.

    """)
    st.success("Get started by navigating to 'News Classification' from the sidebar!")

# News Classification Page
elif page == "News Classification":
    st.title("📝 Classify News Articles")
    uploaded_file = st.file_uploader("📂 Upload a CSV file", type=["csv"], key="file_uploader")

    df = _load_articles(uploaded_file) if uploaded_file else None
    if df is not None:
        df['processed_content'] = df['content'].apply(preprocess_text)
        raw_labels = df['processed_content'].apply(lambda x: classifier(x[:512])[0]['label'])
        # Fall back to the raw model label rather than raising KeyError if the
        # model ever emits a label that label_mapping does not know about.
        df['class'] = raw_labels.map(lambda label: label_mapping.get(label, label))
        st.success("✅ Classification completed!")

        with st.expander("📋 View Classified News"):
            st.dataframe(df[['content', 'class']])

        # Download button
        output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
        st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")

# Q&A Section
elif page == "Q&A":
    st.title("🧠 Ask Questions About News Content")
    uploaded_file_qa = st.file_uploader("📂 Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")

    df_qa = _load_articles(uploaded_file_qa) if uploaded_file_qa else None
    if df_qa is not None:
        st.write("📰 **Available News Articles:**")
        selected_article = st.selectbox("Select an article", df_qa['content'].tolist())

        question = st.text_input("🔍 Ask a question about this article:")

        # Skip blank questions/articles instead of sending them to the model.
        if question.strip() and selected_article and selected_article.strip():
            # Broad catch is deliberate: this is the UI boundary, and any model
            # failure should be reported on the page rather than crash the app.
            try:
                answer = get_answer(question, selected_article)
                st.success(f"**Answer:** {answer}")
            except Exception as e:
                st.error(f"Error processing question: {str(e)}")

# Word Cloud Section
elif page == "Word Cloud":
    st.title("☁ Word Cloud Visualization")
    uploaded_file_wc = st.file_uploader("📂 Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")

    df_wc = _load_articles(uploaded_file_wc) if uploaded_file_wc else None
    if df_wc is not None:
        all_text = " ".join(df_wc['content'])
        try:
            wordcloud = generate_wordcloud(all_text)
        except ValueError:
            # WordCloud raises ValueError when the text yields no words to plot
            # (e.g. whitespace only, or nothing left after stopword removal).
            st.error("The 'content' column is empty or contains invalid data.")
        else:
            fig, ax = plt.subplots(figsize=(10, 5))
            ax.imshow(wordcloud, interpolation="bilinear")
            ax.axis("off")
            st.pyplot(fig)
            # Streamlit reruns this script on every interaction; close the
            # figure so pyplot does not keep accumulating open figures.
            plt.close(fig)

# Sentiment Analysis Section
elif page == "Sentiment Analysis":
    st.title("📊 Sentiment Analysis")
    uploaded_file_sentiment = st.file_uploader("📂 Upload CSV for Sentiment Analysis", type=["csv"], key="sentiment_file_uploader")

    df_sentiment = _load_articles(uploaded_file_sentiment) if uploaded_file_sentiment else None
    if df_sentiment is not None:
        # analyze_sentiment already limits its input to the first 512 characters.
        df_sentiment['sentiment'] = df_sentiment['content'].apply(analyze_sentiment)
        st.success("✅ Sentiment Analysis Completed!")

        with st.expander("📋 View Sentiment Results"):
            st.dataframe(df_sentiment[['content', 'sentiment']])

        # Download button
        output_csv_sentiment = df_sentiment[['content', 'sentiment']].to_csv(index=False).encode('utf-8')
        st.download_button("⬇ Download Sentiment Data", data=output_csv_sentiment, file_name="sentiment_output.csv", mime="text/csv")