"""Streamlit app for classifying and analysing news articles.

Pages (selected from the sidebar):

* Home                - short description of the app.
* News Classification - label every article of an uploaded CSV.
* Q&A                 - extractive question answering over one article.
* Word Cloud          - word cloud built from all uploaded articles.
* Sentiment Analysis  - sentiment label for every article.

Every uploaded CSV must contain a ``content`` column holding the article text.
"""

from typing import List, Optional

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, pipeline
from wordcloud import WordCloud

# Load the fine-tuned DistilBERT model from Hugging Face
MODEL_NAME = "dinusha11/finetuned-distilbert-news"

# Name of the column every uploaded CSV has to provide.
CONTENT_COLUMN = "content"

# Articles are cut to this many characters before inference (cheap pre-trim).
MAX_CHARS = 512

# Hard cap on tokens handed to the models. A 512-character slice can still
# tokenize to more than 512 tokens (special tokens, emoji, CJK text), so the
# tokenizer is additionally asked to truncate.
MAX_TOKENS = 512

# Label mapping: generic classifier output label -> human-readable category.
label_mapping = {
    "LABEL_0": "Business",
    "LABEL_1": "Opinion",
    "LABEL_2": "Sports",
    "LABEL_3": "Political_gossip",
    "LABEL_4": "World_news",
}

# Markdown shown on the Home page; one bullet per line so it renders as a list.
HOME_FEATURES_MD = (
    "- 📌 **Upload a CSV** containing news articles.\n"
    "- 🔍 **Get Classification** into Business, Opinion, Political Gossip, "
    "Sports, or World News.\n"
    "- 🧠 **Ask AI Questions** on news content.\n"
    "- ☁ **Visualize Data** with a Word Cloud.\n"
    "- 📊 **Analyze Sentiment** of news articles.\n"
)


# ---------------------------------------------------------------------------
# Model loading (cached once per server process by st.cache_resource)
# ---------------------------------------------------------------------------

@st.cache_resource
def load_model():
    """Build the text-classification pipeline for ``MODEL_NAME``.

    Runs on the first CUDA device when one is available, otherwise on CPU.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return pipeline(
        "text-classification",
        model=MODEL_NAME,
        tokenizer=tokenizer,
        device=0 if torch.cuda.is_available() else -1,
    )


classifier = load_model()


@st.cache_resource
def load_qa_pipeline():
    """Build the question-answering pipeline (library default model)."""
    return pipeline("question-answering")


qa_pipeline = load_qa_pipeline()


@st.cache_resource
def load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline (library default model)."""
    return pipeline("sentiment-analysis")


sentiment_pipeline = load_sentiment_pipeline()


# ---------------------------------------------------------------------------
# Text helpers
# ---------------------------------------------------------------------------

def preprocess_text(text):
    """Return ``text`` as a string without surrounding whitespace.

    Missing values (``None`` / NaN, as produced by empty CSV cells) become
    the empty string instead of raising ``AttributeError``.
    """
    if pd.isna(text):
        return ""
    return str(text).strip()


def get_answer(question, context):
    """Return the answer span the QA pipeline extracts from ``context``."""
    return qa_pipeline(question=question, context=context)['answer']


def generate_wordcloud(text):
    """Return an 800x400 white-background ``WordCloud`` built from ``text``.

    Raises:
        ValueError: propagated from ``WordCloud.generate`` when ``text``
            contains no usable words.
    """
    return WordCloud(width=800, height=400, background_color='white').generate(text)


def analyze_sentiment(text):
    """Return the sentiment label predicted for ``text``.

    The text is cut to ``MAX_CHARS`` characters and the tokenizer truncates
    to ``MAX_TOKENS`` tokens, so over-long input is shortened, not rejected.
    """
    result = sentiment_pipeline(
        text[:MAX_CHARS], truncation=True, max_length=MAX_TOKENS
    )
    return result[0]['label']


def classify_text(text: str) -> str:
    """Return the news category predicted for ``text``.

    Labels missing from ``label_mapping`` are returned unchanged rather than
    raising ``KeyError``.
    """
    prediction = classifier(
        text[:MAX_CHARS], truncation=True, max_length=MAX_TOKENS
    )[0]
    raw_label = prediction['label']
    return label_mapping.get(raw_label, raw_label)


# Streamlit reruns the whole script on every widget interaction (including a
# click on a download button). Caching the batch results keeps the models
# from re-processing an unchanged upload on each rerun.

@st.cache_data
def classify_articles(texts: List[str]) -> List[str]:
    """Return the predicted category for each entry of ``texts``."""
    return [classify_text(text) for text in texts]


@st.cache_data
def analyze_articles(texts: List[str]) -> List[str]:
    """Return the predicted sentiment label for each entry of ``texts``."""
    return [analyze_sentiment(text) for text in texts]


def load_articles(uploaded_file) -> Optional[pd.DataFrame]:
    """Read an uploaded CSV and return its rows that have article text.

    Problems are reported to the user through ``st.error`` / ``st.warning``.

    Args:
        uploaded_file: file-like object returned by ``st.file_uploader``.

    Returns:
        A DataFrame whose ``content`` column holds only strings, or ``None``
        when the file cannot be parsed, lacks the ``content`` column, or has
        no row with content.
    """
    try:
        frame = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None

    if CONTENT_COLUMN not in frame.columns:
        st.error("The CSV file must contain a 'content' column.")
        return None

    # Empty cells are read as NaN (a float); they would break the string
    # operations downstream, so those rows are dropped up front.
    missing = int(frame[CONTENT_COLUMN].isna().sum())
    if missing:
        st.warning(f"Skipped {missing} row(s) with no content.")
    frame = frame.dropna(subset=[CONTENT_COLUMN]).reset_index(drop=True)

    if frame.empty:
        st.error("The 'content' column is empty or contains invalid data.")
        return None

    # Non-text cells (e.g. numbers) are coerced so every entry is a str.
    return frame.assign(**{CONTENT_COLUMN: frame[CONTENT_COLUMN].astype(str)})


# ---------------------------------------------------------------------------
# Pages
# ---------------------------------------------------------------------------

def render_home_page() -> None:
    """Render the landing page."""
    st.title("📰 News Classification & Analysis App")
    st.write("Welcome to the AI-powered news classification and analysis platform.")
    st.write(HOME_FEATURES_MD)
    st.success("Get started by navigating to 'News Classification' from the sidebar!")


def render_classification_page() -> None:
    """Classify every article of an uploaded CSV and offer the result as CSV."""
    st.title("📝 Classify News Articles")
    uploaded_file = st.file_uploader(
        "📂 Upload a CSV file", type=["csv"], key="file_uploader"
    )
    if not uploaded_file:
        return

    df = load_articles(uploaded_file)
    if df is None:
        return

    df['processed_content'] = df[CONTENT_COLUMN].apply(preprocess_text)
    df['class'] = classify_articles(df['processed_content'].tolist())
    st.success("✅ Classification completed!")

    with st.expander("📋 View Classified News"):
        st.dataframe(df[['content', 'class']])

    # Download button
    output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
    st.download_button(
        "⬇ Download Classified Data",
        data=output_csv,
        file_name="output.csv",
        mime="text/csv",
    )


def render_qa_page() -> None:
    """Let the user pick an uploaded article and ask a question about it."""
    st.title("🧠 Ask Questions About News Content")
    uploaded_file_qa = st.file_uploader(
        "📂 Upload CSV for Q&A", type=["csv"], key="qa_file_uploader"
    )
    if not uploaded_file_qa:
        return

    df_qa = load_articles(uploaded_file_qa)
    if df_qa is None:
        return

    st.write("📰 **Available News Articles:**")
    selected_article = st.selectbox("Select an article", df_qa[CONTENT_COLUMN])
    question = st.text_input("🔍 Ask a question about this article:")

    # Nothing is sent to the QA pipeline for a blank question or article.
    if not (question.strip() and selected_article and selected_article.strip()):
        return

    try:
        answer = get_answer(question, selected_article)
    except Exception as e:  # UI boundary: show any pipeline failure to the user.
        st.error(f"Error processing question: {str(e)}")
    else:
        st.success(f"**Answer:** {answer}")


def render_wordcloud_page() -> None:
    """Draw a word cloud from the text of all uploaded articles."""
    st.title("☁ Word Cloud Visualization")
    uploaded_file_wc = st.file_uploader(
        "📂 Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader"
    )
    if not uploaded_file_wc:
        return

    df_wc = load_articles(uploaded_file_wc)
    if df_wc is None:
        return

    all_text = " ".join(df_wc[CONTENT_COLUMN]).strip()
    if not all_text:
        st.error("The 'content' column is empty or contains invalid data.")
        return

    try:
        wordcloud = generate_wordcloud(all_text)
    except ValueError:
        # WordCloud.generate raises ValueError when no words are left to plot.
        st.error("The 'content' column is empty or contains invalid data.")
        return

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this each
    # rerun of the script would leave another figure behind.
    plt.close(fig)


def render_sentiment_page() -> None:
    """Label the sentiment of every uploaded article and offer it as CSV."""
    st.title("📊 Sentiment Analysis")
    uploaded_file_sentiment = st.file_uploader(
        "📂 Upload CSV for Sentiment Analysis",
        type=["csv"],
        key="sentiment_file_uploader",
    )
    if not uploaded_file_sentiment:
        return

    df_sentiment = load_articles(uploaded_file_sentiment)
    if df_sentiment is None:
        return

    # analyze_sentiment already truncates, so the text is passed as-is.
    df_sentiment['sentiment'] = analyze_articles(
        df_sentiment[CONTENT_COLUMN].tolist()
    )
    st.success("✅ Sentiment Analysis Completed!")

    with st.expander("📋 View Sentiment Results"):
        st.dataframe(df_sentiment[['content', 'sentiment']])

    # Download button
    output_csv_sentiment = (
        df_sentiment[['content', 'sentiment']].to_csv(index=False).encode('utf-8')
    )
    st.download_button(
        "⬇ Download Sentiment Data",
        data=output_csv_sentiment,
        file_name="sentiment_output.csv",
        mime="text/csv",
    )


# Sidebar entry -> page renderer; dict order is the order of the radio options.
PAGES = {
    "Home": render_home_page,
    "News Classification": render_classification_page,
    "Q&A": render_qa_page,
    "Word Cloud": render_wordcloud_page,
    "Sentiment Analysis": render_sentiment_page,
}

# Custom CSS Styling (the stylesheet is currently empty).
st.markdown(""" """, unsafe_allow_html=True)

# Sidebar Navigation
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to:", list(PAGES))

PAGES[page]()