Spaces:

dinusha11
/

News_Classification_Analysis_App

Sleeping

App Files Files Community

dinusha11 committed on Mar 29, 2025

Commit

9e0643c

verified ·

1 Parent(s): 8ddf433

Upload 2 files

Browse files

Files changed (2) hide show

app.py +146 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import streamlit as st
+import pandas as pd
+import torch
+from transformers import pipeline, AutoTokenizer
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+# Hugging Face Hub id of the fine-tuned DistilBERT news classifier
+MODEL_NAME = "dinusha11/finetuned-distilbert-news"
+# Cached factory for the classification pipeline (tokenizer + model)
+@st.cache_resource
+def load_model():
+    """Build the text-classification pipeline for ``MODEL_NAME``.
+    The tokenizer is loaded from the same checkpoint. The pipeline is placed on
+    CUDA device 0 when ``torch.cuda.is_available()`` is true, otherwise on CPU
+    (device ``-1``). The function is wrapped in ``st.cache_resource``.
+    """
+    if torch.cuda.is_available():
+        device_index = 0
+    else:
+        device_index = -1
+    news_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    return pipeline(
+        "text-classification",
+        model=MODEL_NAME,
+        tokenizer=news_tokenizer,
+        device=device_index,
+    )
+classifier = load_model()
+# Load QA pipeline
+# The checkpoint is named explicitly: with only a task name, transformers picks
+# whatever its current default is and warns that this is not recommended.
+# NOTE(review): this is the checkpoint transformers resolves to by default for
+# "question-answering" at the time of writing - confirm against the installed version.
+QA_MODEL_NAME = "distilbert/distilbert-base-cased-distilled-squad"
+@st.cache_resource
+def load_qa_pipeline():
+    """Create the question-answering pipeline used by ``get_answer``.
+    Uses ``QA_MODEL_NAME`` and the same device rule as ``load_model``: CUDA
+    device 0 when available, otherwise CPU (``-1``). The function is wrapped in
+    ``st.cache_resource``.
+    """
+    return pipeline("question-answering", model=QA_MODEL_NAME,
+                    device=0 if torch.cuda.is_available() else -1)
+qa_pipeline = load_qa_pipeline()
+# Function to preprocess text
+def preprocess_text(text):
+    """Return ``text`` as a string with leading/trailing whitespace removed.
+    Missing values (``None`` or a pandas NA/NaN scalar, which is what an empty
+    CSV cell becomes) yield ``""`` instead of raising ``AttributeError``; any
+    other non-string scalar is converted with ``str()`` before stripping.
+    """
+    if isinstance(text, str):
+        return text.strip()
+    if text is None or pd.isna(text):
+        return ""
+    return str(text).strip()
+# Answer a question against a single article
+def get_answer(question, context):
+    """Run the module-level ``qa_pipeline`` and return the ``'answer'`` field of its result."""
+    qa_result = qa_pipeline(question=question, context=context)
+    return qa_result['answer']
+# Word cloud factory
+def generate_wordcloud(text):
+    """Return the result of generating an 800x400, white-background ``WordCloud`` from ``text``."""
+    cloud_options = {"width": 800, "height": 400, "background_color": 'white'}
+    return WordCloud(**cloud_options).generate(text)
+# Custom CSS Styling
+# Style overrides kept in one constant and injected as raw HTML below.
+_CUSTOM_CSS = """
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #f8f9fa;
+        }
+        .css-1aumxhk {
+            display: none;
+        }
+        .main-title {
+            text-align: center;
+            font-size: 36px;
+            color: #2b2d42;
+        }
+        .stButton>button {
+            width: 100%;
+            border-radius: 10px;
+        }
+    </style>
+"""
+st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
+# Sidebar Navigation
+# The radio selection decides which page branch below is rendered.
+PAGE_NAMES = ["Home", "News Classification", "Q&A", "Word Cloud"]
+st.sidebar.title("Navigation")
+page = st.sidebar.radio("Go to:", PAGE_NAMES)
+# Home Page: static welcome text and a feature overview
+if page == "Home":
+    feature_overview = """
+        - 📌 **Upload a CSV** containing news articles.
+        - 🔍 **Get Classification** into Business, Opinion, Political Gossip, Sports, or World News.
+        - 🧠 **Ask AI Questions** on news content.
+        - ☁ **Visualize Data** with a Word Cloud.
+    """
+    st.title("📰 News Classification & Analysis App")
+    st.write("Welcome to the AI-powered news classification and analysis platform.")
+    st.write(feature_overview)
+    st.success("Get started by navigating to 'News Classification' from the sidebar!")
+# News Classification Page
+# Takes a CSV upload with a 'content' column, adds a predicted 'class' column,
+# then shows the result and offers it as a CSV download.
+elif page == "News Classification":
+    st.title("📝 Classify News Articles")
+    uploaded_file = st.file_uploader("📂 Upload a CSV file", type=["csv"], key="file_uploader")
+    if uploaded_file:
+        df = pd.read_csv(uploaded_file)
+        if 'content' not in df.columns:
+            st.error("The CSV file must contain a 'content' column.")
+        else:
+            # Empty cells are read as NaN (a float). Normalise every cell to str first
+            # so preprocess_text never receives a value without .strip().
+            df['processed_content'] = df['content'].fillna("").astype(str).apply(preprocess_text)
+            texts = df['processed_content'].tolist()
+            # One batched call, with the tokenizer truncating to DistilBERT's 512-token
+            # input limit. Slicing to 512 *characters* discarded most of each article
+            # before it reached the model. The guard avoids calling the pipeline
+            # with an empty list when the CSV has a header but no rows.
+            predictions = classifier(texts, truncation=True, max_length=512) if texts else []
+            df['class'] = [prediction['label'] for prediction in predictions]
+            st.success("✅ Classification completed!")
+            with st.expander("📋 View Classified News"):
+                st.dataframe(df[['content', 'class']])
+            # Download button
+            output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
+            st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")
+# Q&A Section
+# Takes a CSV upload with a 'content' column, lets the user pick one article and
+# answers a free-text question against it via get_answer.
+elif page == "Q&A":
+    st.title("🧠 Ask Questions About News Content")
+    uploaded_file_qa = st.file_uploader("📂 Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")
+    if uploaded_file_qa:
+        df_qa = pd.read_csv(uploaded_file_qa)
+        if 'content' not in df_qa.columns:
+            st.error("The CSV file must contain a 'content' column.")
+        else:
+            # Keep only cells that contain text: NaN (empty cell) or a blank string
+            # is not a usable context for the question-answering pipeline.
+            articles = df_qa['content'].dropna().astype(str)
+            articles = articles[articles.str.strip() != ""]
+            if articles.empty:
+                # With no options the selectbox would return None as the context
+                st.warning("No article text found in the 'content' column.")
+            else:
+                st.write("📰 **Available News Articles:**")
+                selected_article = st.selectbox("Select an article", articles)
+                question = st.text_input("🔍 Ask a question about this article:")
+                # Treat a whitespace-only question the same as no question
+                if question.strip():
+                    answer = get_answer(question, selected_article)
+                    st.success(f"**Answer:** {answer}")
+# Word Cloud Section
+# Takes a CSV upload with a 'content' column and renders one word cloud built
+# from all articles joined together.
+elif page == "Word Cloud":
+    st.title("☁ Word Cloud Visualization")
+    uploaded_file_wc = st.file_uploader("📂 Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")
+    if uploaded_file_wc:
+        df_wc = pd.read_csv(uploaded_file_wc)
+        if 'content' not in df_wc.columns:
+            st.error("The CSV file must contain a 'content' column.")
+        else:
+            # Drop missing cells first: astype(str) alone turns NaN into the literal
+            # word "nan", which would then show up in the cloud.
+            all_text = " ".join(df_wc['content'].dropna().astype(str))
+            try:
+                wordcloud = generate_wordcloud(all_text)
+            except ValueError:
+                # WordCloud.generate raises ValueError when the text yields no words to plot
+                st.warning("Not enough text in the 'content' column to build a word cloud.")
+            else:
+                fig, ax = plt.subplots(figsize=(10, 5))
+                ax.imshow(wordcloud, interpolation="bilinear")
+                ax.axis("off")
+                st.pyplot(fig)
+                # pyplot keeps figures registered until they are closed; close this one
+                # so repeated script reruns do not accumulate open figures.
+                plt.close(fig)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+# Every package imported directly by app.py
+streamlit
+transformers
+pandas
+torch
+matplotlib
+wordcloud