Spaces:

dinusha11
/

News_Classification_Analysis_App

Sleeping

App Files Files Community

News_Classification_Analysis_App / app.py

dinusha11

Upload 2 files

cd4fcbb verified 12 months ago

raw

history blame contribute delete

6.85 kB

	import streamlit as st
	import pandas as pd
	import torch
	from transformers import pipeline, AutoTokenizer
	import matplotlib.pyplot as plt
	from wordcloud import WordCloud

	# Hugging Face Hub id of the fine-tuned DistilBERT news classifier
	MODEL_NAME = "dinusha11/finetuned-distilbert-news"

	# Translates the classifier's generic LABEL_<n> outputs into the
	# category names shown to the user.
	label_mapping = dict(
	    LABEL_0="Business",
	    LABEL_1="Opinion",
	    LABEL_2="Sports",
	    LABEL_3="Political_gossip",
	    LABEL_4="World_news",
	)

	# Build the news-category classification pipeline (tokenizer + model)
	@st.cache_resource
	def load_model():
	    """Return a text-classification pipeline for ``MODEL_NAME``.

	    The pipeline is placed on GPU 0 when CUDA is available and on the
	    CPU (device ``-1``) otherwise. The function is wrapped in
	    ``st.cache_resource``.
	    """
	    device_index = -1
	    if torch.cuda.is_available():
	        device_index = 0
	    return pipeline(
	        "text-classification",
	        model=MODEL_NAME,
	        tokenizer=AutoTokenizer.from_pretrained(MODEL_NAME),
	        device=device_index,
	    )

	classifier = load_model()

	# Load QA pipeline
	@st.cache_resource
	def load_qa_pipeline():
	    """Return a question-answering pipeline.

	    No model id is passed, so the checkpoint is whatever ``transformers``
	    selects for the "question-answering" task. The device is chosen the
	    same way as for the news classifier: GPU 0 when CUDA is available,
	    otherwise the CPU (``-1``). The function is wrapped in
	    ``st.cache_resource``.
	    """
	    device = 0 if torch.cuda.is_available() else -1
	    return pipeline("question-answering", device=device)

	qa_pipeline = load_qa_pipeline()

	# Load Sentiment Analysis pipeline
	@st.cache_resource
	def load_sentiment_pipeline():
	    """Return a sentiment-analysis pipeline.

	    No model id is passed, so the checkpoint is whatever ``transformers``
	    selects for the "sentiment-analysis" task. The device is chosen the
	    same way as for the news classifier: GPU 0 when CUDA is available,
	    otherwise the CPU (``-1``). The function is wrapped in
	    ``st.cache_resource``.
	    """
	    device = 0 if torch.cuda.is_available() else -1
	    return pipeline("sentiment-analysis", device=device)

	sentiment_pipeline = load_sentiment_pipeline()

	# Function to preprocess text
	def preprocess_text(text):
	    """Return ``text`` with leading and trailing whitespace removed.

	    Strings are stripped as-is. Missing values (``None``, NaN, ``pd.NA``)
	    become an empty string instead of raising, because empty CSV cells are
	    loaded by pandas as NaN rather than as ``str``. Any other scalar is
	    converted with ``str()`` before stripping.
	    """
	    if isinstance(text, str):
	        return text.strip()
	    if text is None or pd.isna(text):
	        return ""
	    return str(text).strip()

	# Answer a question from a context passage
	def get_answer(question, context):
	    """Run the QA pipeline on ``question``/``context`` and return the
	    ``'answer'`` field of its result."""
	    result = qa_pipeline(question=question, context=context)
	    return result['answer']

	# Build a word cloud from raw text
	def generate_wordcloud(text):
	    """Return the result of generating an 800x400, white-background
	    ``WordCloud`` from ``text``."""
	    renderer = WordCloud(width=800, height=400, background_color='white')
	    return renderer.generate(text)

	# Function to analyze sentiment
	def analyze_sentiment(text):
	    """Return the sentiment label predicted for ``text``.

	    Missing values (``None``, NaN, ``pd.NA``) yield ``None`` without
	    calling the model; empty CSV cells arrive as NaN, which cannot be
	    sliced. Other non-string scalars are converted with ``str()``.

	    Only the first 512 characters are analysed, as before. Tokenizer
	    truncation is requested as well, because a 512-character slice is not
	    guaranteed to fit within the model's maximum token length.
	    """
	    if text is None or (not isinstance(text, str) and pd.isna(text)):
	        return None
	    snippet = str(text)[:512]
	    return sentiment_pipeline(snippet, truncation=True)[0]['label']

	# Custom CSS styling, injected as a raw <style> element through
	# st.markdown (unsafe_allow_html=True is passed so the markup is emitted
	# as HTML). It sets the page font and background, hides one element,
	# defines a centred .main-title style, and makes buttons full-width with
	# rounded corners.
	# NOTE(review): `.css-1aumxhk` looks like an auto-generated class name;
	# confirm it still matches an element in the Streamlit version in use.
	# NOTE(review): `.main-title` is not referenced anywhere in the visible
	# part of this file; verify it is still needed.
	st.markdown("""
	<style>
	body {
	font-family: Arial, sans-serif;
	background-color: #f8f9fa;
	}
	.css-1aumxhk {
	display: none;
	}
	.main-title {
	text-align: center;
	font-size: 36px;
	color: #2b2d42;
	}
	.stButton>button {
	width: 100%;
	border-radius: 10px;
	}
	</style>
	""", unsafe_allow_html=True)

	# Sidebar navigation: the selected entry decides which page is rendered
	page_names = ["Home", "News Classification", "Q&A", "Word Cloud", "Sentiment Analysis"]
	st.sidebar.title("Navigation")
	page = st.sidebar.radio("Go to:", page_names)

	# Home Page: static landing content only. Renders the app title, a
	# one-line welcome, a bulleted overview of the features, and a hint that
	# points the user to the 'News Classification' sidebar entry.
	if page == "Home":
	st.title("📰 News Classification & Analysis App")
	st.write("Welcome to the AI-powered news classification and analysis platform.")
	st.write("""
	- 📌 Upload a CSV containing news articles.
	- 🔍 Get Classification into Business, Opinion, Political Gossip, Sports, or World News.
	- 🧠 Ask AI Questions on news content.
	- ☁ Visualize Data with a Word Cloud.
	- 📊 Analyze Sentiment of news articles.
	""")
	st.success("Get started by navigating to 'News Classification' from the sidebar!")

	# News Classification Page
	elif page == "News Classification":
	st.title("📝 Classify News Articles")
	uploaded_file = st.file_uploader("📂 Upload a CSV file", type=["csv"], key="file_uploader")

	if uploaded_file:
	df = pd.read_csv(uploaded_file)
	if 'content' not in df.columns:
	st.error("The CSV file must contain a 'content' column.")
	else:
	df['processed_content'] = df['content'].apply(preprocess_text)
	df['class'] = df['processed_content'].apply(lambda x: label_mapping[classifier(x[:512])[0]['label']])
	st.success("✅ Classification completed!")

	with st.expander("📋 View Classified News"):
	st.dataframe(df[['content', 'class']])

	# Download button
	output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
	st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")

	# Q&A Section
	elif page == "Q&A":
	st.title("🧠 Ask Questions About News Content")
	uploaded_file_qa = st.file_uploader("📂 Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")

	if uploaded_file_qa:
	df_qa = pd.read_csv(uploaded_file_qa)
	if 'content' not in df_qa.columns:
	st.error("The CSV file must contain a 'content' column.")
	else:
	st.write("📰 Available News Articles:")
	selected_article = st.selectbox("Select an article", df_qa['content'])

	question = st.text_input("🔍 Ask a question about this article:")

	if question and selected_article.strip():
	try:
	answer = get_answer(question, selected_article)
	st.success(f"Answer: {answer}")
	except Exception as e:
	st.error(f"Error processing question: {str(e)}")

	# Word Cloud Section
	elif page == "Word Cloud":
	st.title("☁ Word Cloud Visualization")
	uploaded_file_wc = st.file_uploader("📂 Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")

	if uploaded_file_wc:
	df_wc = pd.read_csv(uploaded_file_wc)
	if 'content' not in df_wc.columns:
	st.error("The CSV file must contain a 'content' column.")
	else:
	all_text = " ".join(df_wc['content'].dropna().astype(str))
	if all_text:
	wordcloud = generate_wordcloud(all_text)
	fig, ax = plt.subplots(figsize=(10, 5))
	ax.imshow(wordcloud, interpolation="bilinear")
	ax.axis("off")
	st.pyplot(fig)
	else:
	st.error("The 'content' column is empty or contains invalid data.")

	# Sentiment Analysis Section
	elif page == "Sentiment Analysis":
	st.title("📊 Sentiment Analysis")
	uploaded_file_sentiment = st.file_uploader("📂 Upload CSV for Sentiment Analysis", type=["csv"], key="sentiment_file_uploader")

	if uploaded_file_sentiment:
	df_sentiment = pd.read_csv(uploaded_file_sentiment)
	if 'content' not in df_sentiment.columns:
	st.error("The CSV file must contain a 'content' column.")
	else:
	df_sentiment['sentiment'] = df_sentiment['content'].apply(lambda x: analyze_sentiment(x[:512]))
	st.success("✅ Sentiment Analysis Completed!")

	with st.expander("📋 View Sentiment Results"):
	st.dataframe(df_sentiment[['content', 'sentiment']])

	# Download button
	output_csv_sentiment = df_sentiment[['content', 'sentiment']].to_csv(index=False).encode('utf-8')
	st.download_button("⬇ Download Sentiment Data", data=output_csv_sentiment, file_name="sentiment_output.csv", mime="text/csv")