# Source: Hugging Face upload by dinusha11 — "Upload 2 files", commit cd4fcbb (verified).
import streamlit as st
import pandas as pd
import torch
from transformers import pipeline, AutoTokenizer
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Hugging Face Hub ID of the fine-tuned DistilBERT news classifier.
MODEL_NAME = "dinusha11/finetuned-distilbert-news"

# Category names in label-index order: position i is the display name for the
# raw model label "LABEL_i".
_CATEGORY_NAMES = (
    "Business",
    "Opinion",
    "Sports",
    "Political_gossip",
    "World_news",
)

# Maps raw pipeline labels ("LABEL_0" ... "LABEL_4") to readable category names.
label_mapping = {
    f"LABEL_{index}": category for index, category in enumerate(_CATEGORY_NAMES)
}
# Load tokenizer and classification pipeline
@st.cache_resource
def load_model():
    """Build the text-classification pipeline for ``MODEL_NAME``.

    The tokenizer is loaded from the same checkpoint as the model. The
    pipeline is placed on CUDA device 0 when ``torch`` reports a GPU and on
    the CPU (device ``-1``) otherwise. ``st.cache_resource`` wraps the loader
    so the pipeline can be reused instead of rebuilt on every script rerun.

    Returns:
        The configured ``transformers`` text-classification pipeline.
    """
    news_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    device_index = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "text-classification",
        model=MODEL_NAME,
        tokenizer=news_tokenizer,
        device=device_index,
    )


classifier = load_model()
# Load QA pipeline
@st.cache_resource
def load_qa_pipeline():
    """Create the question-answering pipeline used by the Q&A page.

    No model is named here, so the choice of checkpoint is left to
    ``transformers`` (NOTE(review): presumably its default for the task —
    consider pinning one for reproducibility).

    Returns:
        A ``transformers`` question-answering pipeline.
    """
    qa_model = pipeline(task="question-answering")
    return qa_model


qa_pipeline = load_qa_pipeline()
# Load Sentiment Analysis pipeline
@st.cache_resource
def load_sentiment_pipeline():
    """Create the sentiment-analysis pipeline used by the Sentiment page.

    No model is named here, so the choice of checkpoint is left to
    ``transformers`` (NOTE(review): presumably its default for the task —
    consider pinning one for reproducibility).

    Returns:
        A ``transformers`` sentiment-analysis pipeline.
    """
    sentiment_model = pipeline(task="sentiment-analysis")
    return sentiment_model


sentiment_pipeline = load_sentiment_pipeline()
# Function to preprocess text
def preprocess_text(text):
    """Return *text* with leading and trailing whitespace removed.

    Cells read from a CSV are not always strings: empty cells arrive as NaN
    and numeric-looking cells as numbers. Missing values become ``""`` and
    other non-string values are converted with ``str`` so that applying this
    function to a whole column never raises ``AttributeError``.

    Args:
        text: The raw cell value; normally a ``str``.

    Returns:
        The stripped text, or ``""`` when the value is missing.
    """
    if isinstance(text, str):
        return text.strip()
    # is_scalar guards pd.isna, which returns an array for list-like input.
    if text is None or (pd.api.types.is_scalar(text) and pd.isna(text)):
        return ""
    return str(text).strip()
# Function for Q&A
def get_answer(question, context):
    """Answer *question* using *context* as the passage to search.

    Args:
        question: The question to ask.
        context: The text the answer is extracted from.

    Returns:
        The ``'answer'`` field of the result returned by ``qa_pipeline``.
    """
    prediction = qa_pipeline(question=question, context=context)
    return prediction['answer']
# Function to generate word cloud
def generate_wordcloud(text):
    """Build an 800x400 word cloud on a white background from *text*.

    Args:
        text: The text whose words are drawn.

    Returns:
        The object returned by ``WordCloud.generate``.
    """
    cloud_builder = WordCloud(width=800, height=400, background_color='white')
    return cloud_builder.generate(text)
# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the sentiment label predicted for *text*.

    Only the first 512 characters are scored. ``truncation=True`` is passed as
    well so the tokenizer enforces the model's own token limit: a character
    slice alone does not bound the token count once special tokens are added.

    Args:
        text: The article text to score.

    Returns:
        The ``'label'`` field of the first prediction returned by
        ``sentiment_pipeline``.
    """
    prediction = sentiment_pipeline(text[:512], truncation=True)
    return prediction[0]['label']
# Custom CSS Styling
# Page-wide stylesheet. It is passed with unsafe_allow_html=True so the
# <style> tag is rendered as HTML rather than shown as text.
# NOTE(review): `.css-1aumxhk` looks like an auto-generated Streamlit class
# name and may not survive a Streamlit upgrade — confirm it still matches.
_CUSTOM_CSS = """
<style>
body {
font-family: Arial, sans-serif;
background-color: #f8f9fa;
}
.css-1aumxhk {
display: none;
}
.main-title {
text-align: center;
font-size: 36px;
color: #2b2d42;
}
.stButton>button {
width: 100%;
border-radius: 10px;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Sidebar Navigation
# Pages offered in the sidebar; the selected name is matched by the page
# handlers further down.
_PAGE_NAMES = [
    "Home",
    "News Classification",
    "Q&A",
    "Word Cloud",
    "Sentiment Analysis",
]
_sidebar = st.sidebar
_sidebar.title("Navigation")
page = _sidebar.radio("Go to:", _PAGE_NAMES)
# Shared helpers for the page handlers below.
def _read_content_csv(uploaded_file):
    """Parse an uploaded CSV and return it only if it has a 'content' column.

    Reports the problem with ``st.error`` and returns ``None`` when the file
    cannot be parsed or lacks the 'content' column, so each page needs just
    one ``is not None`` check.
    """
    try:
        frame = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None
    if 'content' not in frame.columns:
        st.error("The CSV file must contain a 'content' column.")
        return None
    return frame


def _content_as_text(frame):
    """Return the 'content' column as strings, with missing cells as ''."""
    # Empty CSV cells are read as NaN (a float); string operations on them
    # would raise, so normalise the column once here.
    return frame['content'].fillna("").astype(str)


def _predict_category(text):
    """Classify one article and return its readable category name."""
    # The 512-character slice is kept from the original code; truncation=True
    # additionally lets the tokenizer enforce the model's token limit.
    raw_label = classifier(text[:512], truncation=True)[0]['label']
    # Show the raw label instead of raising KeyError if it is not mapped.
    return label_mapping.get(raw_label, raw_label)


def _label_rows(texts, predict):
    """Apply *predict* to every non-blank text; blank rows get an empty label."""
    return texts.apply(lambda text: predict(text) if text.strip() else "")


# Home Page
if page == "Home":
    st.title("📰 News Classification & Analysis App")
    st.write("Welcome to the AI-powered news classification and analysis platform.")
    # Lines are joined without indentation so Markdown renders a bullet list
    # rather than a code block.
    st.write(
        "\n"
        "- 📌 **Upload a CSV** containing news articles.\n"
        "- 🔍 **Get Classification** into Business, Opinion, Political Gossip, Sports, or World News.\n"
        "- 🧠 **Ask AI Questions** on news content.\n"
        "- ☁ **Visualize Data** with a Word Cloud.\n"
        "- 📊 **Analyze Sentiment** of news articles.\n"
    )
    st.success("Get started by navigating to 'News Classification' from the sidebar!")

# News Classification Page
elif page == "News Classification":
    st.title("📝 Classify News Articles")
    uploaded_file = st.file_uploader("📂 Upload a CSV file", type=["csv"], key="file_uploader")
    if uploaded_file:
        df = _read_content_csv(uploaded_file)
        if df is not None:
            df['processed_content'] = _content_as_text(df).apply(preprocess_text)
            df['class'] = _label_rows(df['processed_content'], _predict_category)
            st.success("✅ Classification completed!")
            with st.expander("📋 View Classified News"):
                st.dataframe(df[['content', 'class']])
            # Download button
            output_csv = df[['content', 'class']].to_csv(index=False).encode('utf-8')
            st.download_button("⬇ Download Classified Data", data=output_csv, file_name="output.csv", mime="text/csv")

# Q&A Section
elif page == "Q&A":
    st.title("🧠 Ask Questions About News Content")
    uploaded_file_qa = st.file_uploader("📂 Upload CSV for Q&A", type=["csv"], key="qa_file_uploader")
    if uploaded_file_qa:
        df_qa = _read_content_csv(uploaded_file_qa)
        if df_qa is not None:
            # Offer only real articles: missing or blank rows cannot be
            # questioned, and an empty list would make the selectbox return None.
            articles = [
                article
                for article in df_qa['content'].dropna().astype(str)
                if article.strip()
            ]
            if not articles:
                st.error("The 'content' column is empty or contains invalid data.")
            else:
                st.write("📰 **Available News Articles:**")
                selected_article = st.selectbox("Select an article", articles)
                question = st.text_input("🔍 Ask a question about this article:")
                if question and question.strip() and selected_article:
                    # Broad catch is deliberate: any model failure is shown to
                    # the user instead of crashing the page.
                    try:
                        answer = get_answer(question, selected_article)
                        st.success(f"**Answer:** {answer}")
                    except Exception as e:
                        st.error(f"Error processing question: {str(e)}")

# Word Cloud Section
elif page == "Word Cloud":
    st.title("☁ Word Cloud Visualization")
    uploaded_file_wc = st.file_uploader("📂 Upload CSV for Word Cloud", type=["csv"], key="wc_file_uploader")
    if uploaded_file_wc:
        df_wc = _read_content_csv(uploaded_file_wc)
        if df_wc is not None:
            all_text = " ".join(df_wc['content'].dropna().astype(str))
            wordcloud = None
            if all_text.strip():
                # NOTE(review): WordCloud.generate is expected to raise
                # ValueError when the text contains no usable words — confirm.
                try:
                    wordcloud = generate_wordcloud(all_text)
                except ValueError:
                    wordcloud = None
            if wordcloud is None:
                st.error("The 'content' column is empty or contains invalid data.")
            else:
                fig, ax = plt.subplots(figsize=(10, 5))
                ax.imshow(wordcloud, interpolation="bilinear")
                ax.axis("off")
                st.pyplot(fig)
                # Release the figure; pyplot otherwise keeps one per rerun.
                plt.close(fig)

# Sentiment Analysis Section
elif page == "Sentiment Analysis":
    st.title("📊 Sentiment Analysis")
    uploaded_file_sentiment = st.file_uploader("📂 Upload CSV for Sentiment Analysis", type=["csv"], key="sentiment_file_uploader")
    if uploaded_file_sentiment:
        df_sentiment = _read_content_csv(uploaded_file_sentiment)
        if df_sentiment is not None:
            # analyze_sentiment already limits the text length itself, so the
            # column is passed through without a second slice.
            df_sentiment['sentiment'] = _label_rows(_content_as_text(df_sentiment), analyze_sentiment)
            st.success("✅ Sentiment Analysis Completed!")
            with st.expander("📋 View Sentiment Results"):
                st.dataframe(df_sentiment[['content', 'sentiment']])
            # Download button
            output_csv_sentiment = df_sentiment[['content', 'sentiment']].to_csv(index=False).encode('utf-8')
            st.download_button("⬇ Download Sentiment Data", data=output_csv_sentiment, file_name="sentiment_output.csv", mime="text/csv")