Spaces:

Prageeth-1
/

News_Classification_App

Sleeping

App Files Files Community

Prageeth-1 committed on Mar 25, 2025

Commit

29621a6

verified ·

1 Parent(s): 9282558

Create app.py

Browse files

Files changed (1) hide show

app.py +273 -0

app.py ADDED Viewed

	@@ -0,0 +1,273 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import re
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+import io
+# Download NLTK resources
+nltk.download('punkt')
+nltk.download('stopwords')
+nltk.download('wordnet')
+# Initialize lemmatizer
+lemmatizer = WordNetLemmatizer()
+# Load models (cache them to avoid reloading on every interaction)
+@st.cache_resource
+def load_classification_model():
+    model_name = "your-username/daily-mirror-news-classifier"  # Replace with your model path
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name)
+    return pipeline("text-classification", model=model, tokenizer=tokenizer)
+@st.cache_resource
+def load_qa_model():
+    return pipeline("question-answering", model="deepset/roberta-base-squad2")
+# Preprocessing function (same as in Section 01)
+def preprocess_text(text):
+    # Lowercase
+    text = text.lower()
+    # Remove URLs
+    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
+    # Remove special characters and numbers
+    text = re.sub(r'[^a-zA-Z\s]', '', text)
+    # Tokenize
+    tokens = word_tokenize(text)
+    # Remove stopwords
+    stop_words = set(stopwords.words('english'))
+    tokens = [token for token in tokens if token not in stop_words]
+    # Lemmatization
+    tokens = [lemmatizer.lemmatize(token) for token in tokens]
+    # Join tokens back to string
+    return ' '.join(tokens)
+# Function to generate word cloud
+def generate_wordcloud(text, title=None):
+    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
+    plt.figure(figsize=(10, 5))
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis("off")
+    plt.title(title, fontsize=20)
+    st.pyplot(plt)
+# Set page config
+st.set_page_config(
+    page_title="News Analysis Dashboard",
+    page_icon="📰",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS
+st.markdown("""
+    <style>
+    .main {
+        background-color: #f5f5f5;
+    }
+    .stButton>button {
+        background-color: #4CAF50;
+        color: white;
+    }
+    .stDownloadButton>button {
+        background-color: #2196F3;
+        color: white;
+    }
+    .stTextInput>div>div>input {
+        background-color: #ffffff;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+# App title and description
+st.title("📰 Daily Mirror News Analyzer")
+st.markdown("""
+    Analyze news excerpts with our powerful AI tools:
+    - Classify news articles into categories
+    - Get answers to your questions about the news content
+    - Visualize key themes
+    """)
+# Create tabs for different functionalities
+tab1, tab2, tab3 = st.tabs(["📋 News Classification", "❓ Q&A Pipeline", "✨ Advanced Features"])
+with tab1:
+    st.header("News Classification Pipeline")
+    st.write("Upload a CSV file containing news excerpts to classify them into categories.")
+    # File uploader
+    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+    if uploaded_file is not None:
+        # Read CSV file
+        df = pd.read_csv(uploaded_file)
+        # Check if 'excerpt' column exists
+        if 'excerpt' not in df.columns:
+            st.error("The CSV file must contain an 'excerpt' column with news content.")
+        else:
+            # Show preview
+            st.subheader("File Preview")
+            st.write(df.head())
+            # Classify button
+            if st.button("Classify News Excerpts"):
+                with st.spinner("Classifying news excerpts..."):
+                    # Load classification model
+                    classifier = load_classification_model()
+                    # Preprocess and classify
+                    df['preprocessed_text'] = df['excerpt'].apply(preprocess_text)
+                    predictions = classifier(df['preprocessed_text'].tolist())
+                    # Add predictions to dataframe
+                    df['class'] = [pred['label'] for pred in predictions]
+                    df['confidence'] = [pred['score'] for pred in predictions]
+                    # Show results
+                    st.subheader("Classification Results")
+                    st.write(df)
+                    # Show distribution
+                    st.subheader("Class Distribution")
+                    class_dist = df['class'].value_counts()
+                    st.bar_chart(class_dist)
+                    # Generate word cloud for each class
+                    st.subheader("Word Clouds by Category")
+                    classes = df['class'].unique()
+                    cols = st.columns(len(classes))
+                    for i, class_name in enumerate(classes):
+                        with cols[i]:
+                            st.markdown(f"**{class_name}**")
+                            class_text = ' '.join(df[df['class'] == class_name]['excerpt'])
+                            generate_wordcloud(class_text)
+                    # Download button
+                    st.subheader("Download Results")
+                    csv = df.to_csv(index=False).encode('utf-8')
+                    st.download_button(
+                        label="Download output.csv",
+                        data=csv,
+                        file_name='output.csv',
+                        mime='text/csv'
+                    )
+with tab2:
+    st.header("Question Answering Pipeline")
+    st.write("Ask questions about news content and get answers from our AI model.")
+    # Option to upload file or enter text manually
+    input_option = st.radio("Choose input method:", ("Upload CSV", "Enter Text Manually"))
+    context = ""
+    if input_option == "Upload CSV":
+        qa_file = st.file_uploader("Upload news content (CSV)", type="csv")
+        if qa_file is not None:
+            qa_df = pd.read_csv(qa_file)
+            if 'excerpt' not in qa_df.columns:
+                st.error("CSV must contain an 'excerpt' column")
+            else:
+                context = ' '.join(qa_df['excerpt'].tolist())
+                st.write(f"Loaded {len(qa_df)} news excerpts")
+    else:
+        context = st.text_area("Paste news content here:", height=200)
+    question = st.text_input("Enter your question:")
+    if st.button("Get Answer") and context and question:
+        with st.spinner("Searching for answers..."):
+            qa_pipeline = load_qa_model()
+            result = qa_pipeline(question=question, context=context)
+            st.subheader("Answer")
+            st.success(result['answer'])
+            st.subheader("Details")
+            st.write(f"Confidence: {result['score']:.2f}")
+            st.write(f"Context: {result['context']}")
+with tab3:
+    st.header("Advanced Features")
+    st.write("Explore additional functionalities to enhance your news analysis.")
+    # Sentiment Analysis
+    st.subheader("📊 Sentiment Analysis")
+    sentiment_text = st.text_area("Enter text for sentiment analysis:", height=100)
+    if st.button("Analyze Sentiment"):
+        with st.spinner("Analyzing sentiment..."):
+            sentiment_pipeline = pipeline("sentiment-analysis")
+            result = sentiment_pipeline(sentiment_text)[0]
+            st.write(f"Label: {result['label']}")
+            st.write(f"Confidence: {result['score']:.2f}")
+            if result['label'] == 'POSITIVE':
+                st.success("This text appears positive!")
+            else:
+                st.warning("This text appears negative.")
+    # Named Entity Recognition
+    st.subheader("🏷️ Named Entity Recognition")
+    ner_text = st.text_area("Enter text for entity recognition:", height=100)
+    if st.button("Extract Entities"):
+        with st.spinner("Identifying entities..."):
+            ner_pipeline = pipeline("ner", grouped_entities=True)
+            results = ner_pipeline(ner_text)
+            entities = []
+            for entity in results:
+                entities.append({
+                    "Entity": entity['entity_group'],
+                    "Word": entity['word'],
+                    "Score": entity['score']
+                })
+            st.table(pd.DataFrame(entities))
+    # Text Summarization
+    st.subheader("✍️ Text Summarization")
+    summary_text = st.text_area("Enter text to summarize:", height=150)
+    if st.button("Generate Summary"):
+        with st.spinner("Generating summary..."):
+            summarizer = pipeline("summarization")
+            summary = summarizer(summary_text, max_length=130, min_length=30)
+            st.write(summary[0]['summary_text'])
+# Sidebar with additional info
+with st.sidebar:
+    st.image("https://via.placeholder.com/150x50?text=Daily+Mirror", width=150)
+    st.title("About")
+    st.write("""
+        This app helps analyze news content using AI-powered tools:
+        - Classify news into categories
+        - Answer questions about news content
+        - Perform advanced text analysis
+        """)
+    st.title("Instructions")
+    st.write("""
+        1. Upload a CSV file with 'excerpt' column
+        2. Click classify to categorize news
+        3. Download results as CSV
+        4. Use Q&A tab to ask questions
+        """)
+    st.title("Model Information")
+    st.write("""
+        - Classification: Fine-tuned DistilBERT
+        - Q&A: RoBERTa-base
+        - Sentiment: DistilBERT-base
+        """)
+    st.markdown("[View model on Hugging Face](https://huggingface.co/your-username/daily-mirror-news-classifier)")
+# Footer
+st.markdown("---")
+st.markdown("© 2023 Daily Mirror News Analyzer | Powered by Hugging Face Transformers")