# Twitter Sentiment Analysis — Streamlit app using a pre-trained SST-2 DistilBERT model.
import streamlit as st
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import matplotlib.pyplot as plt
from datasets import Dataset
import asyncio
# Workaround: Streamlit runs scripts in a worker thread that may have no asyncio
# event loop, which can make libraries that probe for one raise at import/use
# time. Touch the loop machinery once so later lookups don't fail.
try:
    asyncio.get_running_loop()
except RuntimeError:  # No running event loop in this thread
    asyncio.run(asyncio.sleep(0))

# Pre-trained binary (POSITIVE/NEGATIVE) sentiment model, fine-tuned on SST-2.
MODEL_PATH = "distilbert-base-uncased-finetuned-sst-2-english"  # Default Hugging Face sentiment model


@st.cache_resource(show_spinner=False)
def _load_model_and_tokenizer():
    """Load the HF model and tokenizer once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the multi-hundred-MB model would be re-loaded from
    disk on each rerun. `st.cache_resource` keeps one shared instance.
    """
    return (
        AutoModelForSequenceClassification.from_pretrained(MODEL_PATH),
        AutoTokenizer.from_pretrained(MODEL_PATH),
    )


model, tokenizer = _load_model_and_tokenizer()
# Define a sentiment analysis function
def sentiment_analysis(text):
    """Classify `text` as positive or negative sentiment.

    Args:
        text: Input string; truncated to the model's 512-token limit.

    Returns:
        tuple[str, float]: ("POSITIVE" or "NEGATIVE", softmax probability
        of the predicted class).
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Inference only: disable autograd so no computation graph is built
    # (saves memory and time on every call).
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment = torch.argmax(probabilities, dim=1).item()
    confidence = torch.max(probabilities, dim=1).values.item()
    # For this SST-2 head, class index 1 is the positive label.
    return ("POSITIVE" if sentiment == 1 else "NEGATIVE", confidence)
# Streamlit app
# Page chrome: title, intro blurb, and the two top-level navigation tabs.
st.title("Twitter Sentiment Analysis App")
st.write("Analyze sentiments in Twitter-like text data using a pre-trained model.")
# Tabs for navigation
tab1, tab2 = st.tabs(["Analyze Sentiments", "Sample Dataset"])
with tab1:
    st.header("Analyze Sentiments")
    st.write("Upload a dataset to analyze sentiments of text data.")

    # Let the user supply their own CSV of text rows.
    uploaded = st.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded is None:
        st.write("Please upload a dataset to proceed.")
    else:
        df = pd.read_csv(uploaded)

        st.subheader("Dataset Preview")
        st.write(df.head())

        # The user picks which column holds the text to classify.
        text_column = st.selectbox("Select the column containing text for analysis:", df.columns)
        if st.button("Analyze Sentiment"):
            # Normalize the chosen column: drop NaNs, coerce everything to str.
            df[text_column] = df[text_column].fillna("").astype(str)

            st.write("Analyzing sentiments...")
            # One (label, confidence) tuple per row.
            predictions = df[text_column].apply(sentiment_analysis)
            df['Sentiment'] = predictions.apply(lambda pair: pair[0])
            df['Confidence'] = predictions.apply(lambda pair: pair[1])

            st.subheader("Analysis Results")
            st.write(df[[text_column, 'Sentiment', 'Confidence']])

            # Bar chart of label frequencies.
            st.subheader("Sentiment Distribution")
            label_counts = df['Sentiment'].value_counts()
            fig, ax = plt.subplots()
            label_counts.plot(kind='bar', ax=ax, color=['green', 'blue', 'red'])
            ax.set_title("Sentiment Distribution")
            ax.set_xlabel("Sentiment")
            ax.set_ylabel("Count")
            st.pyplot(fig)

            # Offer the annotated dataset back as a CSV download.
            st.subheader("Download Results")
            csv = df.to_csv(index=False)
            st.download_button(
                label="Download Sentiment Analysis Results",
                data=csv,
                file_name="sentiment_analysis_results.csv",
                mime="text/csv",
            )
with tab2:
    st.header("Sample Dataset")
    st.write("Download a sample dataset to try out the app.")

    # A tiny hand-written dataset so users can try the full flow immediately.
    example_tweets = [
        "I love this product! It's amazing.",
        "This is the worst service I have ever received.",
        "I'm not sure how I feel about this.",
        "Absolutely fantastic experience!",
        "Terrible. Would not recommend.",
    ]
    sample_data = pd.DataFrame({"Tweet": example_tweets})
    st.write(sample_data)

    st.download_button(
        label="Download Sample Dataset",
        data=sample_data.to_csv(index=False),
        file_name="sample_twitter_dataset.csv",
        mime="text/csv",
    )

    # Short usage walkthrough pointing back to the analysis tab.
    st.write("Follow these steps:")
    st.markdown("""
1. Go to the **Analyze Sentiments** tab.
2. Upload the sample dataset or your own dataset in CSV format.
3. Select the column containing the text to analyze.
4. Click **Analyze Sentiment** to view results and download them.
""")