File size: 4,354 Bytes
44f9ee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import streamlit as st
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import matplotlib.pyplot as plt
from datasets import Dataset
import asyncio

# Handle asyncio loop issues.
# NOTE(review): the exact error this guards against is not visible in this
# file -- confirm the workaround is still required before removing it.
def _warm_up_event_loop():
    """Run a zero-delay sleep under ``asyncio.run`` when no loop is running.

    ``asyncio.get_running_loop()`` raises ``RuntimeError`` when the calling
    thread has no running event loop; only in that case is a no-op coroutine
    driven to completion with ``asyncio.run``.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:  # No running event loop
        noop = asyncio.sleep(0)
        asyncio.run(noop)


_warm_up_event_loop()

# Load pre-trained model and tokenizer
MODEL_PATH = "distilbert-base-uncased-finetuned-sst-2-english"  # Default Hugging Face sentiment model


@st.cache_resource
def _load_model_and_tokenizer(model_path):
    """Load the sequence-classification model and its tokenizer once.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the loaded objects as a shared resource avoids
    re-reading the model weights on each of those reruns.

    Args:
        model_path: Hugging Face model identifier or local path passed to
            ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_path)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer(MODEL_PATH)

# Define a sentiment analysis function
def sentiment_analysis(text):
    """Classify a single piece of text with the module-level model.

    Args:
        text: The string to classify. It is tokenized with truncation
            enabled and a maximum length of 512 tokens.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``"POSITIVE"`` when the
        most probable class index is 1 and ``"NEGATIVE"`` otherwise;
        ``confidence`` is the softmax probability of that class as a float.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Inference only: disabling autograd avoids building a gradient graph
    # (and holding its memory) for every analysed row.
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)

    sentiment = torch.argmax(probabilities, dim=-1).item()
    # The probability at the argmax index is the maximum probability.
    confidence = probabilities[0, sentiment].item()
    return ("POSITIVE" if sentiment == 1 else "NEGATIVE", confidence)

# Streamlit app: page heading and one-line description
st.title("Twitter Sentiment Analysis App")
st.write("Analyze sentiments in Twitter-like text data using a pre-trained model.")

# Tabs for navigation: the first hosts the analysis workflow, the second
# offers a sample dataset to download.
tab_labels = ["Analyze Sentiments", "Sample Dataset"]
tab1, tab2 = st.tabs(tab_labels)

# Bar colour per sentiment label, so each label keeps its colour no matter in
# which order value_counts() lists the labels (it sorts by frequency).
_SENTIMENT_COLORS = {"POSITIVE": "green", "NEGATIVE": "red"}
_FALLBACK_COLOR = "blue"

# Session-state key under which the latest analysis is kept across reruns.
_RESULTS_KEY = "sentiment_results"


def _read_uploaded_csv(uploaded_file):
    """Parse the uploaded CSV; report the problem and return None on failure."""
    # Rewind so the file parses even if its buffer was consumed earlier.
    uploaded_file.seek(0)
    try:
        return pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return None


def _analyze_dataframe(data, text_column):
    """Return a copy of ``data`` with 'Sentiment' and 'Confidence' columns added."""
    analyzed = data.copy()
    # Clean the text column: replace NaN with "" and force string type.
    analyzed[text_column] = analyzed[text_column].fillna("").astype(str)
    results = [sentiment_analysis(text) for text in analyzed[text_column]]
    analyzed['Sentiment'] = [label for label, _ in results]
    analyzed['Confidence'] = [score for _, score in results]
    return analyzed


def _render_results(analyzed, text_column):
    """Show the result table, the sentiment bar chart and the CSV download."""
    st.subheader("Analysis Results")
    st.write(analyzed[[text_column, 'Sentiment', 'Confidence']])

    st.subheader("Sentiment Distribution")
    sentiment_counts = analyzed['Sentiment'].value_counts()
    if sentiment_counts.empty:
        # Nothing to draw for a dataset without rows.
        st.info("The dataset contains no rows, so there is nothing to plot.")
    else:
        fig, ax = plt.subplots()
        bar_colors = [
            _SENTIMENT_COLORS.get(label, _FALLBACK_COLOR)
            for label in sentiment_counts.index
        ]
        sentiment_counts.plot(kind='bar', ax=ax, color=bar_colors)
        ax.set_title("Sentiment Distribution")
        ax.set_xlabel("Sentiment")
        ax.set_ylabel("Count")
        st.pyplot(fig)
        # pyplot keeps every figure alive until closed; the script reruns
        # often, so close it to avoid accumulating figures in memory.
        plt.close(fig)

    # Offer the download only for analysed data, so the file really contains
    # the sentiment columns the button label promises.
    st.subheader("Download Results")
    st.download_button(
        label="Download Sentiment Analysis Results",
        data=analyzed.to_csv(index=False),
        file_name="sentiment_analysis_results.csv",
        mime="text/csv",
    )


with tab1:
    st.header("Analyze Sentiments")
    st.write("Upload a dataset to analyze sentiments of text data.")

    # File uploader
    data_file = st.file_uploader("Upload your CSV file", type=["csv"])

    if data_file is None:
        st.write("Please upload a dataset to proceed.")
    else:
        data = _read_uploaded_csv(data_file)
        if data is not None:
            # Display the dataset
            st.subheader("Dataset Preview")
            st.write(data.head())

            # Check for text column selection
            text_column = st.selectbox("Select the column containing text for analysis:", data.columns)

            # Identifies which upload/column a stored analysis belongs to, so
            # stale results are not shown after the user changes either one.
            source_key = (data_file.name, data_file.size, text_column)

            if st.button("Analyze Sentiment"):
                st.write("Analyzing sentiments...")
                st.session_state[_RESULTS_KEY] = (
                    source_key,
                    _analyze_dataframe(data, text_column),
                )

            # st.button is True only during the run triggered by the click;
            # reading from session state keeps results visible on later
            # reruns (for example after pressing the download button).
            stored = st.session_state.get(_RESULTS_KEY)
            if stored is not None and stored[0] == source_key:
                _render_results(stored[1], text_column)

with tab2:
    st.header("Sample Dataset")
    st.write("Download a sample dataset to try out the app.")

    # Five example tweets, held in a single "Tweet" column.
    example_tweets = [
        "I love this product! It's amazing.",
        "This is the worst service I have ever received.",
        "I'm not sure how I feel about this.",
        "Absolutely fantastic experience!",
        "Terrible. Would not recommend.",
    ]
    sample_data = pd.DataFrame({"Tweet": example_tweets})
    st.write(sample_data)

    # Serialise without the index so the file holds only the "Tweet" column.
    sample_csv = sample_data.to_csv(index=False)
    download_options = {
        "label": "Download Sample Dataset",
        "data": sample_csv,
        "file_name": "sample_twitter_dataset.csv",
        "mime": "text/csv",
    }
    st.download_button(**download_options)

    # Short usage walkthrough rendered as a numbered markdown list.
    st.write("Follow these steps:")
    usage_steps = """
    1. Go to the **Analyze Sentiments** tab.
    2. Upload the sample dataset or your own dataset in CSV format.
    3. Select the column containing the text to analyze.
    4. Click **Analyze Sentiment** to view results and download them.
    """
    st.markdown(usage_steps)