Spaces:

stanlys96
/

NLP-Sentiment-Analysis

Sleeping

File size: 3,019 Bytes

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from wordcloud import WordCloud

def app():
  """Render the exploratory-data-analysis page of the Streamlit app.

  Reads ``data.csv`` (semicolon-delimited) from the working directory, drops
  duplicate rows, and shows the plot picked in the sidebar radio button.
  The data is expected to provide a ``text`` column and a ``feeling``
  (sentiment label) column, since those are the columns read below.

  Every chart is drawn on its own explicit Matplotlib figure, which is handed
  to ``st.pyplot`` and closed right afterwards. Passing the ``pyplot`` module
  itself relies on global state, and figures that are never closed pile up
  across Streamlit reruns.
  """
  df = pd.read_csv("data.csv", delimiter=";").drop_duplicates()

  st.header('Exploratory Data Analysis', divider='rainbow')
  eda_list = ["Text Length Distribution", "Sentiment Distribution", "Word Clouds", "Stopwords Boxplot Distributions"]
  val = st.sidebar.radio("Choose plot to show", eda_list)
  stop_words = set(stopwords.words('english'))

  def show(fig):
    """Send *fig* to Streamlit, then release it from pyplot's registry."""
    st.pyplot(fig)
    plt.close(fig)

  def plot_wordcloud(sentiment):
    """Draw a word cloud built from all texts labelled with *sentiment*."""
    text = ' '.join(df[df['feeling'] == sentiment]['text'])
    wordcloud = WordCloud(stopwords=stop_words, background_color='white').generate(text)
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(f"Word Cloud for {sentiment} Sentiment")
    show(fig)

  if val == "Text Length Distribution":
    st.header('Text Length Distribution')
    # Only computed in this branch: no other plot needs the lengths.
    text_length = df['text'].apply(len)
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(text_length, bins=30, color='skyblue')
    ax.set_title('Text Length Distribution')
    ax.set_xlabel('Text Length (characters)')
    ax.set_ylabel('Frequency')
    show(fig)
    st.write("Insight: text length distribution of our data appears to be right-skewed.")
  elif val == "Sentiment Distribution":
    sentiment_counts = df['feeling'].value_counts()
    st.header('Sentiment Distribution')

    # Pie chart: share of each sentiment label.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette('viridis', len(sentiment_counts)))
    ax.set_title('Sentiment Distribution')
    ax.axis('equal')
    show(fig)
    st.write("Insight: joy and sadness dominate the sentiment dataset, with joy taking the first place in 33.8%")

    # Bar chart: absolute row count per sentiment label.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(data=df, x='feeling', palette='viridis', ax=ax)
    ax.set_title('Sentiment Distribution')
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')
    show(fig)
    st.write("Insight: surprise sentiment has the lowest value of around 900 data")
  elif val == "Word Clouds":
    st.header('Word Clouds')
    for sentiment in ('joy', 'sadness', 'anger', 'love', 'surprise', 'fear'):
      plot_wordcloud(sentiment)
  elif val == "Stopwords Boxplot Distributions":
    # Count whitespace-separated tokens whose lowercase form is a stop word.
    stopword_count = df['text'].apply(lambda x: sum(1 for word in x.split() if word.lower() in stop_words))
    with_counts = df.assign(stopword_count=stopword_count)
    st.header('Stopword Count Boxplot')
    # A dedicated figure, so the boxplot never lands on a leftover one.
    fig, ax = plt.subplots()
    sns.boxplot(data=with_counts, x='feeling', y='stopword_count', ax=ax)
    ax.set_title('Stopword Count by Sentiment')
    show(fig)
    st.write("Insight: each sentiment has quite a number of stop words outliers")