Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import nltk | |
| nltk.download('stopwords') | |
| from nltk.corpus import stopwords | |
| from wordcloud import WordCloud | |
def app():
    """Render the "Exploratory Data Analysis" page of the Streamlit app.

    Reads ``data.csv`` (semicolon-delimited, relative path), drops duplicate
    rows, and shows one of four views chosen from a sidebar radio button:
    text-length histogram, sentiment pie/count plots, per-sentiment word
    clouds, or a boxplot of stopword counts per sentiment.

    The code uses two columns of the CSV: ``text`` and ``feeling``.

    Every plot is drawn on its own explicit Matplotlib figure, which is
    closed right after rendering. Streamlit re-executes this function on
    every interaction, so relying on the implicit global pyplot figure would
    leak figures and could draw a new plot on top of a stale one.
    """
    df = pd.read_csv("data.csv", delimiter=";").drop_duplicates()
    # Rows without text cannot be measured, joined or tokenised (len() and
    # str.join() raise TypeError on NaN), so leave them out of the analysis.
    df = df.dropna(subset=['text'])

    st.header('Exploratory Data Analysis', divider='rainbow')
    eda_list = ["Text Length Distribution", "Sentiment Distribution", "Word Clouds", "Stopwords Boxplot Distributions"]
    val = st.sidebar.radio("Choose plot to show", eda_list)

    stop_words = set(stopwords.words('english'))

    def render(fig):
        """Send *fig* to Streamlit, then close it to release its memory."""
        st.pyplot(fig)
        plt.close(fig)

    def plot_wordcloud(sentiment):
        """Draw a word cloud built from all texts labelled *sentiment*."""
        text = ' '.join(df.loc[df['feeling'] == sentiment, 'text'])
        try:
            wordcloud = WordCloud(stopwords=stop_words, background_color='white').generate(text)
        except ValueError:
            # WordCloud raises ValueError when no words are left to draw
            # (e.g. the label is absent from the data); keep the page alive
            # so the remaining clouds still render.
            st.warning(f"No words available to build a word cloud for {sentiment} sentiment.")
            return
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        ax.set_title(f"Word Cloud for {sentiment} Sentiment")
        render(fig)

    if val == "Text Length Distribution":
        # Character count per text, computed only when this view is shown.
        text_lengths = df['text'].str.len()
        st.header('Text Length Distribution')
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.hist(text_lengths, bins=30, color='skyblue')
        ax.set_title('Text Length Distribution')
        ax.set_xlabel('Text Length (characters)')
        ax.set_ylabel('Frequency')
        render(fig)
        st.write("Insight: text length distribution of our data appears to be right-skewed.")

    elif val == "Sentiment Distribution":
        sentiment_counts = df['feeling'].value_counts()
        st.header('Sentiment Distribution')

        # Pie chart: share of each sentiment label.
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct='%1.1f%%',
            startangle=140,
            colors=sns.color_palette('viridis', len(sentiment_counts)),
        )
        ax.set_title('Sentiment Distribution')
        ax.axis('equal')
        render(fig)
        st.write("Insight: joy and sadness dominate the sentiment dataset, with joy taking the first place in 33.8%")

        # Bar chart: absolute number of rows per sentiment label.
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.countplot(data=df, x='feeling', palette='viridis', ax=ax)
        ax.set_title('Sentiment Distribution')
        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Count')
        render(fig)
        st.write("Insight: surprise sentiment has the lowest value of around 900 data")

    elif val == "Word Clouds":
        st.header('Word Clouds')
        for sentiment in ('joy', 'sadness', 'anger', 'love', 'surprise', 'fear'):
            plot_wordcloud(sentiment)

    elif val == "Stopwords Boxplot Distributions":
        # Number of whitespace-separated tokens per text that are English
        # stopwords (case-insensitive match).
        temp_b = df.assign(
            stopword_count=df['text'].apply(
                lambda x: sum(1 for word in x.split() if word.lower() in stop_words)
            )
        )
        st.header('Stopword Count Boxplot')
        # A fresh figure is required here: without it the boxplot would be
        # drawn onto whichever pyplot figure happened to be current.
        fig, ax = plt.subplots()
        sns.boxplot(data=temp_b, x='feeling', y='stopword_count', ax=ax)
        ax.set_title('Stopword Count by Sentiment')
        render(fig)
        st.write("Insight: each sentiment has quite a number of stop words outliers")