Spaces:
Sleeping
Sleeping
Commit ·
eafd889
1
Parent(s): 6c55a40
Update eda.py
Browse files
eda.py
CHANGED
|
@@ -8,15 +8,30 @@ def run():
|
|
| 8 |
Function for EDA page
|
| 9 |
'''
|
| 10 |
st.title('Exploration Data Analysis Section')
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
# ============================= Simple Analysis ========================
|
| 13 |
|
| 14 |
eda=pd.read_csv('eda.csv')
|
|
|
|
| 15 |
# basic summary analysis
|
| 16 |
emotion_counts = eda['Emotion'].value_counts()
|
| 17 |
|
| 18 |
eda['Comment Length'] = eda['Comment'].apply(len)
|
| 19 |
eda['Word Count'] = eda['Comment'].apply(lambda x: len(x.split()))
|
|
|
|
| 20 |
# emotion distribution
|
| 21 |
fig_emotions = px.bar(emotion_counts,
|
| 22 |
x=emotion_counts.index,
|
|
@@ -24,7 +39,7 @@ def run():
|
|
| 24 |
labels={'x': 'Emotion', 'y': 'Count'},
|
| 25 |
title='Distribution of Emotions')
|
| 26 |
fig_emotions.update_traces(marker_line_width=1, marker_line_color='black')
|
| 27 |
-
fig_emotions.update_layout(xaxis_title='Emotions', yaxis_title='Count', width=
|
| 28 |
|
| 29 |
# comment distribution
|
| 30 |
fig_comment_length = px.histogram(eda,
|
|
@@ -42,9 +57,21 @@ def run():
|
|
| 42 |
marginal='box',
|
| 43 |
title='Distribution of Word Count')
|
| 44 |
fig_word_count.update_traces(marker_line_width=1, marker_line_color='black')
|
| 45 |
-
fig_word_count.update_layout(xaxis_title='Word Count', yaxis_title='Count', width=
|
| 46 |
|
| 47 |
# Display the figures in Streamlit
|
| 48 |
st.plotly_chart(fig_emotions)
|
| 49 |
st.plotly_chart(fig_comment_length)
|
| 50 |
st.plotly_chart(fig_word_count)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
Function for EDA page
|
| 9 |
'''
|
| 10 |
st.title('Exploration Data Analysis Section')
|
| 11 |
+
# ============================= Showing Data ==========================
|
| 12 |
+
df = pd.read_csv('Emotion_classify_Data.csv')
|
| 13 |
+
horizontal_radio_css =
|
| 14 |
+
"""
|
| 15 |
+
<style>
|
| 16 |
+
div.row-widget.stRadio > div{flex-direction:row;}
|
| 17 |
+
</style>
|
| 18 |
+
"""
|
| 19 |
+
st.markdown(horizontal_radio_css, unsafe_allow_html=True)
|
| 20 |
+
data_show = st.radio("**Viewing Options**", ['Top 10 Entries', 'Bottom 10 Entries'])
|
| 21 |
+
if data_show == 'Top 10 Entries':
|
| 22 |
+
st.table(df.head(10))
|
| 23 |
+
else:
|
| 24 |
+
st.table(df.tail(10))
|
| 25 |
# ============================= Simple Analysis ========================
|
| 26 |
|
| 27 |
eda=pd.read_csv('eda.csv')
|
| 28 |
+
|
| 29 |
# basic summary analysis
|
| 30 |
emotion_counts = eda['Emotion'].value_counts()
|
| 31 |
|
| 32 |
eda['Comment Length'] = eda['Comment'].apply(len)
|
| 33 |
eda['Word Count'] = eda['Comment'].apply(lambda x: len(x.split()))
|
| 34 |
+
|
| 35 |
# emotion distribution
|
| 36 |
fig_emotions = px.bar(emotion_counts,
|
| 37 |
x=emotion_counts.index,
|
|
|
|
| 39 |
labels={'x': 'Emotion', 'y': 'Count'},
|
| 40 |
title='Distribution of Emotions')
|
| 41 |
fig_emotions.update_traces(marker_line_width=1, marker_line_color='black')
|
| 42 |
+
fig_emotions.update_layout(xaxis_title='Emotions', yaxis_title='Count', width=700)
|
| 43 |
|
| 44 |
# comment distribution
|
| 45 |
fig_comment_length = px.histogram(eda,
|
|
|
|
| 57 |
marginal='box',
|
| 58 |
title='Distribution of Word Count')
|
| 59 |
fig_word_count.update_traces(marker_line_width=1, marker_line_color='black')
|
| 60 |
+
fig_word_count.update_layout(xaxis_title='Word Count', yaxis_title='Count', width=700)
|
| 61 |
|
| 62 |
# Display the figures in Streamlit
|
| 63 |
st.plotly_chart(fig_emotions)
|
| 64 |
st.plotly_chart(fig_comment_length)
|
| 65 |
st.plotly_chart(fig_word_count)
|
| 66 |
+
with st.expander('Explanation'):
|
| 67 |
+
st.caption(
|
| 68 |
+
'''
|
| 69 |
+
The visualization above shows:
|
| 70 |
+
- The dataset has balanced target classes, which are `anger, joy, and fear`
|
| 71 |
+
- The distribution of comment length is skewed to the right, with the majority of the data within a comment length of `30 to 130`
|
| 72 |
+
- The outliers are the comment lengths that are above `244`
|
| 73 |
+
- The same goes for the word count: the distribution is skewed to the right, with the majority of the data within the range of `5 to 30` words
|
| 74 |
+
- This gives insight into how expressive the people in the dataset are. Most people are quite concise, but there are a few who are very expressive and open about what they feel.
|
| 75 |
+
'''
|
| 76 |
+
)
|
| 77 |
+
# ============================= Word Cloud ==================================
|