achmaddhani committed on
Commit
eafd889
·
1 Parent(s): 6c55a40

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +30 -3
eda.py CHANGED
@@ -8,15 +8,30 @@ def run():
8
  Function for EDA page
9
  '''
10
  st.title('Exploration Data Analysis Section')
11
-
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # ============================= Simple Analysis ========================
13
 
14
  eda=pd.read_csv('eda.csv')
 
15
  # basic summary analysis
16
  emotion_counts = eda['Emotion'].value_counts()
17
 
18
  eda['Comment Length'] = eda['Comment'].apply(len)
19
  eda['Word Count'] = eda['Comment'].apply(lambda x: len(x.split()))
 
20
  # emotion distribution
21
  fig_emotions = px.bar(emotion_counts,
22
  x=emotion_counts.index,
@@ -24,7 +39,7 @@ def run():
24
  labels={'x': 'Emotion', 'y': 'Count'},
25
  title='Distribution of Emotions')
26
  fig_emotions.update_traces(marker_line_width=1, marker_line_color='black')
27
- fig_emotions.update_layout(xaxis_title='Emotions', yaxis_title='Count', width=800)
28
 
29
  # comment distribution
30
  fig_comment_length = px.histogram(eda,
@@ -42,9 +57,21 @@ def run():
42
  marginal='box',
43
  title='Distribution of Word Count')
44
  fig_word_count.update_traces(marker_line_width=1, marker_line_color='black')
45
- fig_word_count.update_layout(xaxis_title='Word Count', yaxis_title='Count', width=600)
46
 
47
  # Display the figures in Streamlit
48
  st.plotly_chart(fig_emotions)
49
  st.plotly_chart(fig_comment_length)
50
  st.plotly_chart(fig_word_count)
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  Function for EDA page
9
  '''
10
  st.title('Exploration Data Analysis Section')
11
+ # ============================= Showing Data ==========================
12
+ df = pd.read_csv('Emotion_classify_Data.csv')
13
+ horizontal_radio_css =
14
+ """
15
+ <style>
16
+ div.row-widget.stRadio > div{flex-direction:row;}
17
+ </style>
18
+ """
19
+ st.markdown(horizontal_radio_css, unsafe_allow_html=True)
20
+ data_show = st.radio("**Viewing Options**", ['Top 10 Entries', 'Bottom 10 Entries'])
21
+ if image_show == 'Top 10 Entries':
22
+ st.table(df.head(10))
23
+ else:
24
+ st.table(df.tail(10))
25
  # ============================= Simple Analysis ========================
26
 
27
  eda=pd.read_csv('eda.csv')
28
+
29
  # basic summary analysis
30
  emotion_counts = eda['Emotion'].value_counts()
31
 
32
  eda['Comment Length'] = eda['Comment'].apply(len)
33
  eda['Word Count'] = eda['Comment'].apply(lambda x: len(x.split()))
34
+
35
  # emotion distribution
36
  fig_emotions = px.bar(emotion_counts,
37
  x=emotion_counts.index,
 
39
  labels={'x': 'Emotion', 'y': 'Count'},
40
  title='Distribution of Emotions')
41
  fig_emotions.update_traces(marker_line_width=1, marker_line_color='black')
42
+ fig_emotions.update_layout(xaxis_title='Emotions', yaxis_title='Count', width=700)
43
 
44
  # comment distribution
45
  fig_comment_length = px.histogram(eda,
 
57
  marginal='box',
58
  title='Distribution of Word Count')
59
  fig_word_count.update_traces(marker_line_width=1, marker_line_color='black')
60
+ fig_word_count.update_layout(xaxis_title='Word Count', yaxis_title='Count', width=700)
61
 
62
  # Display the figures in Streamlit
63
  st.plotly_chart(fig_emotions)
64
  st.plotly_chart(fig_comment_length)
65
  st.plotly_chart(fig_word_count)
66
+ with st.expander('Explanation'):
67
+ st.caption(
68
+ '''
69
+ The visualization above shows:
70
+ - The dataset has a balance target class which are `anger, joy, and fear`
71
+ - The distribution of comment length skewed to the right with the majority of the data is within `30 to 130` comment length
72
+ - The outliers are the comment lengths that is above `244`
73
+ - The same goes to the word count, the distribution is skewed to the right with the majority of the data is within the range of `5 to 30` word count
74
+ - This gives insight about how expressive the people are in the dataset. Most people quite concise but there are a few who's very expressive and open about what they feel.
75
+ '''
76
+ )
77
+ # ============================= Word Cloud ==================================