jmansfield89 committed on
Commit
c025aa9
·
1 Parent(s): af6e65f

Update app.py

Browse files

Update to cleaner code with functions for almost all processes

Files changed (1) hide show
  1. app.py +77 -24
app.py CHANGED
@@ -4,39 +4,92 @@ import pandas as pd
4
  import sys
5
  from streamlit import cli as stcli
6
  import plotly.express as px
7
- import numpy as np
8
 
9
 
10
- # DATA IMPORT
11
- # Import data from "df_redacted.csv" as a dataframe
12
- df_redacted = pd.read_csv('df_redacted.csv')
13
-
14
- # DATA MANIPULATION
15
  # Create variable for Tweet being analyzed in this app
16
- tweet_url = "https://twitter.com/Meta/status/1453795115701440524"
17
 
18
- # Create new dataframe, reset the index, and rename columns
19
- sentiment_counts = pd.DataFrame(df_redacted['sentiment_score'].value_counts(dropna=False))
20
- sentiment_counts = sentiment_counts.reset_index()
21
- sentiment_counts.columns = ['Sentiment', 'Count']
22
 
23
- # Find sentiment category with the highest count
24
- sentiment = sentiment_counts.loc[sentiment_counts['Count'].idxmax(), 'Sentiment']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # DISPLAY DATA
27
- # Display the count for each sentiment category and overall sentiment of the tweet replies
28
- def main():
29
- # Header display section of app
30
- st.markdown("**Objective:** Understand public sentiment of a Tweet by analyzing the sentiment of each reply.")
31
- st.markdown("**Analysis:** This app runs sentiment analysis on the replies to a Facebook Tweet announcing their "
32
- "rebranding to Meta on 10/28/2021. Link to Tweet: {}".format(tweet_url))
33
- st.markdown("**Results:** Most frequent sentiment category for this Tweet's replies: **{}**".format(sentiment))
34
-
35
- # Display histogram of count for each sentiment category
36
- fig = px.bar(sentiment_counts, x='Sentiment', y='Count', title='Tweet Replies Sentiment Counts by Category')
37
  st.plotly_chart(fig, use_container_width=True)
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  if __name__ == '__main__':
41
  if st._is_running_with_streamlit:
42
  main()
 
4
  import sys
5
  from streamlit import cli as stcli
6
  import plotly.express as px
 
7
 
8
 
9
+ # GLOBAL VARIABLES
 
 
 
 
10
  # Create variable for Tweet being analyzed in this app
11
+ tweet_url = 'https://twitter.com/Meta/status/1453795115701440524'
12
 
 
 
 
 
13
 
14
+ # FUNCTIONS
15
def data_import(csv_path='df_redacted.csv'):
    """
    Load the redacted Tweet-reply data set from a CSV file.

    Args:
        csv_path: Location of the CSV file to read. Defaults to
            'df_redacted.csv', the file this app has always read, so
            existing zero-argument calls behave exactly as before.

    Returns:
        A pandas DataFrame holding the contents of the CSV file.

    Raises:
        FileNotFoundError: Propagated from pandas when csv_path does not exist.
    """
    # The path is a parameter (not a literal) so other data sets can be loaded
    # without editing this function.
    return pd.read_csv(csv_path)
21
+
22
+
23
def data_manipulation(df_redacted):
    """
    Summarise the 'sentiment_score' column for the bar chart.

    Returns a tuple (sentiment_counts, sentiment):
      - sentiment_counts: dataframe with one row per distinct value of
        'sentiment_score' (missing values are counted too) and the columns
        'Sentiment' and 'Count'.
      - sentiment: the 'Sentiment' value of the row with the largest 'Count'.
    """
    # Tally each category, then flatten the tally into a two-column table
    tally = df_redacted['sentiment_score'].value_counts(dropna=False)
    sentiment_counts = tally.rename_axis('Sentiment').reset_index(name='Count')

    # Row label of the largest count, then the category stored on that row
    top_row = sentiment_counts['Count'].idxmax()
    sentiment = sentiment_counts.loc[top_row, 'Sentiment']

    return sentiment_counts, sentiment
37
+
38
+
39
def display_header(tweet_url, sentiment):
    """
    Render the header section of the app as three markdown paragraphs.

    tweet_url is interpolated into the "Analysis" paragraph and sentiment
    into the "Results" paragraph; the paragraphs are passed to st.markdown
    in the order Objective, Analysis, Results.
    """
    objective = '**Objective:** Understand public sentiment of a Tweet by analyzing the sentiment of each reply.'
    analysis = ('**Analysis:** This app runs sentiment analysis on 10,948 replies to a Facebook Tweet announcing '
                'their rebranding to Meta on 10/28/2021. Link to Tweet: {}').format(tweet_url)
    results = '**Results:** Most frequent sentiment category for the replies to this Tweet: **{}**'.format(sentiment)

    for paragraph in (objective, analysis, results):
        st.markdown(paragraph)
47
+
48
+
49
def display_chart(sentiment_counts):
    """
    Draw the bar chart of reply counts per sentiment category.

    sentiment_counts is handed to plotly express with 'Sentiment' on the
    x axis and 'Count' on the y axis; the resulting figure is shown through
    st.plotly_chart using the full container width.
    """
    # Chart configuration kept in one place, then unpacked into px.bar
    bar_options = {
        'x': 'Sentiment',
        'y': 'Count',
        'title': 'Tweet Replies Sentiment Counts by Category',
    }
    figure = px.bar(sentiment_counts, **bar_options)
    st.plotly_chart(figure, use_container_width=True)
60
 
61
 
62
def display_footer():
    """
    Render the footer section of the app.

    Emits, in order, a "Notes" heading with two bullet points followed by a
    "Plans for Version 2.0" heading with two bullet points, each as its own
    st.markdown call.
    """
    footer_lines = (
        '**Notes:** ',
        '- The VADER model was used to analyze the sentiment of each reply: '
        'https://github.com/cjhutto/vaderSentiment',
        '- Due to Twitter developer policies, I am not able to share the data set of downloaded Tweet replies '
        'so my DATA EXTRACTION and DATA CLEANSING steps are not shown at this time.',
        '**Plans for Version 2.0:**',
        '- Formulate method for cleaning Tweet replies, such as removing those that are from bots or are spam.',
        '- Analyze the sentiment of replies using the BERTweet model, which would be more appropriate for this '
        'project since it was trained on a corpus of Tweets: https://github.com/VinAIResearch/BERTweet',
    )
    for line in footer_lines:
        st.markdown(line)
75
+
76
+
77
def main():
    """
    Entry point of the app: load the data, summarise it, then render the page.

    The page is rendered top to bottom as header, chart, footer.
    """
    # DATA IMPORT + DATA MANIPULATION: feed the loaded dataframe straight in
    sentiment_counts, sentiment = data_manipulation(data_import())

    # DISPLAY DATA
    display_header(tweet_url, sentiment)
    display_chart(sentiment_counts)
    display_footer()
91
+
92
+
93
  if __name__ == '__main__':
94
  if st._is_running_with_streamlit:
95
  main()