Spaces:

darthPanda
/

Social_media_sentiment_tracker

Runtime error

App Files Files Community

darthPanda committed on Mar 11, 2023

Commit

d09b322

1 Parent(s): f746f70

first

Browse files

Files changed (4) hide show

app.py +138 -25
helper_functions.py +118 -65
requirements.txt +1 -0
static/yt_mask.png +0 -0

app.py CHANGED Viewed

@@ -10,36 +10,68 @@ import plotly.io as pio
 import plotly
 # Whenever the search button is clicked, the search_callback function is called
-def search_callback():
-    if twitter_agree:
-        if len(st.session_state.search_term) == 0:
             st.error("Please enter a search term")
             return
         try:
-            st.session_state.df = hf.get_tweets(st.session_state.search_term, st.session_state.num_tweets)
             st.session_state.df = hf.get_sentiment(st.session_state.df)
         except:
             st.error("Please enter a valid search term")
             return
 def twitter_form():
     with st.form(key="search_form"):
         st.subheader("Search Parameters")
-        st.text_input("Enter a User handle (like _@elonmusk_), Hashtag (like _#Bitcoin_) or Topic (like _climate change_)", key="search_term")
         st.slider("Number of tweets", min_value=100, max_value=500, key="num_tweets")
-        st.form_submit_button(label="Search", on_click=search_callback)
         st.markdown(
             "Note: it may take a while to load the results, especially with large number of tweets"
         )
 with st.sidebar:
     st.title("Social Media Sentiment Analyzer")
-    st.subheader("Choose your platform")
-    twitter_agree = st.checkbox('Twitter')
-    if twitter_agree:
         twitter_form()
     st.markdown(
     "<div style='position: fixed; bottom: 0;'>Created by Taaha Bajwa</div>",
@@ -91,6 +123,56 @@ if "df" in st.session_state:
             wordcloud = hf.plot_wordcloud(tweet_df, colormap=wc_color)
             st.pyplot(wordcloud)
     adjust_tab_font = """
     <style>
     button[data-baseweb="tab"] > div[data-testid="stMarkdownContainer"] > p {
@@ -101,19 +183,50 @@ if "df" in st.session_state:
     st.write(adjust_tab_font, unsafe_allow_html=True)
-    try:
-        tab1, tab2, tab3, tab4 = st.tabs(["All", "Positive 😊", "Negative ☹️", "Neutral 😐"])
-        with tab1:
-            tweet_df = st.session_state.df
-            make_dashboard(tweet_df, bar_color="#1F77B4", wc_color="Blues")
-        with tab2:
-            tweet_df = st.session_state.df.query("Sentiment == 'Positive'")
-            make_dashboard(tweet_df, bar_color="#54A24B", wc_color="Greens")
-        with tab3:
-            tweet_df = st.session_state.df.query("Sentiment == 'Negative'")
-            make_dashboard(tweet_df, bar_color="#FF7F0E", wc_color="Oranges")
-        with tab4:
-            tweet_df = st.session_state.df.query("Sentiment == 'Neutral'")
-            make_dashboard(tweet_df, bar_color="#1F77B4", wc_color="Blues")
-    except:
-        st.error("No plots to display.")

 import plotly
 # Whenever the search button is clicked, the search_callback function is called
+def search_callback_twitter():
+    """Form callback: fetch tweets for the entered term and score their sentiment.
+
+    Reads ``search_term_twitter`` and ``num_tweets`` from ``st.session_state``
+    and stores the scored result in ``st.session_state.df``. Does nothing when
+    the selected platform is not "Twitter".
+    """
+    if platform != "Twitter":
+        return
+    search_term = st.session_state.search_term_twitter
+    if not search_term:
+        st.error("Please enter a search term")
+        return
+    try:
+        tweets = hf.get_tweets(search_term, st.session_state.num_tweets)
+        scored = hf.get_sentiment(tweets)
+    except Exception:
+        # Any failure while fetching or scoring is reported as a bad search
+        # term; interpreter-level interrupts are no longer swallowed.
+        st.error("Please enter a valid search term")
+        return
+    # Publish only the fully scored frame, so a failure in get_sentiment
+    # cannot leave an unscored frame behind in session state.
+    st.session_state.df = scored
+def search_callback_youtube():
+    """Form callback: download comments for the entered video and score them.
+
+    Reads ``search_term_youtube`` and ``num_comments`` from
+    ``st.session_state`` and stores the scored result in
+    ``st.session_state.df``. Does nothing when the selected platform is not
+    "Youtube".
+    """
+    if platform != "Youtube":
+        return
+    video_url = st.session_state.search_term_youtube
+    if not video_url:
+        st.error("Please enter a valid url")
+        return
+    try:
+        comments = hf.get_youtube_comments(video_url, st.session_state.num_comments)
+        scored = hf.get_sentiment_youtube(comments)
+    except Exception:
+        # Any failure while downloading or scoring is reported as a bad url;
+        # interpreter-level interrupts are no longer swallowed.
+        st.error("Please enter a valid url")
+        return
+    # Publish only the scored frame, so a scoring failure cannot leave the
+    # raw comment list behind in session state.
+    st.session_state.df = scored
 def twitter_form():
+    """Render the Twitter search form: term input, tweet-count slider and submit button."""
+    term_label = "Enter a User handle (like _@elonmusk_), Hashtag (like _#Bitcoin_) or Topic (like _climate change_)"
+    loading_note = "Note: it may take a while to load the results, especially with large number of tweets"
+    with st.form(key="search_form"):
+        st.subheader("Search Parameters")
+        st.text_input(term_label, key="search_term_twitter")
+        st.slider(
+            "Number of tweets",
+            min_value=100,
+            max_value=500,
+            key="num_tweets",
+        )
+        # Submitting the form invokes search_callback_twitter.
+        st.form_submit_button(label="Search", on_click=search_callback_twitter)
+        st.markdown(loading_note)
+def youtube_form():
+    """Render the YouTube search form: video link input, comment-count slider and submit button."""
+    link_label = "Enter a Video link to analyse comments"
+    loading_note = "Note: it may take a while to load the results, especially with large number of comments"
+    with st.form(key="search_form"):
+        st.subheader("Search Parameters")
+        st.text_input(link_label, key="search_term_youtube")
+        st.slider(
+            "Number of Comments",
+            min_value=100,
+            max_value=500,
+            key="num_comments",
+        )
+        # Submitting the form invokes search_callback_youtube.
+        st.form_submit_button(label="Search", on_click=search_callback_youtube)
+        st.markdown(loading_note)
 with st.sidebar:
     st.title("Social Media Sentiment Analyzer")
+    #st.subheader("Choose your platform")
+    platform = st.radio(
+        "Choose your platform 👇",
+        ["Twitter", "Youtube"],
+        # key="visibility",
+        # label_visibility=st.session_state.visibility,
+        # disabled=st.session_state.disabled,
+        horizontal=True,
+    )
+    if platform == "Twitter":
         twitter_form()
+    if platform == "Youtube":
+        youtube_form()
     st.markdown(
     "<div style='position: fixed; bottom: 0;'>Created by Taaha Bajwa</div>",
             wordcloud = hf.plot_wordcloud(tweet_df, colormap=wc_color)
             st.pyplot(wordcloud)
+    def make_dashboard_youtube(tweet_df, bar_color, wc_color):
+        """Render the YouTube dashboard for a frame of scored comments.
+
+        The first row shows the sentiment plot and the top-10 unigram and
+        bigram bar charts; the second row shows the colour-coded comment
+        table and the word cloud.
+
+        Args:
+            tweet_df: DataFrame with "Sentiment" and "Comment" columns.
+            bar_color: Bar colour passed to hf.plot_n_gram.
+            wc_color: Colormap name passed to hf.plot_wordcloud.
+        """
+        # The hf helpers read a "Tweet" column, so expose the comments under
+        # that name for the plotting calls.
+        tweet_df = tweet_df.rename(columns={"Comment": "Tweet"})
+        # first row
+        col1, col2, col3 = st.columns([28, 34, 38])
+        with col1:
+            sentiment_plot = hf.plot_sentiment(tweet_df)
+            sentiment_plot.update_layout(height=350, title_x=0.5)
+            st.plotly_chart(sentiment_plot, theme=None, use_container_width=True)
+        # The unigram and bigram charts differ only in n-gram size and title.
+        n_gram_charts = (
+            (col2, (1, 1), "Top 10 Occuring Words"),
+            (col3, (2, 2), "Top 10 Occuring Bigrams"),
+        )
+        for column, ngram_range, title in n_gram_charts:
+            with column:
+                top_n_gram = hf.get_top_n_gram(tweet_df, ngram_range=ngram_range, n=10)
+                n_gram_plot = hf.plot_n_gram(top_n_gram, title=title, color=bar_color)
+                n_gram_plot.update_layout(height=350)
+                st.plotly_chart(n_gram_plot, theme=None, use_container_width=True)
+        # second row
+        col1, col2 = st.columns([60, 40])
+        with col1:
+            sentiment_styles = {
+                "Positive": "background-color: #54A24B; color: white",
+                "Negative": "background-color: #FF7F0E",
+            }
+
+            def sentiment_color(sentiment):
+                # Anything other than Positive/Negative gets the neutral blue.
+                return sentiment_styles.get(sentiment, "background-color: #1F77B4")
+
+            # The table is shown with the column named "Comment" again.
+            comment_table = tweet_df[["Sentiment", "Tweet"]].rename(columns={"Tweet": "Comment"})
+            st.dataframe(
+                comment_table.style.applymap(sentiment_color, subset=["Sentiment"]),
+                height=350,
+            )
+        with col2:
+            # Building and drawing the cloud share one guard, so a failure in
+            # either step shows the notice instead of aborting the dashboard.
+            try:
+                wordcloud = hf.plot_wordcloud(tweet_df, colormap=wc_color, mask_url='static/yt_mask.png')
+                st.pyplot(wordcloud)
+            except Exception:
+                st.write("Wordcloud not available for this search term")
     adjust_tab_font = """
     <style>
     button[data-baseweb="tab"] > div[data-testid="stMarkdownContainer"] > p {
     st.write(adjust_tab_font, unsafe_allow_html=True)
+    # One entry per tab: (tab label, Sentiment value to keep or None for all
+    # rows, bar colour, word-cloud colormap, adjective for the empty message).
+    tab_specs = [
+        ("All", None, "#1F77B4", "Blues", ""),
+        ("Positive 😊", "Positive", "#54A24B", "Greens", "positive "),
+        ("Negative ☹️", "Negative", "#FF7F0E", "Oranges", "negative "),
+        ("Neutral 😐", "Neutral", "#1F77B4", "Blues", "neutral "),
+    ]
+    # Pick the dashboard renderer for the active platform; nothing is drawn
+    # while that platform's search box is empty.
+    if platform == "Twitter" and st.session_state.search_term_twitter != "":
+        render_dashboard, item_name = make_dashboard, "tweets"
+    elif platform == "Youtube" and st.session_state.search_term_youtube != "":
+        render_dashboard, item_name = make_dashboard_youtube, "comments"
+    else:
+        render_dashboard, item_name = None, ""
+    if render_dashboard is not None:
+        tabs = st.tabs([spec[0] for spec in tab_specs])
+        for tab, (_, sentiment, bar_color, wc_color, adjective) in zip(tabs, tab_specs):
+            with tab:
+                # Each tab has its own guard so that one failing tab does not
+                # stop the tabs after it from rendering.
+                try:
+                    tweet_df = st.session_state.df
+                    if sentiment is not None:
+                        tweet_df = tweet_df.query(f"Sentiment == '{sentiment}'")
+                    if tweet_df.shape[0] > 0:
+                        render_dashboard(tweet_df, bar_color=bar_color, wc_color=wc_color)
+                    else:
+                        # Both platforms now report an empty subset the same way.
+                        st.write(f"No {adjective}{item_name} found.")
+                except Exception:
+                    st.error("No plots to display.")

helper_functions.py CHANGED Viewed

@@ -6,10 +6,15 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import pipeline
 import plotly.express as px
 import plotly.io as pio
 import matplotlib as mpl
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 from PIL import Image
 @st.cache(allow_output_mutation=True)
 def get_nltk():
@@ -108,6 +113,45 @@ def get_tweets(query, max_tweets):
     tweets_df.drop('Datetime', axis=1, inplace=True)
     return tweets_df
 def text_preprocessing(text):
     stopwords = set()
     with open("static/en_stopwords.txt", "r") as file:
@@ -127,7 +171,6 @@ def text_preprocessing(text):
         cleaned_text = re.sub(entity_pattern, " ", cleaned_text)
         cleaned_text = re.sub(non_alpha, " ", cleaned_text)
         tokens = word_tokenize(cleaned_text)
-        #print('tokens')
         # provide POS tag for lemmatization to yield better result
         word_tag_tuples = pos_tag(tokens, tagset="universal")
         tag_dict = {"NOUN": "n", "VERB": "v", "ADJ": "a", "ADV": "r"}
@@ -183,73 +226,83 @@ def plot_sentiment(tweet_df):
     fig.update_layout(showlegend=False)
     return fig
 def get_top_n_gram(tweet_df, ngram_range, n=10):
-    stopwords = set()
-    with open("static/en_stopwords_ngram.txt", "r") as file:
-        for word in file:
-            stopwords.add(word.rstrip("\n"))
-    stopwords = list(stopwords)
-    corpus = tweet_df["Tweet"]
-    vectorizer = CountVectorizer(
-        analyzer="word", ngram_range=ngram_range, stop_words=stopwords
-    )
-    X = vectorizer.fit_transform(corpus.astype(str).values)
-    words = vectorizer.get_feature_names_out()
-    words_count = np.ravel(X.sum(axis=0))
-    df = pd.DataFrame(zip(words, words_count))
-    df.columns = ["words", "counts"]
-    df = df.sort_values(by="counts", ascending=False).head(n)
-    df["words"] = df["words"].str.title()
-    return df
 def plot_n_gram(n_gram_df, title, color="#54A24B"):
-    fig = px.bar(
-        # n_gram_df,
-        # x="counts",
-        # y="words",
-        x=n_gram_df.counts,
-        y=n_gram_df.words,
-        title="<b>{}</b>".format(title),
-        text_auto=True,
-    )
-    fig.update_layout(plot_bgcolor="white")
-    fig.update_xaxes(title=None)
-    fig.update_yaxes(autorange="reversed", title=None)
-    fig.update_traces(hovertemplate="<b>%{y}</b><br>Count=%{x}", marker_color=color)
-    return fig
-def plot_wordcloud(tweet_df, colormap="Greens"):
-    stopwords = set()
-    with open("static/en_stopwords_ngram.txt", "r") as file:
-        for word in file:
-            stopwords.add(word.rstrip("\n"))
-    cmap = mpl.cm.get_cmap(colormap)(np.linspace(0, 1, 20))
-    cmap = mpl.colors.ListedColormap(cmap[10:15])
-    mask = np.array(Image.open("static/twitter_mask.png"))
-    font = "static/quartzo.ttf"
-    #tweet_df["Cleaned_Tweet"] = tweet_df["Tweet"].apply(lambda x: text_preprocessing(x))
-    tweet_df["Cleaned_Tweet"] = tweet_df["Tweet"].apply(text_preprocessing)
-    #print(tweet_df["Cleaned_Tweet"])
-    text = " ".join(tweet_df["Cleaned_Tweet"])
-    #print(text)
-    wc = WordCloud(
-        background_color="white",
-        font_path=font,
-        stopwords=stopwords,
-        max_words=90,
-        colormap=cmap,
-        mask=mask,
-        random_state=42,
-        collocations=False,
-        min_word_length=2,
-        max_font_size=200,
-    )
-    wc.generate(text)
-    fig = plt.figure(figsize=(8, 8))
-    ax = fig.add_subplot(1, 1, 1)
-    plt.imshow(wc, interpolation="bilinear")
-    plt.axis("off")
-    plt.title("Wordcloud", fontdict={"fontsize": 16}, fontweight="heavy", pad=20, y=1.0)
-    return fig

 from transformers import pipeline
 import plotly.express as px
 import plotly.io as pio
+import plotly.graph_objects as go
 import matplotlib as mpl
 import matplotlib.pyplot as plt
 from wordcloud import WordCloud
 from PIL import Image
+import requests
+from itertools import islice
+from youtube_comment_downloader import *
 @st.cache(allow_output_mutation=True)
 def get_nltk():
     tweets_df.drop('Datetime', axis=1, inplace=True)
     return tweets_df
+def get_youtube_comments(url, num_comments):
+    """Download the text of up to ``num_comments`` comments of a YouTube video.
+
+    Comments are requested with ``sort_by=SORT_BY_POPULAR``.
+
+    Args:
+        url: Address of the video page.
+        num_comments: Maximum number of comments to collect.
+
+    Returns:
+        list: The ``text`` field of each downloaded comment.
+
+    Raises:
+        ValueError: If the page reports the video as unavailable.
+        requests.RequestException: Propagated from ``requests.get`` when the
+            page cannot be fetched, including on timeout.
+    """
+    pattern = '"playabilityStatus":{"status":"ERROR","reason":"Video unavailable"'
+    # A timeout keeps a stalled connection from blocking the app indefinitely.
+    response = requests.get(url, timeout=10)
+    if pattern in response.text:
+        raise ValueError('Video does not exist')
+    downloader = YoutubeCommentDownloader()
+    comments = downloader.get_comments_from_url(url, sort_by=SORT_BY_POPULAR)
+    # islice stops the download generator after num_comments items.
+    return [comment['text'] for comment in islice(comments, num_comments)]
+def get_sentiment_youtube(useful_sentence):
+    """Classify each comment with the ProsusAI/finbert pipeline.
+
+    Args:
+        useful_sentence: Iterable of comment strings.
+
+    Returns:
+        pandas.DataFrame: One row per comment, with the text in "Comment",
+        the classifier label in "Sentiment" ("positive", "negative" and
+        "neutral" are capitalised) and the remaining classifier output fields.
+        An empty input yields an empty frame with "Comment" and "Sentiment"
+        columns.
+    """
+    comments = list(useful_sentence)
+    if not comments:
+        # Nothing to classify: return a frame that still has the columns
+        # callers filter on, without constructing the pipeline.
+        return pd.DataFrame(columns=["Comment", "Sentiment"])
+    # A single pipeline instance per call is all that is needed.
+    classifier = pipeline(model="ProsusAI/finbert")
+    # classifier(comment) returns a list of result dicts; flatten them in order.
+    output = [result for comment in comments for result in classifier(comment)]
+    df = pd.concat([pd.DataFrame({"Comment": comments}), pd.DataFrame(output)], axis=1)
+    df = df.rename(columns={'label': 'Sentiment'})
+    df['Sentiment'] = df['Sentiment'].replace(
+        {'positive': 'Positive', 'negative': 'Negative', 'neutral': 'Neutral'}
+    )
+    return df
 def text_preprocessing(text):
     stopwords = set()
     with open("static/en_stopwords.txt", "r") as file:
         cleaned_text = re.sub(entity_pattern, " ", cleaned_text)
         cleaned_text = re.sub(non_alpha, " ", cleaned_text)
         tokens = word_tokenize(cleaned_text)
         # provide POS tag for lemmatization to yield better result
         word_tag_tuples = pos_tag(tokens, tagset="universal")
         tag_dict = {"NOUN": "n", "VERB": "v", "ADJ": "a", "ADV": "r"}
     fig.update_layout(showlegend=False)
     return fig
 def get_top_n_gram(tweet_df, ngram_range, n=10):
+    """Return the ``n`` most frequent n-grams found in the "Tweet" column.
+
+    Args:
+        tweet_df: DataFrame with a "Tweet" column.
+        ngram_range: Value passed to CountVectorizer's ``ngram_range``.
+        n: Number of rows to keep.
+
+    Returns:
+        pandas.DataFrame: Columns "words" (title-cased) and "counts", sorted
+        by count in descending order. The frame is empty when any step fails.
+    """
+    try:
+        with open("static/en_stopwords_ngram.txt", "r") as file:
+            stopwords = list({word.rstrip("\n") for word in file})
+        corpus = tweet_df["Tweet"]
+        vectorizer = CountVectorizer(
+            analyzer="word", ngram_range=ngram_range, stop_words=stopwords
+        )
+        X = vectorizer.fit_transform(corpus.astype(str).values)
+        words = vectorizer.get_feature_names_out()
+        words_count = np.ravel(X.sum(axis=0))
+        df = pd.DataFrame({"words": words, "counts": words_count})
+        df = df.sort_values(by="counts", ascending=False).head(n)
+        df["words"] = df["words"].str.title()
+        return df
+    except Exception:
+        # Best effort: hand back an empty, correctly shaped frame instead of
+        # None so callers always receive a DataFrame.
+        return pd.DataFrame(columns=["words", "counts"])
 def plot_n_gram(n_gram_df, title, color="#54A24B"):
+    """Build a bar chart of n-gram counts (counts on x, words on y).
+
+    Args:
+        n_gram_df: DataFrame with "words" and "counts" columns, or None.
+        title: Chart title; it is rendered in bold.
+        color: Bar colour.
+
+    Returns:
+        A Plotly figure. The figure is empty when ``n_gram_df`` is None or
+        has no rows, or when the chart cannot be built.
+    """
+    try:
+        # Handle missing data explicitly rather than through an exception.
+        if n_gram_df is None or n_gram_df.empty:
+            return go.Figure()
+        fig = px.bar(
+            x=n_gram_df.counts,
+            y=n_gram_df.words,
+            title=f"<b>{title}</b>",
+            text_auto=True,
+        )
+        fig.update_layout(plot_bgcolor="white")
+        fig.update_xaxes(title=None)
+        fig.update_yaxes(autorange="reversed", title=None)
+        fig.update_traces(hovertemplate="<b>%{y}</b><br>Count=%{x}", marker_color=color)
+        return fig
+    except Exception:
+        # Best effort: an empty figure keeps the dashboard layout intact.
+        return go.Figure()
+def plot_wordcloud(tweet_df, colormap="Greens", mask_url="static/twitter_mask.png"):
+    """Draw a masked word cloud of the preprocessed "Tweet" texts.
+
+    Args:
+        tweet_df: DataFrame with a "Tweet" column; it is not modified.
+        colormap: Matplotlib colormap name. Samples 10-14 of 20 evenly spaced
+            colours from it are used for the words.
+        mask_url: Path of the image that shapes the cloud.
+
+    Returns:
+        matplotlib.figure.Figure: The word cloud, or a figure carrying a
+        short notice when the cloud cannot be built.
+    """
+    try:
+        with open("static/en_stopwords_ngram.txt", "r") as file:
+            stopwords = {word.rstrip("\n") for word in file}
+        # plt.get_cmap replaces mpl.cm.get_cmap, which recent Matplotlib
+        # releases no longer provide.
+        cmap = plt.get_cmap(colormap)(np.linspace(0, 1, 20))
+        cmap = mpl.colors.ListedColormap(cmap[10:15])
+        mask = np.array(Image.open(mask_url))
+        font = "static/quartzo.ttf"
+        # Clean into a local series so the caller's frame gains no extra column.
+        cleaned_tweets = tweet_df["Tweet"].apply(text_preprocessing)
+        text = " ".join(cleaned_tweets)
+        wc = WordCloud(
+            background_color="white",
+            font_path=font,
+            stopwords=stopwords,
+            max_words=90,
+            colormap=cmap,
+            mask=mask,
+            random_state=42,
+            collocations=False,
+            min_word_length=2,
+            max_font_size=200,
+        )
+        wc.generate(text)
+        fig = plt.figure(figsize=(8, 8))
+        ax = fig.add_subplot(1, 1, 1)
+        ax.imshow(wc, interpolation="bilinear")
+        ax.axis("off")
+        ax.set_title("Wordcloud", fontdict={"fontsize": 16}, fontweight="heavy", pad=20, y=1.0)
+        return fig
+    except Exception:
+        # Callers pass the result to st.pyplot, so the fallback must be a
+        # Matplotlib figure too; it carries a notice in place of the cloud.
+        fig = plt.figure(figsize=(8, 8))
+        ax = fig.add_subplot(1, 1, 1)
+        ax.axis("off")
+        ax.text(0.5, 0.5, "Wordcloud not available", ha="center", va="center")
+        return fig

requirements.txt CHANGED Viewed

@@ -7,3 +7,4 @@ plotly==5.9.0
 nltk
 scikit-learn
 wordcloud

 nltk
 scikit-learn
 wordcloud
+youtube-comment-downloader

static/yt_mask.png ADDED Viewed