Spaces:
Build error
Build error
Upload app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import plotly.express as px
|
| 6 |
+
from stop_words import get_stop_words
|
| 7 |
+
from wordcloud import WordCloud
|
| 8 |
+
from datasets import load_dataset
|
| 9 |
+
import re
|
| 10 |
+
|
| 11 |
+
## import data
|
| 12 |
+
|
| 13 |
+
# Fetch the tweet corpus from the Hugging Face Hub and expose its "train"
# split as a pandas DataFrame.
dataset = load_dataset("Santarabantoosoo/italian_long_covid_tweets")
data = pd.DataFrame.from_dict(dataset["train"])

# The Italian stop words are published as a dataset as well; only the
# "text" column of its "train" split is kept, as a plain Python list.
it_stop_words = load_dataset("Santarabantoosoo/italian-stopwords")
it_stop = pd.DataFrame.from_dict(it_stop_words["train"])["text"].to_list()
|
| 23 |
+
|
| 24 |
+
## Optimize stop words according to Luca's repo
|
| 25 |
+
|
| 26 |
+
def format_input(user_key, stopwords):
    """Normalise a user-supplied key for lookup in the frequency tables.

    The key is lower-cased, every character that is neither a word
    character nor whitespace is replaced by a space, and the tokens found
    in ``stopwords`` are dropped. The surviving tokens are joined back with
    single spaces.

    Args:
        user_key: raw text to clean (str).
        stopwords: container of tokens to discard; membership is tested
            with ``in``.

    Returns:
        The cleaned key (str). It is empty when nothing survives.
    """
    without_punctuation = re.sub(r'[^\w\s]', ' ', user_key.lower())
    kept_tokens = filter(
        lambda token: token not in stopwords,
        without_punctuation.split(),
    )
    return ' '.join(kept_tokens)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
### Loading TFIDF
|
| 47 |
+
|
| 48 |
+
def _load_train_frame(repo_id):
    """Download ``repo_id`` from the Hugging Face Hub and return its "train" split as a DataFrame."""
    return pd.DataFrame.from_dict(load_dataset(repo_id)["train"])


# TF-IDF word-frequency tables, one per period.
TFIDF_21_Jul_Oct = _load_train_frame("Santarabantoosoo/Long_Covid_word_frequency_TFIDF_21_Jul_Oct")
TFIDF_22_Feb_Apr = _load_train_frame("Santarabantoosoo/Long_Covid_word_frequency_TFIDF_22_Feb_Apr")
TFIDF_22_May_Jul = _load_train_frame("Santarabantoosoo/Long_Covid_word_frequency_TFIDF_22_May_Jul")
TFIDF_21_Nov_22_Jan = _load_train_frame("Santarabantoosoo/Long_Covid_word_frequency_TFIDF_21_Nov_22_Jan")

## Loading whole_text
# Whole-text term-frequency tables, one per period.
whole_text_21_Jul_Oct = _load_train_frame("Santarabantoosoo/whole_text_TF_21_Jul_Oct")
whole_text_22_Feb_Apr = _load_train_frame("Santarabantoosoo/whole_text_TF_22_Feb_Apr")
whole_text_22_May_Jul = _load_train_frame("Santarabantoosoo/whole_text_TF_22_May_Jul")
whole_text_21_Nov_22_Jan = _load_train_frame("Santarabantoosoo/whole_text_TF_21_Nov_22_Jan")

# plot_time_series() draws element k of these lists at tick "Q(k+1)" of a
# July 2021 - July 2022 time axis, so the periods must be listed in
# chronological order: Jul-Oct 21, Nov 21-Jan 22, Feb-Apr 22, May-Jul 22.
# (Previously Nov 21-Jan 22 was appended last and therefore shown as Q4.)
# Each entry is column 0 of the transposed table.
# NOTE(review): presumably each table has a single row whose columns are the
# words, making the entry a word -> frequency Series; confirm against the data.
ser_TFIDF = [
    frame.transpose()[0]
    for frame in (
        TFIDF_21_Jul_Oct,
        TFIDF_21_Nov_22_Jan,
        TFIDF_22_Feb_Apr,
        TFIDF_22_May_Jul,
    )
]

ser_whole_text = [
    frame.transpose()[0]
    for frame in (
        whole_text_21_Jul_Oct,
        whole_text_21_Nov_22_Jan,
        whole_text_22_Feb_Apr,
        whole_text_22_May_Jul,
    )
]
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def plot_time_series(choice, keyword, user_keys):
    """Plot the per-period frequency of each keyword as a line chart.

    Args:
        choice: sequence with one entry per period, in plotting order. Each
            entry must offer ``.get(key, default)`` (a pandas Series or a
            dict) mapping a cleaned keyword to its frequency.
        keyword: cleaned keywords used for the lookup.
        user_keys: the raw user inputs matching ``keyword`` one-to-one;
            their lower-cased form labels the lines.

    Returns:
        The matplotlib Figure holding the chart.
    """
    period_count = len(choice)
    # Evenly spaced tick positions (2, 4, 6, ...) labelled Q1, Q2, ...;
    # derived from the data so any number of periods can be drawn.
    x = np.arange(2, 2 * period_count + 2, 2)
    x_ticks_labels = [f'Q{quarter}' for quarter in range(1, period_count + 1)]

    fig, ax = plt.subplots(1, 1)

    for key, user_key in zip(keyword, user_keys):
        # A keyword missing from a period counts as frequency 0.0. Using
        # .get() keeps that fallback without a bare except that would also
        # hide unrelated errors.
        y = np.array([period.get(key, 0.0) for period in choice])
        ax.plot(x, y, label=user_key.lower())

    # Set number of ticks for x-axis
    ax.set_xticks(x)
    ax.set_xticklabels(x_ticks_labels, fontsize=12)

    # Work on this Axes explicitly instead of pyplot's "current" one.
    ax.legend(loc='best')
    ax.set_xlabel('Time')
    ax.set_title("keywords quartely analysis (July 2021 - July 2022)")
    ax.set_ylabel(f'Freq. from {user_keys}')

    # pyplot keeps every figure it creates registered until it is closed;
    # unregister this one so repeated requests do not accumulate figures.
    # The Figure object itself stays usable by the caller.
    plt.close(fig)
    return fig
|
| 134 |
+
|
| 135 |
+
# Wordcloud with anger tweets
|
| 136 |
+
# Tokens hidden from every word cloud: URL fragments, the retweet marker
# and the Italian stop words.
stop_words = ["https", 'http', "co", "RT"] + list(it_stop)


def _emotion_tweets(emotion):
    """Return the cleaned tweets labelled with ``emotion`` as a Series."""
    selected = data['tweet'][data["emotion"] == emotion]
    return selected.apply(format_input, args=[it_stop])


def _build_wordcloud(tweets):
    """Render a word cloud from the text of every tweet in ``tweets``."""
    # Join the real tweet texts. str(Series) would only yield pandas'
    # display repr (index numbers, shortened rows, the dtype footer), so
    # the cloud would not reflect the corpus.
    # NOTE(review): WordCloud.generate raises when the text has no words,
    # i.e. if an emotion has no tweets at all; confirm all four are present.
    text = ' '.join(tweets)
    return WordCloud(max_font_size=50, max_words=50, background_color="white", stopwords = stop_words).generate(text)


# Wordcloud with anger tweets
angry_tweets = _emotion_tweets('anger')
anger_wordcloud = _build_wordcloud(angry_tweets)

# Wordcloud with sad tweets
sad_tweets = _emotion_tweets('sadness')
sad_wordcloud = _build_wordcloud(sad_tweets)

# Wordcloud with joy tweets
joy_tweets = _emotion_tweets('joy')
joy_wordcloud = _build_wordcloud(joy_tweets)

# Wordcloud with fear tweets
fear_tweets = _emotion_tweets('fear')
fear_wordcloud = _build_wordcloud(fear_tweets)

## Combine all word clouds in a single 2x2 figure
wc_fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2,2)

for axis, cloud, title in (
    (ax1, sad_wordcloud, 'Sadness'),
    (ax2, joy_wordcloud, 'Joy'),
    (ax3, fear_wordcloud, 'Fear'),
    (ax4, anger_wordcloud, 'Anger'),
):
    axis.imshow(cloud, interpolation="bilinear")
    axis.axis("off")
    axis.set_title(title, {'fontsize': 30})

# Lay the panels out once the images and titles exist, so the layout is
# computed from the final content.
wc_fig.tight_layout()

# Line plot: number of tweets per day, one line per emotion
number_tweets_per_day = data.groupby(['date', 'emotion']).agg({'id': 'count'}).reset_index()
number_tweets_per_day["tweet_date"] = pd.to_datetime(number_tweets_per_day["date"])
time_fig = px.line(number_tweets_per_day, x = 'tweet_date', y = 'id', labels = {'id': 'count'}, color = 'emotion',
                   color_discrete_sequence=px.colors.qualitative.G10)

# Pie chart: share of tweets within each emotion
sentiment_counts = data.groupby('emotion').agg({'id' : 'size'}).reset_index()
sentiment_counts.rename(columns = {'id':'count'}, inplace = True)
sent_fig = px.pie(sentiment_counts, values='count', names='emotion', title='Tweets within each emotion', labels = {'id': 'count'},
                  color_discrete_sequence=px.colors.qualitative.G10)
|
| 215 |
+
|
| 216 |
+
def display_plot(image_choice):
    """Return the pre-built figure matching the selected plot name.

    Args:
        image_choice: one of 'Sentiment distribution', 'Time series' or
            'Word clouds'.

    Returns:
        ``sent_fig``, ``time_fig`` or ``wc_fig`` respectively; ``None`` for
        any other value.
    """
    if image_choice == 'Sentiment distribution':
        return sent_fig
    if image_choice == 'Time series':
        return time_fig
    if image_choice == 'Word clouds':
        return wc_fig
    return None
|
| 226 |
+
|
| 227 |
+
def display_freq_plot(choice, *args):
    """Plot the frequency over time of the words typed by the user.

    Args:
        choice: "TFIDF" or "Whole_text", selecting which frequency series
            to read.
        *args: the raw words or phrases entered by the user.

    Returns:
        The figure produced by ``plot_time_series``, or ``None`` when
        ``choice`` is neither of the two supported values.
    """
    # Skip textboxes left empty: they would otherwise be drawn as an
    # all-zero series (and a None value would crash in format_input).
    user_keys = [arg for arg in args if arg is not None and arg.strip()]

    # clean input strings to match keywords in the database
    keyword = [format_input(key, it_stop) for key in user_keys]

    if choice == "TFIDF":
        return plot_time_series(ser_TFIDF, keyword, user_keys)
    if choice == "Whole_text":
        return plot_time_series(ser_whole_text, keyword, user_keys)
    return None
|
| 241 |
+
|
| 242 |
+
def display_output(tweet_index):
    """Build the HTML update showing the topic distribution of an example tweet.

    Args:
        tweet_index: position of the selected tweet in ``topic_dist_list``,
            or ``None`` when no tweet is selected.

    Returns:
        A ``gr.HTML.update`` listing ``(topic number, weight)`` pairs with
        1-based topic numbers, or a hidden empty update when
        ``tweet_index`` is ``None``.
    """
    if tweet_index is None:
        # Nothing selected (e.g. the dropdown was cleared): hide the panel
        # instead of failing on topic_dist_list[None].
        return gr.HTML.update("", visible=False)

    item = topic_dist_list[tweet_index]
    # Stored topic ids are 0-based; show them 1-based. Iterating over the
    # row supports any number of topics.
    pairs = ', '.join(f'({topic_id + 1}, {weight})' for topic_id, weight in item)
    distribution = f'<html><body><h3>Topics Distribution</h3>{pairs}</body></html>'
    return gr.HTML.update(distribution, visible=True)
|
| 255 |
+
|
| 256 |
+
def display_output_Q2_Q4(tweet_index):
    """Build the HTML update showing the topic distribution of an October 2021 - July 2022 example tweet.

    Args:
        tweet_index: position of the selected tweet in
            ``topic_dist_list_Q2_Q4``, or ``None`` when no tweet is selected.

    Returns:
        A ``gr.HTML.update`` listing ``(topic number, weight)`` pairs with
        1-based topic numbers; a hidden empty update when ``tweet_index``
        is ``None``; a short notice when no distribution is stored for that
        index.
    """
    if tweet_index is None:
        # Nothing selected (e.g. the dropdown was cleared).
        return gr.HTML.update("", visible=False)

    if not 0 <= tweet_index < len(topic_dist_list_Q2_Q4):
        # The dropdown can offer more tweets than there are stored
        # distributions; report that instead of raising IndexError.
        notice = ('<html><body><h3>Topics Distribution</h3>'
                  'No topic distribution is available for this tweet.'
                  '</body></html>')
        return gr.HTML.update(notice, visible=True)

    item = topic_dist_list_Q2_Q4[tweet_index]
    # Stored topic ids are 0-based; show them 1-based.
    pairs = ', '.join(f'({topic_id + 1}, {weight})' for topic_id, weight in item)
    distribution = f'<html><body><h3>Topics Distribution</h3>{pairs}</body></html>'
    return gr.HTML.update(distribution, visible=True)
|
| 261 |
+
|
| 262 |
+
# with gr.Blocks() as demo:
|
| 263 |
+
# gr.Markdown("## Choose your adventure")
|
| 264 |
+
|
| 265 |
+
# with gr.Tabs():
|
| 266 |
+
|
| 267 |
+
# with gr.TabItem("Topic modeling"):
|
| 268 |
+
# gr.Markdown("Nothing here yet")
|
| 269 |
+
|
| 270 |
+
# with gr.TabItem("Word frequency"):
|
| 271 |
+
|
| 272 |
+
# inputs = [gr.Radio(choices = ['TFIDF', 'Whole_text'], label = 'Choose ur method'),
|
| 273 |
+
# gr.Textbox(label = 'word 1'),
|
| 274 |
+
# gr.Textbox(label = 'word 2'),
|
| 275 |
+
# gr.Textbox(label = 'word 3'),
|
| 276 |
+
# gr.Textbox(label = 'word 4')]
|
| 277 |
+
# plot_output = gr.Plot(elem_id = 1)
|
| 278 |
+
# freq_button = gr.Button("Submit")
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
# with gr.TabItem("Sentiment analysis"):
|
| 282 |
+
# text_input = gr.Radio(choices = ['Sentiment distribution', 'Word clouds', 'Time series'], label = 'Choose ur plot')
|
| 283 |
+
# sent_plot = gr.Plot()
|
| 284 |
+
# sent_button = gr.Button("Submit")
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
# sent_button.click(display_plot, inputs=text_input, outputs= sent_plot)
|
| 288 |
+
# freq_button.click(display_freq_plot, inputs=inputs, outputs=plot_output)
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
# Gradio UI: three top-level tabs (topic modeling, word frequency,
# sentiment analysis).
# NOTE(review): the nesting below was reconstructed from statement order
# because the original indentation was not available; in particular,
# confirm whether the topic-list HTML belongs inside the gr.Row() next to
# the image or below it.
with gr.Blocks() as demo:
    gr.Markdown("## Choose your adventure")

    with gr.Tabs():

        with gr.TabItem("Topic modeling"):
            gr.Markdown(
                '\n## <div style="text-align: center;">Topic modeling analysis on Twitter</div>\n'
            )
            with gr.Tabs():
                with gr.TabItem("July-September 2021"):
                    with gr.Row():
                        gr.Image("./wordclouds_Q1 data.png", label="July-September 2021")

                    # Example tweets offered in the dropdown; entry k pairs
                    # with row k of topic_dist_list.
                    tweets_list = ['C\'è uno studio a riguardo condotto proprio sui più giovani che identifica il long covid alla stregua di ogni strascico di malattie infettive polmonari. Il long covid è dannoso come una polmonite in quanto a effetti a lungo termine. Se lo ritrovo te lo passo, ora sono fuori...',
                                   'Mio cugino è guarito dal covid dopo 4 mesi di ospedale, di cui più di 2 intubato, grazie alla testardaggine dei medici che hanno fatto di tutto per salvargli la vita a 57 anni. Ora è nella fase long covid per recuperare i danni fisici riportati',
                                   'È importante parlare di #LongCovid e sensibilizzare tutti, giovani compresi, che non è un gioco ma una malattia debilitante/invalidante che può stravolgere la vita. Io 39 anni e #LongCovid da 18 mesi (con 4 figli piccoli). #countlongcovid',
                                   'Il Long Covid è una diretta conseguenza di quelli che nei primi tempi sono stati abbandonati a se stessi giorni e giorni e curati solo quando molto aggravati, in ospedale. Se ti curi tempestivamente non hai nessuna conseguenza.',
                                   'Non sai di cosa parli sono stato un mese attaccato ad un respiratore e sono salvo per miracolo. Ma questo è niente in confronto con il #LongCovid che mi porto dietro da mesi e mesi. Siete dei criminali a pensare ch\'è meglio curare che prevenire. Dei pazzi da rinchiudere',
                                   'A chi dice ""Il COVID è innocuo per i bambini"". Oltre ad alcuni decessi 500+ bambini sono morti di COVID negli USA 2020) c\'è #LongCOVID. Se ne parla in questo studio: ""Studio inglese rileva che il COVID a lungo colpisce fino a 1 bambino su 7 mesi dopo l\'infezione']

                    q1_data_topic_list=['0. Discussion about scientific studies','1. Anxiety about pandemic and the information about it OR Specific people in the context of LC',
                                        '2. Discussion about LC impact in terms of time periods','3. Discussion about LC impact on patient life (impact on life so far or scope for lifelong impact)' ,
                                        '4. Treatment scenario', '5. Impact/Consequences of LC on children']

                    # One row per example tweet: (topic id, weight) pairs.
                    topic_dist_list=[[(0, 0.2181524), (1, 0.13380228), (2, 0.021277282), (3, 0.48123622), (4, 0.01883339), (5, 0.12669843)],
                                     [(0, 0.0145399235), (1, 0.01287178), (2, 0.43158862), (3, 0.24750596), (4, 0.264914), (5, 0.028579665)],
                                     [(0, 0.016303344), (1, 0.014450405), (2, 0.36162496), (3, 0.48426068), (4, 0.023487965), (5, 0.09987263)],
                                     [(0, 0.018612841), (1, 0.016472807), (2, 0.44922927), (3, 0.033633586), (4, 0.026889767), (5, 0.45516175)],
                                     [(0, 0.016305258), (1, 0.014453228), (2, 0.7628153), (3, 0.029092493), (4, 0.14613572), (5, 0.031198042)],
                                     [(0, 0.016303508), (1, 0.014449066), (2, 0.15605325), (3, 0.029179793), (4, 0.023376595), (5, 0.7606378)]]

                    topics = ('<html><body>'
                              '<h3><b>Topics July to Sept, 2021</b></h3>'
                              '<ol type="1">'
                              '<li>1. Discussion about scientific studies</li>'
                              '<li>2. Anxiety about pandemic and the information about it OR Specific people in the context of LC</li>'
                              '<li>3. Discussion about LC impact in terms of time periods</li>'
                              '<li>4. Discussion about LC impact on patient life (impact on life so far or scope for lifelong impact)</li>'
                              '<li>5. Treatment scenario</li>'
                              '<li>6. Impact/Consequences of LC on children</li>'
                              '</ol>'
                              '</body></html>')

                    Q1_topics = gr.HTML(topics, visible=True)

                    gr.Markdown(
                        '\n### Test our topic modeling model : select a tweet and check the topics distribution !\n'
                    )

                    # type="index" makes the callback receive the position
                    # of the chosen tweet rather than its text.
                    tweet = gr.Dropdown(tweets_list, label="Example tweets", interactive=True, type="index")

                    model_output = gr.HTML("", visible=False)
                    tweet.change(display_output, tweet, model_output)

                with gr.TabItem("October 2021-July 2022"):

                    # One row per example tweet: (topic id, weight) pairs.
                    topic_dist_list_Q2_Q4=[[(0, 0.4377157), (1, 0.05924045), (2, 0.1525337), (3, 0.1941842), (4, 0.075339705), (5, 0.08098622)],
                                           [(0, 0.16064012), (1, 0.063850455), (2, 0.08664099), (3, 0.2870743), (4, 0.081202514), (5, 0.32059166)],
                                           [(0, 0.14904374), (1, 0.059243646), (2, 0.08039133), (3, 0.26638654), (4, 0.07534457), (5, 0.36959016)],
                                           [(0, 0.14897935), (1, 0.059245925), (2, 0.08039324), (3, 0.41068354), (4, 0.14752874), (5, 0.15316921)],
                                           [(0, 0.089826144), (1, 0.069229595), (2, 0.09393969), (3, 0.5643193), (4, 0.08804329), (5, 0.09464199)],
                                           [(0, 0.08284077), (1, 0.29718927), (2, 0.08663448), (3, 0.36485678), (4, 0.08119658), (5, 0.08728213)]]

                    with gr.Row():
                        gr.Image("./wordclouds_Q2-Q2 data.png", label="October 2021-July 2022")

                    Q2_Q4_topics = ('<html><body>'
                                    '<h3><b>Topics October 2021 to July 2022</b></h3>'
                                    '<ol type="1">'
                                    '<li>1. Variants</li>'
                                    '<li>2. Vaccine side-effects (and general anti-vax/ anti-LC narrative)</li>'
                                    '<li>3. Aftermath of LC or vaccine</li>'
                                    '<li>4. Impact of LC in terms of time OR Risks/Symptoms of LC</li>'
                                    '<li>5. Anger or anxiety about LC information</li>'
                                    '<li>6. Discussion or Information about the science/knowledge surrounding LC</li>'
                                    '</ol>'
                                    '</body></html>')

                    Q2_Q4_topics_html = gr.HTML(Q2_Q4_topics, visible=True)

                    # NOTE(review): seven example tweets are listed here but
                    # topic_dist_list_Q2_Q4 has six rows, so the last entry
                    # has no matching distribution; confirm which tweet
                    # should be removed or which row is missing.
                    tweet_list_Q2_Q4=["Omicron e Long Covid: palpitazioni e perdita d'udito tra i sintomi - #Omicron #Covid: #palpitazioni ",
                                      'Long Covid e trombosi. La correlazione è spiegata da Giovanni Esposito, Presidente GISE, in un articolo sul sito https://t.co/8TdI9nhDHY e avvalorata da uno studio svedese pubblicato sul British Medical Journal. https://t.co/UebaXUtfbz',
                                      'Peccato che il ""long COVID"" che è proprio ciò di cui parla l\'esimio dottore citato determini una alterazione o soppressione del sistema immunitario di cui si sa ancora poco ma che può portare a conseguenze fatali per il paziente.',
                                      'Il Long covid rappresentava un problema solo fino ad aprile 2021, i vaccini hanno molto ridotto l\'impatto e la gravità delle patologie a lungo termine, in pratica si può dire che il long covid non esiste più',
                                      'Sicuro, 100-150 morti al giorno, 6 ondate l anno, rischio long covid, rischio evoluzionario, e via dicendo — finitissimo',
                                      'le cure le fai giorno dopo giorno... ci sono casi di long-covid dopo 6 mesi dall\'infezione. [Vaccino > >Cure] è un dato di fatto',
                                      'A parte il rischio di sviluppare il #longcovid, il pericolo grave di lasciar circolare il virus e di farlo diventare endemico come preconizza il governo e lo sciagurato #speranza non è nel decorso del singolo caso ma nell\'aumento proporzionale dell\'insorgere di nuove varianti']

                    gr.Markdown(
                        '\n### Test our topic modeling model : select a tweet and check the topics distribution !\n'
                    )

                    tweet_Q2_Q4 = gr.Dropdown(tweet_list_Q2_Q4, label="Example tweets", interactive=True, type="index")

                    model_output_Q2_Q4 = gr.HTML("", visible=False)
                    tweet_Q2_Q4.change(display_output_Q2_Q4, tweet_Q2_Q4, model_output_Q2_Q4)

        with gr.TabItem("Word frequency"):

            # The first input selects the frequency source; the textboxes
            # reach display_freq_plot as its *args.
            inputs = [gr.Radio(choices = ['TFIDF', 'Whole_text'], label = 'Choose ur method'),
                      gr.Textbox(label = 'word 1'),
                      gr.Textbox(label = 'word 2'),
                      gr.Textbox(label = 'word 3')]
            plot_output = gr.Plot()
            freq_button = gr.Button("Submit")

            freq_button.click(display_freq_plot, inputs=inputs, outputs=plot_output)
            gr.Examples(
                examples= [['TFIDF', 'Stanchezza', "l'età", '#LongCovidKids'], ['Whole_text', 'nebbia mentale', 'mal di testa', 'Ansia']],
                inputs= inputs)

        with gr.TabItem("Sentiment analysis"):
            text_input = gr.Radio(choices = ['Sentiment distribution', 'Word clouds', 'Time series'], label = 'Choose ur plot')
            sent_plot = gr.Plot()
            sent_button = gr.Button("Submit")

            sent_button.click(display_plot, inputs=text_input, outputs= sent_plot)


demo.launch(debug=True, show_error = True)
|
| 421 |
+
|