User1342 committed on
Commit
33f843c
·
1 Parent(s): 236500d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +476 -0
app.py ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gc
2
+ import os
3
+ import random
4
+ import sys
5
+ import time
6
+
7
+ import gradio as gr
8
+ import plotly.graph_objects as go
9
+ import tweepy
10
+ from detoxify import Detoxify
11
+ from transformers import pipeline
12
+
13
# Import the news-topic classifier. If the package is missing, install it from
# its GitHub repository with the running interpreter's pip and import it again.
# Only ImportError triggers the install: a bare ``except`` would also swallow
# KeyboardInterrupt/SystemExit and hide unrelated failures behind a pip run.
try:
    from news_classification.news_topic_text_classifier import news_topic_text_classifier
except ImportError:
    os.system(
        "{} -m pip install git+https://github.com/user1342/News-Article-Text-Classification.git".format(sys.executable))
    from news_classification.news_topic_text_classifier import news_topic_text_classifier

# Shared classifier instance used to categorise each tweet's content type.
news_model = news_topic_text_classifier()
20
+
21
# Twitter API credentials, looked up in the process environment.
# Each value is None when the corresponding variable is not set.
consumer_token = os.environ.get('consumer_token')
consumer_secret = os.environ.get('consumer_secret')
my_access_token = os.environ.get('my_access_token')
my_access_secret = os.environ.get('my_access_secret')
bearer = os.environ.get('bearer')
27
+
28
+ html_data = '''<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <meta name="viewport"
29
+ content="width=device-width, initial-scale=1"> <link rel="stylesheet"
30
+ href="https://www.w3schools.com/w3css/4/w3.css"> <link rel="stylesheet"
31
+ href="https://fonts.googleapis.com/css?family=Poppins"> <link rel="stylesheet"
32
+ href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css"> <style> body,h1,h2,h3,h4,
33
+ h5 {font-family: "Poppins", sans-serif} body {font-size: 16px;} img {margin-bottom: -8px;} .mySlides {display: none;}
34
+ </style> </head> <body class="w3-content w3-black" style="max-width:1500px;"> <!-- The App Section --> <div
35
+ class="w3-padding-large w3-white"> <div class="w3-row-padding-large"> <div class="w3-col"> <h1
36
+ class="w3-jumbo"><b>Bubble Check-In 🐦💭</b></h1> <h1 class="w3-xxxlarge w3-text-blue"><b>Check-in-on someone's Twitter 'bubble'.</b></h1> <p><span class="w3-xlarge">Scroll down to use Bubble Check-In 1.0. ⬇
37
+ </span> Bubble Check-In is a tool designed to allow you to check-in-on the type of content someone on Twitter is
38
+ being exposed to - be that yourself, a friend, loved one, etc. The goal here is to empower us to look out for
39
+ each-other and identify early if someone is experiencing activity such as hate speech or extremism. We use a queue
40
+ system, which means <b> you may need to wait your turn to run WatchTower</b> - however, once you've clicked run,
41
+ you can close the tab as Bubble Check-In will continue in the background. Bubble Check-In is simple to use simply enter the username of the Twitter account you want to check-in-on and click run!</p>
42
+ <a href="https://www.jamesstevenson.me/cartographer-labs/"><button class="w3-button w3-light-grey w3-padding-large w3-section
43
+ " onclick="document.getElementById('download').style.display='block'"> <i class=""></i> Find Out More! 💬
44
+ </button></a> <a href="https://ko-fi.com/jamesstevenson"><button class="w3-button w3-light-grey w3-padding-large
45
+ w3-section " onclick="document.getElementById('download').style.display='block'"> <i class=""></i> Support The
46
+ Creator! ❤ </button></a> <a href="https://twitter.com/CartographerLab"><button class="w3-button w3-light-grey
47
+ w3-padding-large w3-section " onclick="document.getElementById('download').style.display='block'"> <i class=""></i>
48
+ Follow Us! 🐦 </button></a> </div> </div> </div> <!-- Modal --> <script> // Slideshow var slideIndex = 1; showDivs(
49
+ slideIndex); function plusDivs(n) { showDivs(slideIndex += n); } function showDivs(n) { var i; var x =
50
+ document.getElementsByClassName("mySlides"); if (n > x.length) {slideIndex = 1} if (n < 1) {slideIndex = x.length}
51
+ for (i = 0; i < x.length; i++) { x[i].style.display = "none"; } x[slideIndex-1].style.display = "block"; } </script>
52
+ <br> </body> </html> '''
53
+
54
# Create the top-level Gradio Blocks container, passing page-wide CSS and the page title.
# The CSS is written as adjacent string literals; they concatenate to a single rule string.
block = gr.Blocks(
    title="WatchTower",
    css=(
        ".container { max-width: 800px; margin: auto; } "
        "h1 { margin: 0px; padding: 5px 0; line-height: 50px; font-size: 60pt; }"
        ".close-heading {margin: 0px; padding: 0px;} "
        ".close-heading p { margin: 0px; padding: 0px;}"
    ),
)
58
+
59
+
60
# Seconds to pause before retrying a request that Twitter rejected with a rate limit.
_RATE_LIMIT_WAIT_SECONDS = 120

# Only this many of the accounts returned by the "following" lookup are analysed.
_MAX_FOLLOWING = 10

# Score names read from the Detoxify 'unbiased' model output.
_TOXICITY_LABELS = (
    'toxicity',
    'severe_toxicity',
    'obscene',
    'identity_attack',
    'insult',
    'threat',
    'sexual_explicit',
)


def _retry_on_rate_limit(request, **kwargs):
    '''
    Calls ``request(**kwargs)`` and returns its result, sleeping and retrying whenever Twitter rate-limits the call.
    :param request: the tweepy client method to call
    :param kwargs: keyword arguments forwarded to the request
    :return: whatever the request returns once it succeeds
    '''
    while True:
        try:
            return request(**kwargs)
        except tweepy.errors.TooManyRequests as e:
            print("sleeping")
            print(e)
            time.sleep(_RATE_LIMIT_WAIT_SECONDS)


def _analyse_tweets(user_tweets, toxicity_model, sentiment_pipeline):
    '''
    Scores a non-empty list of tweet texts for toxicity, sentiment and content type.
    :param user_tweets: a non-empty list of tweet texts belonging to one user
    :param toxicity_model: a loaded Detoxify model
    :param sentiment_pipeline: a transformers sentiment-analysis pipeline
    :return: a dict with an "average_<label>" entry per toxicity label plus "types" and "sentiments"
    '''
    toxicities = {label: [] for label in _TOXICITY_LABELS}
    sentiments = {}
    types = {}

    for tweet in user_tweets:
        # Hate speech scores; predict() is given a one-element list, so take index 0 of each score list
        scores = toxicity_model.predict([tweet])
        for label in _TOXICITY_LABELS:
            toxicities[label].append(scores[label][0])

        # Sentiment; only the NEGATIVE and POSITIVE labels are recorded
        sentiment_score = sentiment_pipeline(tweet)[0]
        if sentiment_score["label"] in ("NEGATIVE", "POSITIVE"):
            sentiments.setdefault(sentiment_score["label"], []).append(sentiment_score["score"])

        # Type of post (news category), counted per tweet
        category = news_model.get_category(tweet)
        types[category] = types.get(category, 0) + 1

    analysis = {"average_" + label: sum(values) / len(values) for label, values in toxicities.items()}
    analysis["types"] = types
    analysis["sentiments"] = sentiments
    return analysis


def check_connected_users(username):
    '''
    This function retrieves all of the mentions for the given user and all of the tweets from their following.
    :param username: the target user
    :return: a dict of user information relating to the following and mentions of the target user. Each value
    holds "tweets" and "type", and - for users with at least one tweet - the averaged toxicity scores,
    "types" and "sentiments".
    :raises ValueError: if Twitter returns no account for the given username
    '''

    client = tweepy.Client(
        bearer_token=bearer,
        consumer_key=consumer_token,
        consumer_secret=consumer_secret,
        access_token=my_access_token,
        access_token_secret=my_access_secret
    )

    target_user = _retry_on_rate_limit(client.get_user, username=username).data
    if target_user is None:
        # Fail with a readable message instead of an AttributeError on None
        raise ValueError("Twitter user '{}' could not be found".format(username))
    user_id = target_user.data["id"]
    tweet_data_dict = {}

    # Get users that have mentioned the target user
    users_mentions = _retry_on_rate_limit(
        client.get_users_mentions, id=user_id, tweet_fields=["author_id"], max_results=10).data
    if users_mentions is None:
        users_mentions = []

    # Only the API call is retried, so a rate-limit retry no longer bumps the counter twice
    for mention_count, tweet in enumerate(users_mentions, start=1):
        user = _retry_on_rate_limit(client.get_user, id=tweet.author_id).data
        print("Processing user {}'s mentions. Mention {} of {}. Mention from user {}".format(username,
                                                                                             mention_count,
                                                                                             len(users_mentions),
                                                                                             user))

        # Is this the first time adding a tweet from this user, if so act accordingly
        user_data = tweet_data_dict.setdefault(user, {"tweets": []})
        user_data["tweets"].append(tweet.data["text"])

        # Adds the mention type to the user data
        user_data["type"] = ["mentioned"]

    # Loop through all users that the target user is following.
    # .data is None when nothing is returned, so fall back to an empty list before slicing.
    following = _retry_on_rate_limit(client.get_users_following, id=user_id, max_results=1000).data or []
    # Only take at a maximum the first x following
    following = following[:_MAX_FOLLOWING]

    for user_count, user in enumerate(following, start=1):
        # If the user hasn't already been observed in mentions then create a new list for tweets
        user_data = tweet_data_dict.setdefault(user, {"tweets": []})

        # Adds the following type to the user data (exactly once, even if the request below is retried)
        user_data.setdefault("type", []).append("following")

        tweets = _retry_on_rate_limit(client.get_users_tweets, id=user.id, max_results=5).data

        if tweets is not None:
            print("Processing user {}'s followers. {}, number {} of {}. Total user tweets {}.".format(username,
                                                                                                      user,
                                                                                                      user_count,
                                                                                                      len(following),
                                                                                                      len(tweets)))
            user_data["tweets"].extend(str(users_tweet.text) for users_tweet in tweets)

    # do processing such as sentiment, centrality, hate speech, etc
    # Both models are loaded once here rather than once per tweet
    sentiment_pipeline = pipeline("sentiment-analysis")
    toxicity_model = Detoxify('unbiased')

    for current_username, current_user_data in tweet_data_dict.items():
        user_tweets = current_user_data["tweets"]

        # Only consider users with posts for analysis
        if not user_tweets:
            continue
        print("Processing metadata for {}'s tweets".format(current_username))
        current_user_data.update(_analyse_tweets(user_tweets, toxicity_model, sentiment_pipeline))

    gc.collect()

    return tweet_data_dict
253
+
254
+
255
# Detoxify scores are probabilities in the range 0-1, so the summary thresholds use that scale.
_HIGH_SCORE_THRESHOLD = 0.5
_EXTREME_SCORE_THRESHOLD = 0.75

# Thresholds on the mean confidence of NEGATIVE-labelled tweets.
_HIGH_NEGATIVE_SENTIMENT = 0.7
_SIGNIFICANT_NEGATIVE_SENTIMENT = 0.9

# Per-user average keys written by check_connected_users, paired with the plot label and summary wording.
_TOXICITY_FIELDS = (
    ("average_toxicity", "Average Toxicity", "toxic"),
    ("average_severe_toxicity", "Average Severe Toxicity", "severe toxic"),
    ("average_obscene", "Average Obscene", "obscene"),
    ("average_identity_attack", "Average Identity Attack", "identity based hate"),
    ("average_insult", "Average Insult", "insulting"),
    ("average_threat", "Average Threat", "threatening"),
    ("average_sexual_explicit", "Average Sexual Explicit", "sexually explicit"),
)


def _mean(values):
    '''
    Returns the arithmetic mean of the given list, or 0.0 when it is empty.
    :param values: a list of numbers
    :return: the mean as a float-compatible number
    '''
    return sum(values) / len(values) if values else 0.0


def button_pressed(text_box):
    '''
    A function that is called when the 'run' button is pressed
    :param text_box: a string which should relate to a Twitter users username
    :return: several gradio elements used to populate plots and a summary label field, in the order
    [hate speech figure, sentiment figure, content type figure, summary text]
    '''

    tweet_data = check_connected_users(text_box)

    total_types_count = {}
    per_user_averages = {key: [] for key, _, _ in _TOXICITY_FIELDS}
    positive_scores = []
    negative_scores = []

    mentions = 0
    following = 0
    tweets = 0

    for data in tweet_data.values():
        tweets = tweets + len(data["tweets"])

        # Users without tweets were never analysed, so they carry no averages
        if not data["tweets"]:
            continue

        if "mentioned" in data["type"]:
            mentions = mentions + 1
        if "following" in data["type"]:
            following = following + 1

        # Get types. Each category is counted once per user that posted it, not once per tweet.
        for category in data["types"]:
            total_types_count[category] = total_types_count.get(category, 0) + 1

        for key in per_user_averages:
            per_user_averages[key].append(data[key])

        negative_scores.extend(data["sentiments"].get("NEGATIVE", []))
        positive_scores.extend(data["sentiments"].get("POSITIVE", []))

    # _mean returns 0.0 for an empty list, so an account with no analysable tweets
    # yields empty plots instead of a ZeroDivisionError
    overall_averages = {key: _mean(values) for key, values in per_user_averages.items()}
    total_average_neg_sentiment = _mean(negative_scores)
    total_average_pos_sentiment = _mean(positive_scores)

    # Comprise elements for hate speech plot
    toxicity_plot = dict({
        "data": [{"type": "bar",
                  "x": [label for _, label, _ in _TOXICITY_FIELDS],
                  "y": [overall_averages[key] for key, _, _ in _TOXICITY_FIELDS]}],
        "layout": {"title": {"text": "Hate Speech"}}
    })

    toxicity_plot_fig = go.Figure(toxicity_plot)

    # Comprise elements for sentiment plot
    sentiment_plot = dict({
        "data": [{"type": "bar",
                  "x": ["Positive Sentiment Average", "Negative Sentiment Average"],
                  "y": [total_average_pos_sentiment, total_average_neg_sentiment]}],
        "layout": {"title": {"text": "Sentiment"}}
    })

    sentiment_plot_fig = go.Figure(sentiment_plot)

    # Comprise elements for 'type' plot: one randomly coloured marker per category, sized by its count
    type_counts = list(total_types_count.values())
    x_list = [key.replace("_", " ").title() for key in total_types_count]
    colours = ['rgb({}, {}, {})'.format(random.randint(1, 255), random.randint(1, 255), random.randint(1, 255))
               for _ in total_types_count]
    sizes = [value * 20 for value in type_counts]

    fig = go.Figure(data=[go.Scatter(
        x=x_list, y=type_counts,
        mode='markers',
        marker=dict(
            color=colours,
            size=sizes
        )
    )])

    # Comprise text for summary label
    text = "A total number of {} recent tweets were reviewed, of which {} users were exposed to @{} via mentions and " \
           "{} were exposed to @{} directly via following them.".format(tweets, mentions, text_box, following, text_box)

    high_identifiers = []
    extreme_identifiers = []

    for key, _, identifier in _TOXICITY_FIELDS:
        if overall_averages[key] > _EXTREME_SCORE_THRESHOLD:
            extreme_identifiers.append(identifier)
        elif overall_averages[key] > _HIGH_SCORE_THRESHOLD:
            high_identifiers.append(identifier)

    if high_identifiers:
        text = text + " @{} is observing a high amount of {} language.".format(
            text_box, ", ".join(high_identifiers))

    if extreme_identifiers:
        text = text + " @{} is observing an extremely high amount of {} language.".format(
            text_box, ", ".join(extreme_identifiers))

    # Test the stricter threshold first, otherwise its branch can never be reached
    if total_average_neg_sentiment > _SIGNIFICANT_NEGATIVE_SENTIMENT:
        text = text + " @{} is experiencing a significantly high amount of low sentiment content.".format(text_box)
    elif total_average_neg_sentiment > _HIGH_NEGATIVE_SENTIMENT:
        text = text + " @{} is experiencing a high amount of low sentiment content.".format(text_box)

    return [toxicity_plot_fig, sentiment_plot_fig, fig, text]
438
+
439
+
440
# The main chunk of code that uses Gradio blocks to create the UI.
# NOTE(review): the original indentation was lost, so the container nesting below is a
# best-effort reconstruction - confirm against the rendered page.
html_button = None
with block:
    # Viewport and stylesheet tags needed by the w3.css classes used in html_data
    gr.HTML('''
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://www.w3schools.com/w3css/4/w3.css">
''')

    # todo check if user signed in
    gr.HTML(value=html_data)
    with gr.Group():
        with gr.Row().style(equal_height=True):
            with gr.Box():
                with gr.Row().style(equal_height=True):
                    text_input = gr.Text(label="Username", visible=True, max_lines=1)
                    # A single style() call; the trailing argument-less style() was a no-op
                    btn = gr.Button("Run WatchTower").style(full_width=True)
                gr.HTML(value="<br>")
                output_label = gr.Label(label="Summary")
                gr.HTML(value="<br>")
                with gr.Row().style(equal_height=True):
                    toxicity_plot = gr.Plot(label="Hate Speech Graph")
                    sentiment_plot = gr.Plot(label="Sentiment Graph")
                gr.HTML(value="<br>")
                type_plot = gr.Plot(label="Content Type Graph")
                # Output order must match the list returned by button_pressed
                btn.click(fn=button_pressed, inputs=[text_input],
                          outputs=[toxicity_plot, sentiment_plot, type_plot, output_label])
    # The anchor tag was malformed (no closing '>' and no link text inside it); the name is now the link.
    # The string lines stay unindented so Markdown does not render them as a code block.
    gr.Markdown(
        """___
<p style='text-align: center'>
Created by <a href="https://twitter.com/_JamesStevenson" target="_blank">James Stevenson</a>
<br/>
</p>"""
    )

# block.attach_load_events()

# Launch the page
block.launch(enable_queue=True)