SandhyaRaghav committed on
Commit
16ccd49
·
verified ·
1 Parent(s): 74a5772

initial commit

Browse files
Files changed (7) hide show
  1. .gitignore +6 -0
  2. .streamlit/config.toml +7 -0
  3. helper.py +132 -0
  4. preprocessor.py +48 -0
  5. requirements.txt +0 -0
  6. stop_hinglish.txt +1055 -0
  7. streamlit_app.py +141 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .venv/
2
+ __pycache__/
3
+ .idea/
4
+ .ipynb_checkpoints/
5
+ *.pyc
6
+ *.txt~
.streamlit/config.toml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [browser]
2
+ gatherUsageStats = false
3
+
4
+ [server]
5
+ headless = true
6
+ enableCORS = false # Turns off Streamlit's CORS protection; only needed when the hosting proxy/iframe serves the app from another origin (this relaxes a security check)
7
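+ enableXsrfProtection = false # Turns off XSRF protection (weakens security); presumably needed so uploads work behind the hosting proxy - re-enable if the host allows it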
helper.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from urlextract import URLExtract
2
+ from wordcloud import WordCloud
3
+ import pandas as pd
4
+ from collections import Counter
5
+ import emoji
6
+
7
+
8
def fetch_stats(selected_user, df):
    """Return the headline counters for one participant or the whole chat.

    Args:
        selected_user: A value of the ``user`` column, or ``'Overall'`` to
            keep every row.
        df: Chat frame with at least ``user`` and ``message`` columns.

    Returns:
        A 4-tuple ``(messages, words, media_messages, links)``.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]

    # One row per message.
    message_count = scoped.shape[0]

    # Whitespace-separated tokens across all messages.
    tokens = [token for text in scoped['message'] for token in text.split()]

    # Rows whose text is exactly the media placeholder.
    media_rows = scoped[scoped['message'] == '<Media omitted>\n']
    media_count = media_rows.shape[0]

    # URLs found by urlextract in every message.
    extractor = URLExtract()
    found_urls = []
    for text in scoped['message']:
        found_urls += extractor.find_urls(text)

    return message_count, len(tokens), media_count, len(found_urls)
30
+
31
def most_busy_users(df):
    """Rank participants by how many messages they sent.

    Rows attributed to the ``'group_notification'`` pseudo-user are ignored
    so that system notices are not reported as a chat participant.

    Args:
        df: Chat frame with a ``user`` column.

    Returns:
        A tuple ``(top_users, user_percent)`` where ``top_users`` is the
        ``value_counts`` Series limited to its first five entries and
        ``user_percent`` is a frame with columns ``name`` and ``percent``
        (share of messages, rounded to two decimals).
    """
    participants = df.loc[df['user'] != 'group_notification', 'user']

    top_users = participants.value_counts().head()

    user_percent = (
        (participants.value_counts(normalize=True) * 100).round(2).reset_index()
    )
    # reset_index() names differ between pandas versions; set them explicitly.
    user_percent.columns = ['name', 'percent']
    return top_users, user_percent
36
+
37
+
38
def create_wordcloud(selected_user, df):
    """Build a word cloud from the chat text with stop words removed.

    Group notifications and media placeholders are excluded. Stop words are
    read from ``stop_hinglish.txt`` (resolved against the current working
    directory) and matched as whole, lower-cased words.

    Args:
        selected_user: A value of the ``user`` column, or ``'Overall'``.
        df: Chat frame with ``user`` and ``message`` columns.

    Returns:
        The object returned by ``WordCloud.generate``.

    NOTE(review): ``WordCloud.generate`` presumably raises ``ValueError``
    when no words remain after filtering - confirm and handle in the caller.
    """
    # A set gives exact-word membership; testing against the raw file text
    # would also drop any word that merely appears inside a stop word.
    with open('stop_hinglish.txt', 'r', encoding='utf-8') as f:
        stop_words = set(f.read().split())

    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    keep = (df['user'] != 'group_notification') & (df['message'] != '<Media omitted>\n')

    def remove_stop_words(message):
        return " ".join(
            word for word in message.lower().split() if word not in stop_words
        )

    # Work on a fresh Series instead of assigning into a filtered slice.
    cleaned = df.loc[keep, 'message'].apply(remove_stop_words)

    wc = WordCloud(width=500, height=500, min_font_size=10, background_color='white')
    return wc.generate(" ".join(cleaned))
60
+
61
def most_common_words(selected_user, df):
    """Return the 20 most frequent non-stop-words in the chat.

    Group notifications and media placeholders are excluded. Stop words are
    read from ``stop_hinglish.txt`` (resolved against the current working
    directory) and matched as whole, lower-cased words.

    Args:
        selected_user: A value of the ``user`` column, or ``'Overall'``.
        df: Chat frame with ``user`` and ``message`` columns.

    Returns:
        A DataFrame with column ``0`` (word) and column ``1`` (count), most
        frequent first. Both columns exist even when no word qualifies.
    """
    # A set gives exact-word membership; testing against the raw file text
    # would also drop any word that merely appears inside a stop word.
    with open('stop_hinglish.txt', 'r', encoding='utf-8') as f:
        stop_words = set(f.read().split())

    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    keep = (df['user'] != 'group_notification') & (df['message'] != '<Media omitted>\n')

    counts = Counter()
    for message in df.loc[keep, 'message']:
        counts.update(
            word for word in message.lower().split() if word not in stop_words
        )

    # Explicit column labels keep `result[0]` / `result[1]` valid when empty.
    return pd.DataFrame(counts.most_common(20), columns=[0, 1])
81
+
82
+
83
def emoji_helper(selected_user, df):
    """Count how often each emoji character occurs in the chat.

    Messages are scanned one character at a time with ``emoji.is_emoji``.
    NOTE(review): emojis made of several code points are presumably counted
    by their parts rather than as one symbol - confirm if that matters.

    Args:
        selected_user: A value of the ``user`` column, or ``'Overall'``.
        df: Chat frame with ``user`` and ``message`` columns.

    Returns:
        A DataFrame with column ``0`` (emoji) and column ``1`` (count), most
        frequent first. Both columns exist even when no emoji was found.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    counts = Counter()
    for message in df['message']:
        counts.update(c for c in message if emoji.is_emoji(c))

    # Explicit column labels keep `result[0]` / `result[1]` valid when empty.
    return pd.DataFrame(counts.most_common(), columns=[0, 1])
95
+
96
def monthly_timeline(selected_user, df):
    """Count messages per calendar month.

    Args:
        selected_user: A value of the ``user`` column, or ``'Overall'``.
        df: Chat frame with ``user``, ``message``, ``year``, ``month_num``
            and ``month`` columns.

    Returns:
        A DataFrame with columns ``year``, ``month_num``, ``month``,
        ``message`` (the count) and ``time`` (a ``"<month>-<year>"`` label),
        ordered by the grouping keys.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    timeline = (
        df.groupby(['year', 'month_num', 'month'])['message'].count().reset_index()
    )

    # Build every label in one vectorised step instead of indexing row by row.
    timeline['time'] = timeline['month'] + "-" + timeline['year'].astype(str)

    return timeline
110
+
111
def daily_timeline(selected_user, df):
    """Count messages per calendar day.

    Args:
        selected_user: A value of the ``user`` column, or ``'Overall'``.
        df: Chat frame with ``user``, ``message`` and ``only_date`` columns.

    Returns:
        A DataFrame with columns ``only_date`` and ``message`` (the count).
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]

    per_day = scoped.groupby('only_date')['message'].count()
    return per_day.reset_index()
119
+
120
def week_activity_map(selected_user, df):
    """Count messages per weekday name.

    Args:
        selected_user: A value of the ``user`` column, or ``'Overall'``.
        df: Chat frame with ``user`` and ``day_name`` columns.

    Returns:
        The ``value_counts`` Series of the ``day_name`` column.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    weekday_counts = scoped['day_name'].value_counts()
    return weekday_counts
126
+
127
def month_activity_map(selected_user, df):
    """Count messages per month name.

    Args:
        selected_user: A value of the ``user`` column, or ``'Overall'``.
        df: Chat frame with ``user`` and ``month`` columns.

    Returns:
        The ``value_counts`` Series of the ``month`` column.
    """
    scoped = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    month_counts = scoped['month'].value_counts()
    return month_counts
preprocessor.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pandas as pd
3
+
4
def preprocess(data):
    """Parse an exported chat transcript into a DataFrame.

    Every message is expected to start with a 12-hour stamp of the form
    ``d/m/yy, h:mm AM - `` (day first). Two- or four-digit years, an optional
    (possibly non-breaking) space before AM/PM and lower-case am/pm are all
    accepted and normalised before parsing.

    Args:
        data: Full transcript text.

    Returns:
        A DataFrame with columns ``date``, ``user``, ``message``,
        ``only_date``, ``year``, ``month_num``, ``month``, ``day``,
        ``day_name``, ``hour`` and ``minute``. Entries without a
        ``"<user>: "`` prefix get the user ``'group_notification'``.
        Returns ``None`` when a matched stamp is not a valid date.
    """
    import logging

    logger = logging.getLogger(__name__)

    stamp = re.compile(
        r'(\d{1,2})/(\d{1,2})/(\d{2,4}),\s((?:1[0-2]|0?[1-9])):([0-5][0-9])'
        r'[\s\u202f\u00a0]?(AM|PM|am|pm)\s-\s'
    )
    # Leading "<user>: " prefix; the remainder is kept verbatim so that a
    # later ": " inside the text is not treated as another separator.
    author = re.compile(r'([\w\W]+?):\s([\w\W]*)')

    matches = list(stamp.finditer(data))
    logger.debug("Found %d messages", len(matches))

    # Each entry's text runs from the end of its stamp to the next stamp.
    ends = [m.start() for m in matches[1:]] + [len(data)]

    dates = []
    users = []
    messages = []
    for match, end in zip(matches, ends):
        day, month, year, hour, minute, meridiem = match.groups()
        if len(year) == 2:
            # Same pivot that strptime's %y applies: 69-99 -> 19xx, else 20xx.
            year = ('20' if int(year) <= 68 else '19') + year
        dates.append(
            f"{int(day):02d}/{int(month):02d}/{year.zfill(4)}, "
            f"{int(hour):02d}:{minute} {meridiem.upper()}"
        )

        body = data[match.end():end]
        parts = author.match(body)
        if parts:
            users.append(parts.group(1))
            messages.append(parts.group(2))
        else:
            users.append('group_notification')
            messages.append(body)

    try:
        parsed = pd.to_datetime(
            pd.Series(dates, dtype='object'), format='%d/%m/%Y, %I:%M %p'
        )
    except ValueError as exc:
        logger.error("Date parsing error: %s", exc)
        return None

    df = pd.DataFrame({'date': parsed, 'user': users, 'message': messages})

    df['only_date'] = df['date'].dt.date
    df['year'] = df['date'].dt.year
    df['month_num'] = df['date'].dt.month
    df['month'] = df['date'].dt.month_name()
    df['day'] = df['date'].dt.day
    df['day_name'] = df['date'].dt.day_name()
    df['hour'] = df['date'].dt.hour
    df['minute'] = df['date'].dt.minute

    return df
48
+
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
stop_hinglish.txt ADDED
@@ -0,0 +1,1055 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .
2
+ ..
3
+ ...
4
+ ?
5
+ -
6
+ --
7
+ 1
8
+ 2
9
+ 3
10
+ 4
11
+ 5
12
+ 6
13
+ 7
14
+ 8
15
+ 9
16
+ 0
17
+ a
18
+ aadi
19
+ aaj
20
+ aap
21
+ aapne
22
+ aata
23
+ aati
24
+ aaya
25
+ aaye
26
+ ab
27
+ abbe
28
+ abbey
29
+ abe
30
+ abhi
31
+ able
32
+ about
33
+ above
34
+ accha
35
+ according
36
+ accordingly
37
+ acha
38
+ achcha
39
+ across
40
+ actually
41
+ after
42
+ afterwards
43
+ again
44
+ against
45
+ agar
46
+ ain
47
+ aint
48
+ ain't
49
+ aisa
50
+ aise
51
+ aisi
52
+ alag
53
+ all
54
+ allow
55
+ allows
56
+ almost
57
+ alone
58
+ along
59
+ already
60
+ also
61
+ although
62
+ always
63
+ am
64
+ among
65
+ amongst
66
+ an
67
+ and
68
+ andar
69
+ another
70
+ any
71
+ anybody
72
+ anyhow
73
+ anyone
74
+ anything
75
+ anyway
76
+ anyways
77
+ anywhere
78
+ ap
79
+ apan
80
+ apart
81
+ apna
82
+ apnaa
83
+ apne
84
+ apni
85
+ appear
86
+ are
87
+ aren
88
+ arent
89
+ aren't
90
+ around
91
+ arre
92
+ as
93
+ aside
94
+ ask
95
+ asking
96
+ at
97
+ aur
98
+ avum
99
+ aya
100
+ aye
101
+ baad
102
+ baar
103
+ bad
104
+ bahut
105
+ bana
106
+ banae
107
+ banai
108
+ banao
109
+ banaya
110
+ banaye
111
+ banayi
112
+ banda
113
+ bande
114
+ bandi
115
+ bane
116
+ bani
117
+ bas
118
+ bata
119
+ batao
120
+ bc
121
+ be
122
+ became
123
+ because
124
+ become
125
+ becomes
126
+ becoming
127
+ been
128
+ before
129
+ beforehand
130
+ behind
131
+ being
132
+ below
133
+ beside
134
+ besides
135
+ best
136
+ better
137
+ between
138
+ beyond
139
+ bhai
140
+ bheetar
141
+ bhi
142
+ bhitar
143
+ bht
144
+ bilkul
145
+ bohot
146
+ bol
147
+ bola
148
+ bole
149
+ boli
150
+ bolo
151
+ bolta
152
+ bolte
153
+ bolti
154
+ both
155
+ brief
156
+ bro
157
+ btw
158
+ but
159
+ by
160
+ came
161
+ can
162
+ cannot
163
+ cant
164
+ can't
165
+ cause
166
+ causes
167
+ certain
168
+ certainly
169
+ chahiye
170
+ chaiye
171
+ chal
172
+ chalega
173
+ chhaiye
174
+ clearly
175
+ c'mon
176
+ com
177
+ come
178
+ comes
179
+ could
180
+ couldn
181
+ couldnt
182
+ couldn't
183
+ d
184
+ de
185
+ dede
186
+ dega
187
+ degi
188
+ dekh
189
+ dekha
190
+ dekhe
191
+ dekhi
192
+ dekho
193
+ denge
194
+ dhang
195
+ di
196
+ did
197
+ didn
198
+ didnt
199
+ didn't
200
+ dijiye
201
+ diya
202
+ diyaa
203
+ diye
204
+ diyo
205
+ do
206
+ does
207
+ doesn
208
+ doesnt
209
+ doesn't
210
+ doing
211
+ done
212
+ dono
213
+ dont
214
+ don't
215
+ doosra
216
+ doosre
217
+ down
218
+ downwards
219
+ dude
220
+ dunga
221
+ dungi
222
+ during
223
+ dusra
224
+ dusre
225
+ dusri
226
+ dvaara
227
+ dvara
228
+ dwaara
229
+ dwara
230
+ each
231
+ edu
232
+ eg
233
+ eight
234
+ either
235
+ ek
236
+ else
237
+ elsewhere
238
+ enough
239
+ etc
240
+ even
241
+ ever
242
+ every
243
+ everybody
244
+ everyone
245
+ everything
246
+ everywhere
247
+ ex
248
+ exactly
249
+ example
250
+ except
251
+ far
252
+ few
253
+ fifth
254
+ fir
255
+ first
256
+ five
257
+ followed
258
+ following
259
+ follows
260
+ for
261
+ forth
262
+ four
263
+ from
264
+ further
265
+ furthermore
266
+ gaya
267
+ gaye
268
+ gayi
269
+ get
270
+ gets
271
+ getting
272
+ ghar
273
+ given
274
+ gives
275
+ go
276
+ goes
277
+ going
278
+ gone
279
+ good
280
+ got
281
+ gotten
282
+ greetings
283
+ guys
284
+ haan
285
+ had
286
+ hadd
287
+ hadn
288
+ hadnt
289
+ hadn't
290
+ hai
291
+ hain
292
+ hamara
293
+ hamare
294
+ hamari
295
+ hamne
296
+ han
297
+ happens
298
+ har
299
+ hardly
300
+ has
301
+ hasn
302
+ hasnt
303
+ hasn't
304
+ have
305
+ haven
306
+ havent
307
+ haven't
308
+ having
309
+ he
310
+ hello
311
+ help
312
+ hence
313
+ her
314
+ here
315
+ hereafter
316
+ hereby
317
+ herein
318
+ here's
319
+ hereupon
320
+ hers
321
+ herself
322
+ he's
323
+ hi
324
+ him
325
+ himself
326
+ his
327
+ hither
328
+ hm
329
+ hmm
330
+ ho
331
+ hoga
332
+ hoge
333
+ hogi
334
+ hona
335
+ honaa
336
+ hone
337
+ honge
338
+ hongi
339
+ honi
340
+ hopefully
341
+ hota
342
+ hotaa
343
+ hote
344
+ hoti
345
+ how
346
+ howbeit
347
+ however
348
+ hoyenge
349
+ hoyengi
350
+ hu
351
+ hua
352
+ hue
353
+ huh
354
+ hui
355
+ hum
356
+ humein
357
+ humne
358
+ hun
359
+ huye
360
+ huyi
361
+ i
362
+ i'd
363
+ idk
364
+ ie
365
+ if
366
+ i'll
367
+ i'm
368
+ imo
369
+ in
370
+ inasmuch
371
+ inc
372
+ inhe
373
+ inhi
374
+ inho
375
+ inka
376
+ inkaa
377
+ inke
378
+ inki
379
+ inn
380
+ inner
381
+ inse
382
+ insofar
383
+ into
384
+ inward
385
+ is
386
+ ise
387
+ isi
388
+ iska
389
+ iskaa
390
+ iske
391
+ iski
392
+ isme
393
+ isn
394
+ isne
395
+ isnt
396
+ isn't
397
+ iss
398
+ isse
399
+ issi
400
+ isski
401
+ it
402
+ it'd
403
+ it'll
404
+ itna
405
+ itne
406
+ itni
407
+ itno
408
+ its
409
+ it's
410
+ itself
411
+ ityaadi
412
+ ityadi
413
+ i've
414
+ ja
415
+ jaa
416
+ jab
417
+ jabh
418
+ jaha
419
+ jahaan
420
+ jahan
421
+ jaisa
422
+ jaise
423
+ jaisi
424
+ jata
425
+ jayega
426
+ jidhar
427
+ jin
428
+ jinhe
429
+ jinhi
430
+ jinho
431
+ jinhone
432
+ jinka
433
+ jinke
434
+ jinki
435
+ jinn
436
+ jis
437
+ jise
438
+ jiska
439
+ jiske
440
+ jiski
441
+ jisme
442
+ jiss
443
+ jisse
444
+ jitna
445
+ jitne
446
+ jitni
447
+ jo
448
+ just
449
+ jyaada
450
+ jyada
451
+ k
452
+ ka
453
+ kaafi
454
+ kab
455
+ kabhi
456
+ kafi
457
+ kaha
458
+ kahaa
459
+ kahaan
460
+ kahan
461
+ kahi
462
+ kahin
463
+ kahte
464
+ kaisa
465
+ kaise
466
+ kaisi
467
+ kal
468
+ kam
469
+ kar
470
+ kara
471
+ kare
472
+ karega
473
+ karegi
474
+ karen
475
+ karenge
476
+ kari
477
+ karke
478
+ karna
479
+ karne
480
+ karni
481
+ karo
482
+ karta
483
+ karte
484
+ karti
485
+ karu
486
+ karun
487
+ karunga
488
+ karungi
489
+ kaun
490
+ kaunsa
491
+ kayi
492
+ kch
493
+ ke
494
+ keep
495
+ keeps
496
+ keh
497
+ kehte
498
+ kept
499
+ khud
500
+ ki
501
+ kin
502
+ kine
503
+ kinhe
504
+ kinho
505
+ kinka
506
+ kinke
507
+ kinki
508
+ kinko
509
+ kinn
510
+ kino
511
+ kis
512
+ kise
513
+ kisi
514
+ kiska
515
+ kiske
516
+ kiski
517
+ kisko
518
+ kisliye
519
+ kisne
520
+ kitna
521
+ kitne
522
+ kitni
523
+ kitno
524
+ kiya
525
+ kiye
526
+ know
527
+ known
528
+ knows
529
+ ko
530
+ koi
531
+ kon
532
+ konsa
533
+ koyi
534
+ krna
535
+ krne
536
+ kuch
537
+ kuchch
538
+ kuchh
539
+ kul
540
+ kull
541
+ kya
542
+ kyaa
543
+ kyu
544
+ kyuki
545
+ kyun
546
+ kyunki
547
+ lagta
548
+ lagte
549
+ lagti
550
+ last
551
+ lately
552
+ later
553
+ le
554
+ least
555
+ lekar
556
+ lekin
557
+ less
558
+ lest
559
+ let
560
+ let's
561
+ li
562
+ like
563
+ liked
564
+ likely
565
+ little
566
+ liya
567
+ liye
568
+ ll
569
+ lo
570
+ log
571
+ logon
572
+ lol
573
+ look
574
+ looking
575
+ looks
576
+ ltd
577
+ lunga
578
+ m
579
+ maan
580
+ maana
581
+ maane
582
+ maani
583
+ maano
584
+ magar
585
+ mai
586
+ main
587
+ maine
588
+ mainly
589
+ mana
590
+ mane
591
+ mani
592
+ mano
593
+ many
594
+ mat
595
+ may
596
+ maybe
597
+ me
598
+ mean
599
+ meanwhile
600
+ mein
601
+ mera
602
+ mere
603
+ merely
604
+ meri
605
+ might
606
+ mightn
607
+ mightnt
608
+ mightn't
609
+ mil
610
+ mjhe
611
+ more
612
+ moreover
613
+ most
614
+ mostly
615
+ much
616
+ mujhe
617
+ must
618
+ mustn
619
+ mustnt
620
+ mustn't
621
+ my
622
+ myself
623
+ na
624
+ naa
625
+ naah
626
+ nahi
627
+ nahin
628
+ nai
629
+ name
630
+ namely
631
+ nd
632
+ ne
633
+ near
634
+ nearly
635
+ necessary
636
+ neeche
637
+ need
638
+ needn
639
+ neednt
640
+ needn't
641
+ needs
642
+ neither
643
+ never
644
+ nevertheless
645
+ new
646
+ next
647
+ nhi
648
+ nine
649
+ no
650
+ nobody
651
+ non
652
+ none
653
+ noone
654
+ nope
655
+ nor
656
+ normally
657
+ not
658
+ nothing
659
+ novel
660
+ now
661
+ nowhere
662
+ o
663
+ obviously
664
+ of
665
+ off
666
+ often
667
+ oh
668
+ ok
669
+ okay
670
+ old
671
+ on
672
+ once
673
+ one
674
+ ones
675
+ only
676
+ onto
677
+ or
678
+ other
679
+ others
680
+ otherwise
681
+ ought
682
+ our
683
+ ours
684
+ ourselves
685
+ out
686
+ outside
687
+ over
688
+ overall
689
+ own
690
+ par
691
+ pata
692
+ pe
693
+ pehla
694
+ pehle
695
+ pehli
696
+ people
697
+ per
698
+ perhaps
699
+ phla
700
+ phle
701
+ phli
702
+ placed
703
+ please
704
+ plus
705
+ poora
706
+ poori
707
+ provides
708
+ pura
709
+ puri
710
+ q
711
+ que
712
+ quite
713
+ raha
714
+ rahaa
715
+ rahe
716
+ rahi
717
+ rakh
718
+ rakha
719
+ rakhe
720
+ rakhen
721
+ rakhi
722
+ rakho
723
+ rather
724
+ re
725
+ really
726
+ reasonably
727
+ regarding
728
+ regardless
729
+ regards
730
+ rehte
731
+ rha
732
+ rhaa
733
+ rhe
734
+ rhi
735
+ ri
736
+ right
737
+ s
738
+ sa
739
+ saara
740
+ saare
741
+ saath
742
+ sab
743
+ sabhi
744
+ sabse
745
+ sahi
746
+ said
747
+ sakta
748
+ saktaa
749
+ sakte
750
+ sakti
751
+ same
752
+ sang
753
+ sara
754
+ sath
755
+ saw
756
+ say
757
+ saying
758
+ says
759
+ se
760
+ second
761
+ secondly
762
+ see
763
+ seeing
764
+ seem
765
+ seemed
766
+ seeming
767
+ seems
768
+ seen
769
+ self
770
+ selves
771
+ sensible
772
+ sent
773
+ serious
774
+ seriously
775
+ seven
776
+ several
777
+ shall
778
+ shan
779
+ shant
780
+ shan't
781
+ she
782
+ she's
783
+ should
784
+ shouldn
785
+ shouldnt
786
+ shouldn't
787
+ should've
788
+ si
789
+ sir
790
+ sir.
791
+ since
792
+ six
793
+ so
794
+ soch
795
+ some
796
+ somebody
797
+ somehow
798
+ someone
799
+ something
800
+ sometime
801
+ sometimes
802
+ somewhat
803
+ somewhere
804
+ soon
805
+ still
806
+ sub
807
+ such
808
+ sup
809
+ sure
810
+ t
811
+ tab
812
+ tabh
813
+ tak
814
+ take
815
+ taken
816
+ tarah
817
+ teen
818
+ teeno
819
+ teesra
820
+ teesre
821
+ teesri
822
+ tell
823
+ tends
824
+ tera
825
+ tere
826
+ teri
827
+ th
828
+ tha
829
+ than
830
+ thank
831
+ thanks
832
+ thanx
833
+ that
834
+ that'll
835
+ thats
836
+ that's
837
+ the
838
+ theek
839
+ their
840
+ theirs
841
+ them
842
+ themselves
843
+ then
844
+ thence
845
+ there
846
+ thereafter
847
+ thereby
848
+ therefore
849
+ therein
850
+ theres
851
+ there's
852
+ thereupon
853
+ these
854
+ they
855
+ they'd
856
+ they'll
857
+ they're
858
+ they've
859
+ thi
860
+ thik
861
+ thing
862
+ think
863
+ thinking
864
+ third
865
+ this
866
+ tho
867
+ thoda
868
+ thodi
869
+ thorough
870
+ thoroughly
871
+ those
872
+ though
873
+ thought
874
+ three
875
+ through
876
+ throughout
877
+ thru
878
+ thus
879
+ tjhe
880
+ to
881
+ together
882
+ toh
883
+ too
884
+ took
885
+ toward
886
+ towards
887
+ tried
888
+ tries
889
+ true
890
+ truly
891
+ try
892
+ trying
893
+ tu
894
+ tujhe
895
+ tum
896
+ tumhara
897
+ tumhare
898
+ tumhari
899
+ tune
900
+ twice
901
+ two
902
+ um
903
+ umm
904
+ un
905
+ under
906
+ unhe
907
+ unhi
908
+ unho
909
+ unhone
910
+ unka
911
+ unkaa
912
+ unke
913
+ unki
914
+ unko
915
+ unless
916
+ unlikely
917
+ unn
918
+ unse
919
+ until
920
+ unto
921
+ up
922
+ upar
923
+ upon
924
+ us
925
+ use
926
+ used
927
+ useful
928
+ uses
929
+ usi
930
+ using
931
+ uska
932
+ uske
933
+ usne
934
+ uss
935
+ usse
936
+ ussi
937
+ usually
938
+ vaala
939
+ vaale
940
+ vaali
941
+ vahaan
942
+ vahan
943
+ vahi
944
+ vahin
945
+ vaisa
946
+ vaise
947
+ vaisi
948
+ vala
949
+ vale
950
+ vali
951
+ various
952
+ ve
953
+ very
954
+ via
955
+ viz
956
+ vo
957
+ waala
958
+ waale
959
+ waali
960
+ wagaira
961
+ wagairah
962
+ wagerah
963
+ waha
964
+ wahaan
965
+ wahan
966
+ wahi
967
+ wahin
968
+ waisa
969
+ waise
970
+ waisi
971
+ wala
972
+ wale
973
+ wali
974
+ want
975
+ wants
976
+ was
977
+ wasn
978
+ wasnt
979
+ wasn't
980
+ way
981
+ we
982
+ we'd
983
+ well
984
+ we'll
985
+ went
986
+ were
987
+ we're
988
+ weren
989
+ werent
990
+ weren't
991
+ we've
992
+ what
993
+ whatever
994
+ what's
995
+ when
996
+ whence
997
+ whenever
998
+ where
999
+ whereafter
1000
+ whereas
1001
+ whereby
1002
+ wherein
1003
+ where's
1004
+ whereupon
1005
+ wherever
1006
+ whether
1007
+ which
1008
+ while
1009
+ who
1010
+ whoever
1011
+ whole
1012
+ whom
1013
+ who's
1014
+ whose
1015
+ why
1016
+ will
1017
+ willing
1018
+ with
1019
+ within
1020
+ without
1021
+ wo
1022
+ woh
1023
+ wohi
1024
+ won
1025
+ wont
1026
+ won't
1027
+ would
1028
+ wouldn
1029
+ wouldnt
1030
+ wouldn't
1031
+ y
1032
+ ya
1033
+ yadi
1034
+ yah
1035
+ yaha
1036
+ yahaan
1037
+ yahan
1038
+ yahi
1039
+ yahin
1040
+ ye
1041
+ yeah
1042
+ yeh
1043
+ yehi
1044
+ yes
1045
+ yet
1046
+ you
1047
+ you'd
1048
+ you'll
1049
+ your
1050
+ you're
1051
+ yours
1052
+ yourself
1053
+ yourselves
1054
+ you've
1055
+ yup
streamlit_app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import preprocessor,helper
3
+ import matplotlib.pyplot as plt
4
+
5
# Page header (static HTML, no user input is interpolated).
st.markdown(
    "<h1 style='text-align: center;'>Chat Analysis Space</h1>",
    unsafe_allow_html=True
)

st.sidebar.title("Whatsapp Chat Analyzer")
uploaded_file = st.sidebar.file_uploader("Choose a file")
if uploaded_file is not None:
    try:
        data = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        st.stop()

    df = preprocessor.preprocess(data)
    # preprocess() returns None when the dates cannot be parsed, and an empty
    # frame when no message stamp is found; neither can be analysed.
    if df is None or df.empty:
        st.error("Could not parse any messages from the uploaded file.")
        st.stop()

    # fetch unique users; the notification pseudo-user is not selectable and
    # may be absent altogether, so filter it instead of calling remove().
    user_list = sorted(
        user for user in df["user"].unique().tolist() if user != "group_notification"
    )
    user_list.insert(0, "Overall")
    selected_user = st.sidebar.selectbox("Show Analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        num_messages, words, num_media_messages, links = helper.fetch_stats(selected_user, df)
        st.title("Top Statistics")
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.header("Total Message")
            st.title(num_messages)

        with col2:
            st.header("Total Words")
            st.title(words)

        with col3:
            st.header("Media Shared")
            st.title(num_media_messages)

        with col4:
            st.header("Links Shared")
            st.title(links)

        # monthly timeline
        st.title("Monthly Timeline")
        timeline = helper.monthly_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(timeline['time'], timeline['message'], color='green')
        plt.xticks(rotation='vertical')
        st.pyplot(fig)

        # daily timeline
        st.title("Daily Timeline")
        daily_timeline = helper.daily_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(daily_timeline['only_date'], daily_timeline['message'], color='black')
        plt.xticks(rotation='vertical')
        st.pyplot(fig)

        # activity map
        st.title('Activity Map')
        col1, col2 = st.columns(2)

        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values, color='purple')
            plt.xticks(rotation='vertical')
            st.pyplot(fig)

        with col2:
            st.header("Most busy month")
            busy_month = helper.month_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_month.index, busy_month.values, color='orange')
            plt.xticks(rotation='vertical')
            st.pyplot(fig)

        # finding the busiest users in the group (group level)
        if selected_user == 'Overall':
            st.title('Most Busy Users')

            x, new_df = helper.most_busy_users(df)
            fig, ax = plt.subplots()

            col1, col2 = st.columns(2)

            with col1:
                ax.bar(x.index, x.values, color='red')
                plt.xticks(rotation='vertical')
                st.pyplot(fig)

            with col2:
                st.dataframe(new_df)

        # WordCloud
        st.title("Wordcloud")
        try:
            df_wc = helper.create_wordcloud(selected_user, df)
        except ValueError:
            # NOTE(review): WordCloud is expected to raise ValueError when no
            # words are left after filtering - confirm against its docs.
            st.info("Not enough text to build a word cloud.")
        else:
            fig, ax = plt.subplots()
            ax.imshow(df_wc)
            st.pyplot(fig)

        # most common words
        most_common_df = helper.most_common_words(selected_user, df)
        st.title('Most common words')
        if most_common_df.empty:
            st.info("No words to display.")
        else:
            fig, ax = plt.subplots()
            ax.barh(most_common_df[0], most_common_df[1])
            plt.xticks(rotation='vertical')
            st.pyplot(fig)

        # emoji analysis
        emoji_df = helper.emoji_helper(selected_user, df)
        st.title("Emoji Analysis")

        if emoji_df.empty:
            st.info("No emojis found.")
        else:
            col1, col2 = st.columns(2)

            with col1:
                st.dataframe(emoji_df)
            with col2:
                fig, ax = plt.subplots()
                ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f")
                st.pyplot(fig)
139
+
140
+
141
+