officialamit558 commited on
Commit
e08c4d2
·
verified ·
1 Parent(s): 881d42a

Upload 11 files

Browse files
Files changed (12) hide show
  1. .gitattributes +1 -0
  2. Designer.png +3 -0
  3. Procfile +1 -0
  4. app.py +202 -0
  5. apps.py +140 -0
  6. downloadfile.jpg +0 -0
  7. helper.py +126 -0
  8. preprocessor.py +56 -0
  9. preprocessor2.py +0 -0
  10. requirements.txt +8 -0
  11. setup.sh +9 -0
  12. stop_hinglish.txt +1055 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Designer.png filter=lfs diff=lfs merge=lfs -text
Designer.png ADDED

Git LFS Details

  • SHA256: ae46bc71798d18bbcdba3f08836dbffd294f8f926d107b2d4fa2f4a342afd4b8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.75 MB
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: sh setup.sh && streamlit run app.py
app.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import preprocessor, helper
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
+ # Define sections
7
# Markdown shown on the landing page (product description and usage notes).
_HOME_MARKDOWN = """
## Product Description
The WhatsApp Chat Analyzer is a powerful tool designed to help you gain insights from your WhatsApp chat data. By simply uploading your chat file, you can explore various metrics and visualizations that reveal patterns and trends in your conversations.

## Features
- **Total Messages**: Count the total number of messages exchanged.
- **Word Analysis**: Analyze the total words used in the chat.
- **Media Analysis**: Count the number of media files shared.
- **Link Analysis**: Count the number of links shared.
- **Sentiment Analysis**: Understand the sentiment of the conversation.
- **Emoji Analysis**: Analyze the emojis used in the chat.
- **Monthly Timeline**: Visualize the number of messages sent each month.
- **Daily Timeline**: See daily message trends.
- **Activity Maps**: Discover the most active days and months.
- **Word Clouds**: Generate word clouds to see the most frequently used words.
- **Common Words**: Identify the most common words in the chat.
- **Busy Users**: Find out who is the most active user in the group.

## How to Use It
1. **Upload File**: Choose your WhatsApp chat file (in `.txt` format) from the sidebar.
2. **Select User**: Select the user for whom you want to see the analysis or choose 'Overall' for group analysis.
3. **View Analysis**: Click on 'Show Analysis' to view detailed insights and visualizations.

## Restrictions
- The app currently supports only `.txt` files exported from WhatsApp.
- Ensure the chat file is not too large to avoid performance issues.

## Memory Size
- The app can handle files up to 200MB efficiently.
- Larger files may take more time to process and could impact performance.

## Privacy and Security
- **Data Privacy**: Your uploaded chat data is processed locally and not stored on any server.
- **Security**: The app does not share your data with third parties. All processing is done in-memory and the data is discarded after the session ends.

Enjoy analyzing your WhatsApp conversations with our interactive tools!
"""


def home():
    """Render the landing page: title, product description and two images."""
    st.title("Welcome to WhatsApp Chat Analyzer")
    st.markdown(_HOME_MARKDOWN)

    # Image paths are relative to the working directory the app is started in.
    for image_file in ("downloadfile.jpg", "Designer.png"):
        st.image(image_file, caption="WhatsApp Chat Analysis", width=400)
52
+
53
# Bar colour per sentiment label, so a missing label cannot shift the colours.
_SENTIMENT_COLORS = {'Negative': 'red', 'Neutral': 'blue', 'Positive': 'green'}


def _show_figure(fig):
    """Render ``fig`` in the current Streamlit container, then close it."""
    st.pyplot(fig)
    # pyplot keeps every figure registered until it is closed; without this
    # each "Show Analysis" click would leave a dozen figures in memory.
    plt.close(fig)


def _bar_chart(labels, values, color, rotate=True):
    """Show a bar chart, optionally with vertical x tick labels."""
    fig, ax = plt.subplots()
    ax.bar(labels, values, color=color)
    if rotate:
        ax.tick_params(axis='x', labelrotation=90)
    _show_figure(fig)


def _line_chart(x_values, y_values, color):
    """Show a line chart with vertical x tick labels."""
    fig, ax = plt.subplots()
    ax.plot(x_values, y_values, color=color)
    ax.tick_params(axis='x', labelrotation=90)
    _show_figure(fig)


def _render_top_stats(selected_user, df):
    """Show the four headline counters side by side."""
    num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user, df)
    st.title("Top Statistics")
    headline = (
        ("Total Messages", num_messages),
        ("Total Words", words),
        ("Media Shared", num_media_messages),
        ("Links Shared", num_links),
    )
    for column, (label, value) in zip(st.columns(4), headline):
        with column:
            st.header(label)
            st.title(value)


def _render_sentiment(selected_user, df):
    """Show the number of messages per sentiment label."""
    st.title("Sentiment Analysis")
    sentiment_df = helper.sentiment_analysis(selected_user, df)
    colors = [_SENTIMENT_COLORS.get(label, 'gray') for label in sentiment_df['sentiment_label']]
    _bar_chart(sentiment_df['sentiment_label'], sentiment_df['message'], colors, rotate=False)


def _render_emoji_analysis(selected_user, df):
    """Show the emoji frequency table and a pie chart of the top five."""
    st.title("Emoji Analysis")
    emoji_df = helper.emoji_helper(selected_user, df)
    if emoji_df.empty:
        # An empty frame may have no columns 0/1, so indexing it would fail.
        st.write("No emojis found.")
        return

    col1, col2 = st.columns(2)
    with col1:
        st.dataframe(emoji_df)
    with col2:
        fig, ax = plt.subplots()
        ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f")
        _show_figure(fig)


def _render_timelines(selected_user, df):
    """Show the monthly and daily message-count timelines."""
    st.title("Monthly Timeline")
    timeline = helper.monthly_timeline(selected_user, df)
    _line_chart(timeline['time'], timeline['message'], 'green')

    st.title("Daily Timeline")
    daily_timeline = helper.daily_timeline(selected_user, df)
    _line_chart(daily_timeline['only_date'], daily_timeline['message'], 'black')


def _render_activity_maps(selected_user, df):
    """Show busiest day/month bar charts and the day-by-hour heatmap."""
    st.title('Activity Map')
    col1, col2 = st.columns(2)

    with col1:
        st.header("Most Busy Day")
        busy_day = helper.week_activity_map(selected_user, df)
        _bar_chart(busy_day.index, busy_day.values, 'purple')

    with col2:
        st.header("Most Busy Month")
        busy_month = helper.month_activity_map(selected_user, df)
        _bar_chart(busy_month.index, busy_month.values, 'orange')

    st.title("Weekly Activity Map")
    user_heatmap = helper.activity_heatmap(selected_user, df)
    fig, ax = plt.subplots()
    # Draw on the axes just created instead of relying on pyplot's "current axes".
    sns.heatmap(user_heatmap, ax=ax)
    _show_figure(fig)


def _render_busy_users(df):
    """Show the most active participants (group-level view only)."""
    st.title('Most Busy Users')
    x, new_df = helper.most_busy_users(df)
    col1, col2 = st.columns(2)

    with col1:
        _bar_chart(x.index, x.values, 'red')
    with col2:
        st.dataframe(new_df)


def _render_words(selected_user, df):
    """Show the word cloud and the most common words."""
    st.title("Wordcloud")
    try:
        df_wc = helper.create_wordcloud(selected_user, df)
    except ValueError:
        # NOTE(review): WordCloud.generate is expected to raise ValueError when
        # no words remain after filtering (e.g. a media-only user) - confirm.
        st.write("Not enough words to build a word cloud.")
    else:
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        _show_figure(fig)

    st.title('Most Common Words')
    most_common_df = helper.most_common_words(selected_user, df)
    if most_common_df.empty:
        st.write("No words found.")
        return
    fig, ax = plt.subplots()
    ax.barh(most_common_df[0], most_common_df[1])
    ax.tick_params(axis='x', labelrotation=90)
    _show_figure(fig)


def analyze():
    """Render the analysis page for an uploaded WhatsApp chat export.

    Reads the uploaded file from the sidebar, parses it with
    ``preprocessor.preprocess``, lets the user pick a participant (or
    ``'Overall'``) and, once "Show Analysis" is clicked, renders every
    statistic and chart provided by ``helper``.
    """
    st.title("WhatsApp Chat Analysis")
    st.sidebar.title("WhatsApp Chat Analyzer")
    uploaded_file = st.sidebar.file_uploader("Choose a file")
    if uploaded_file is None:
        return

    try:
        data = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return

    df = preprocessor.preprocess(data)
    if df.empty:
        st.error("No WhatsApp messages were found in the uploaded file.")
        return

    # Filter instead of list.remove(): a chat without any system
    # notification would make remove() raise ValueError.
    user_list = sorted(
        user for user in df['user'].unique().tolist() if user != 'group_notification'
    )
    user_list.insert(0, "Overall")
    selected_user = st.sidebar.selectbox("Show analysis wrt", user_list)

    if not st.sidebar.button("Show Analysis"):
        return

    _render_top_stats(selected_user, df)
    _render_sentiment(selected_user, df)
    _render_emoji_analysis(selected_user, df)
    _render_timelines(selected_user, df)
    _render_activity_maps(selected_user, df)
    if selected_user == 'Overall':
        _render_busy_users(df)
    _render_words(selected_user, df)
192
+
193
# Sidebar navigation: map each page label to the function that renders it.
_PAGE_RENDERERS = {"Home": home, "Analyze": analyze}

st.sidebar.title(" Whatsapp Chat Analyzer ")
page = st.sidebar.radio("Go to", ["Home", "Analyze"])

# Display the selected section (nothing is rendered for an unknown label).
_render_page = _PAGE_RENDERERS.get(page)
if _render_page is not None:
    _render_page()
202
+
apps.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import preprocessor,helper
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
def _show(fig):
    """Render ``fig`` in Streamlit, then close it so pyplot does not keep it alive."""
    st.pyplot(fig)
    plt.close(fig)


st.sidebar.title("Whatsapp Chat Analyzer")

uploaded_file = st.sidebar.file_uploader("Choose a file")

if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df = preprocessor.preprocess(data)

    # Fetch unique users. Filtering (rather than list.remove) avoids a
    # ValueError when the chat contains no system notification.
    user_list = sorted(
        user for user in df['user'].unique().tolist() if user != 'group_notification'
    )
    user_list.insert(0, "Overall")

    selected_user = st.sidebar.selectbox("Show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):

        # Stats area
        num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user, df)
        st.title("Top Statistics")
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Media Shared")
            st.title(num_media_messages)
        with col4:
            st.header("Links Shared")
            st.title(num_links)

        # Monthly timeline
        st.title("Monthly Timeline")
        timeline = helper.monthly_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(timeline['time'], timeline['message'], color='green')
        ax.tick_params(axis='x', labelrotation=90)
        _show(fig)

        # Daily timeline
        st.title("Daily Timeline")
        daily_timeline = helper.daily_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(daily_timeline['only_date'], daily_timeline['message'], color='black')
        ax.tick_params(axis='x', labelrotation=90)
        _show(fig)

        # Activity map
        st.title('Activity Map')
        col1, col2 = st.columns(2)

        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values, color='purple')
            ax.tick_params(axis='x', labelrotation=90)
            _show(fig)

        with col2:
            st.header("Most busy month")
            busy_month = helper.month_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_month.index, busy_month.values, color='orange')
            ax.tick_params(axis='x', labelrotation=90)
            _show(fig)

        st.title("Weekly Activity Map")
        user_heatmap = helper.activity_heatmap(selected_user, df)
        fig, ax = plt.subplots()
        # Draw on the axes just created instead of pyplot's implicit current axes.
        sns.heatmap(user_heatmap, ax=ax)
        _show(fig)

        # Finding the busiest users in the group (group level only)
        if selected_user == 'Overall':
            st.title('Most Busy Users')
            x, new_df = helper.most_busy_users(df)
            fig, ax = plt.subplots()

            col1, col2 = st.columns(2)

            with col1:
                ax.bar(x.index, x.values, color='red')
                ax.tick_params(axis='x', labelrotation=90)
                _show(fig)
            with col2:
                st.dataframe(new_df)

        # Word cloud
        st.title("Wordcloud")
        df_wc = helper.create_wordcloud(selected_user, df)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        _show(fig)

        # Most common words
        most_common_df = helper.most_common_words(selected_user, df)
        st.title('Most common words')
        if most_common_df.empty:
            st.write("No words found.")
        else:
            fig, ax = plt.subplots()
            ax.barh(most_common_df[0], most_common_df[1])
            ax.tick_params(axis='x', labelrotation=90)
            _show(fig)

        # Emoji analysis
        emoji_df = helper.emoji_helper(selected_user, df)
        st.title("Emoji Analysis")

        if emoji_df.empty:
            # An empty frame may have no columns 0/1, so indexing it would fail.
            st.write("No emojis found.")
        else:
            col1, col2 = st.columns(2)

            with col1:
                st.dataframe(emoji_df)
            with col2:
                fig, ax = plt.subplots()
                ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f")
                _show(fig)
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
downloadfile.jpg ADDED
helper.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from urlextract import URLExtract
2
+ from wordcloud import WordCloud
3
+ import pandas as pd
4
+ from collections import Counter
5
+ import emoji
6
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
7
+ import nltk
8
+
9
# Download the VADER lexicon only when it is not installed yet, so importing
# this module does not contact the NLTK server on every start-up.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon', quiet=True)

# Shared, module-level instances reused by the helper functions below.
extract = URLExtract()
sentiment_analyzer = SentimentIntensityAnalyzer()
13
+
14
def fetch_stats(selected_user, df):
    """Return headline counts for the whole chat or for one participant.

    Args:
        selected_user: Value of the ``user`` column to restrict to, or
            ``'Overall'`` to use every row.
        df: Chat DataFrame with ``user`` and ``message`` columns.

    Returns:
        Tuple ``(num_messages, num_words, num_media_messages, num_links)``.
    """
    chat = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    texts = chat['message']

    # Number of messages
    message_count = chat.shape[0]

    # Words are whitespace-separated tokens of every message
    word_count = sum(len(text.split()) for text in texts)

    # Media messages are exported as this exact placeholder line
    media_count = chat[texts == '<Media omitted>\n'].shape[0]

    # Links found by the module-level URL extractor
    link_count = sum(len(extract.find_urls(text)) for text in texts)

    return message_count, word_count, media_count, link_count
36
+
37
def most_busy_users(df):
    """Return the most active users and every user's share of messages.

    Args:
        df: Chat DataFrame with a ``user`` column.

    Returns:
        Tuple ``(x, share)`` where ``x`` is a Series with the message counts
        of the five most frequent ``user`` values and ``share`` is a
        DataFrame with columns ``name`` and ``percent`` (percentage of all
        rows, rounded to two decimals), most active first.
    """
    counts = df['user'].value_counts()
    x = counts.head()

    percent = (counts / df.shape[0] * 100).round(2)
    # Build the frame explicitly: the column names produced by
    # value_counts().reset_index() differ between pandas versions, so
    # renaming them by their old names mislabels the columns on pandas >= 2.
    share = pd.DataFrame({'name': percent.index, 'percent': percent.values})
    return x, share
42
+
43
def create_wordcloud(selected_user, df):
    """Build a word cloud from the chat text, ignoring stop words.

    Args:
        selected_user: Value of the ``user`` column to restrict to, or
            ``'Overall'`` to use every row.
        df: Chat DataFrame with ``user`` and ``message`` columns.

    Returns:
        The ``WordCloud`` object returned by ``WordCloud.generate``.
    """
    # Read the stop-word list and close the file again; a set gives O(1)
    # membership tests for the per-word filter below.
    with open('stop_hinglish.txt', 'r', encoding='utf-8') as stop_file:
        stop_words = set(stop_file.read().splitlines())

    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    # Skip system notifications and media placeholders.
    keep = (df['user'] != 'group_notification') & (df['message'] != '<Media omitted>\n')

    def remove_stop_words(message):
        kept = [word for word in message.lower().split() if word not in stop_words]
        return " ".join(kept)

    # Build the text without assigning back into a filtered slice of df.
    text = " ".join(remove_stop_words(message) for message in df.loc[keep, 'message'])

    wc = WordCloud(width=500, height=500, min_font_size=10, background_color='white')
    # NOTE(review): generate() presumably raises when ``text`` has no words;
    # callers rendering a media-only user should be prepared for that.
    return wc.generate(text)
61
+
62
def most_common_words(selected_user, df):
    """Return the 20 most frequent non-stop-words in the chat.

    Args:
        selected_user: Value of the ``user`` column to restrict to, or
            ``'Overall'`` to use every row.
        df: Chat DataFrame with ``user`` and ``message`` columns.

    Returns:
        DataFrame with column ``0`` (lower-cased word) and column ``1``
        (count), most frequent first. Both columns exist even when no
        words are found.
    """
    # Read the stop-word list and close the file again; a set gives O(1)
    # membership tests for the per-word filter below.
    with open('stop_hinglish.txt', 'r', encoding='utf-8') as stop_file:
        stop_words = set(stop_file.read().splitlines())

    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    # Skip system notifications and media placeholders.
    keep = (df['user'] != 'group_notification') & (df['message'] != '<Media omitted>\n')

    counts = Counter(
        word
        for message in df.loc[keep, 'message']
        for word in message.lower().split()
        if word not in stop_words
    )
    # Explicit column labels keep the 0/1 columns present for an empty result.
    return pd.DataFrame(counts.most_common(20), columns=[0, 1])
75
+
76
def emoji_helper(selected_user, df):
    """Count the emoji characters used in the chat.

    Args:
        selected_user: Value of the ``user`` column to restrict to, or
            ``'Overall'`` to use every row.
        df: Chat DataFrame with ``user`` and ``message`` columns.

    Returns:
        DataFrame with column ``0`` (emoji character) and column ``1``
        (count), most frequent first. Both columns exist even when no
        emoji is found.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    # Newer releases of the ``emoji`` package no longer ship UNICODE_EMOJI;
    # prefer is_emoji() when available and fall back to the legacy table.
    if hasattr(emoji, 'is_emoji'):
        is_emoji = emoji.is_emoji
    else:
        is_emoji = emoji.UNICODE_EMOJI['en'].__contains__

    counts = Counter(
        char for message in df['message'] for char in message if is_emoji(char)
    )
    # Explicit column labels keep the 0/1 columns present for an empty result.
    return pd.DataFrame(counts.most_common(), columns=[0, 1])
83
+
84
def monthly_timeline(selected_user, df):
    """Count messages per calendar month.

    Args:
        selected_user: Value of the ``user`` column to restrict to, or
            ``'Overall'`` to use every row.
        df: Chat DataFrame with ``user``, ``year``, ``month_num``, ``month``
            and ``message`` columns.

    Returns:
        DataFrame with columns ``year``, ``month_num``, ``month``,
        ``message`` (the count) and ``time`` (``"<month>-<year>"``), sorted
        by the group keys.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    # Select the column before aggregating so only ``message`` is counted.
    timeline = df.groupby(['year', 'month_num', 'month'])['message'].count().reset_index()
    timeline['time'] = timeline['month'] + "-" + timeline['year'].astype(str)
    return timeline
91
+
92
def daily_timeline(selected_user, df):
    """Count messages per calendar day.

    Args:
        selected_user: Value of the ``user`` column to restrict to, or
            ``'Overall'`` to use every row.
        df: Chat DataFrame with ``user``, ``only_date`` and ``message``
            columns.

    Returns:
        DataFrame with columns ``only_date`` and ``message`` (the count),
        sorted by date.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    # Select the column before aggregating so only ``message`` is counted.
    per_day = df.groupby('only_date')['message'].count().reset_index()
    return per_day
98
+
99
def week_activity_map(selected_user, df):
    """Return the number of messages per weekday name, busiest first.

    ``selected_user`` is a value of the ``user`` column, or ``'Overall'``
    to count every row of ``df``.
    """
    chat = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    return chat['day_name'].value_counts()
104
+
105
def month_activity_map(selected_user, df):
    """Return the number of messages per month name, busiest first.

    ``selected_user`` is a value of the ``user`` column, or ``'Overall'``
    to count every row of ``df``.
    """
    chat = df if selected_user == 'Overall' else df[df['user'] == selected_user]
    return chat['month'].value_counts()
110
+
111
# Calendar order used for the heatmap rows.
_WEEKDAYS = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')


def _period_start(period):
    """Return the starting hour of a ``"<start>-<end>"`` label (24 if unparsable)."""
    head = str(period).split('-')[0]
    return int(head) if head.isdigit() else 24


def activity_heatmap(selected_user, df):
    """Count messages per weekday and hour period.

    Args:
        selected_user: Value of the ``user`` column to restrict to, or
            ``'Overall'`` to use every row.
        df: Chat DataFrame with ``user``, ``day_name``, ``period`` and
            ``message`` columns.

    Returns:
        DataFrame indexed by ``day_name`` with one column per ``period``;
        cells hold message counts, 0 where there were none. Rows are in
        calendar order (Monday first) and columns in chronological order.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    heatmap = df.pivot_table(
        index='day_name', columns='period', values='message', aggfunc='count'
    ).fillna(0)

    # pivot_table sorts labels as strings ('10-11' before '2-3', 'Friday'
    # before 'Monday'); reorder both axes so the heatmap reads naturally.
    day_order = [day for day in _WEEKDAYS if day in heatmap.index]
    day_order += [day for day in heatmap.index if day not in _WEEKDAYS]
    period_order = sorted(heatmap.columns, key=_period_start)
    return heatmap.loc[day_order, period_order]
117
+
118
def sentiment_analysis(selected_user, df):
    """Count messages per sentiment label.

    Each message is scored with the module-level ``sentiment_analyzer``;
    a compound score above 0 is ``'Positive'``, below 0 ``'Negative'`` and
    exactly 0 ``'Neutral'``.

    Args:
        selected_user: Value of the ``user`` column to restrict to, or
            ``'Overall'`` to use every row.
        df: Chat DataFrame with ``user`` and ``message`` columns. It is not
            modified.

    Returns:
        DataFrame with columns ``sentiment_label`` and ``message`` (the
        number of messages carrying that label), sorted by label.
    """
    if selected_user != 'Overall':
        df = df[df['user'] == selected_user]

    def label_for(message):
        score = sentiment_analyzer.polarity_scores(message)['compound']
        if score > 0:
            return 'Positive'
        if score < 0:
            return 'Negative'
        return 'Neutral'

    # Keep the labels in a separate Series: writing helper columns into
    # ``df`` would alter the caller's frame (or a filtered slice of it).
    labels = df['message'].apply(label_for).rename('sentiment_label')
    return df['message'].groupby(labels).count().reset_index()
preprocessor.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pandas as pd
3
+ import nltk
4
+ from nltk.corpus import stopwords
5
+ from nltk.tokenize import word_tokenize
6
+
7
# Download the NLTK resources only when they are not installed yet, so
# importing this module does not contact the NLTK server on every start-up.
for _resource, _resource_path in (
    ('stopwords', 'corpora/stopwords'),
    ('punkt', 'tokenizers/punkt'),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_resource, quiet=True)
9
+
10
# Timestamp prefix that starts every exported message, e.g. "12/05/2021, 10:30 - ".
_DATE_PREFIX = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'
# The prefix pattern accepts both 4-digit and 2-digit years; try both formats.
_DATE_FORMATS = ('%d/%m/%Y, %H:%M - ', '%d/%m/%y, %H:%M - ')
# "<sender>: " at the start of a message body.
_USER_PREFIX = re.compile(r'([\w\W]+?):\s')


def _parse_message_dates(raw_dates):
    """Parse the timestamp prefixes, trying each supported year format in turn."""
    stamps = pd.Series(raw_dates, dtype='object')
    last_error = None
    for date_format in _DATE_FORMATS:
        try:
            return pd.to_datetime(stamps, format=date_format)
        except ValueError as err:
            last_error = err
    raise last_error


def _split_user_message(raw):
    """Split ``"<sender>: <text>"``; text without a sender is a group notification."""
    # maxsplit=1: only the first ": " separates sender and text, so a
    # message that itself contains ": " is kept intact.
    parts = _USER_PREFIX.split(raw, maxsplit=1)
    if len(parts) > 1:
        return parts[1], parts[2]
    return 'group_notification', parts[0]


def _hour_period(hour):
    """Return the ``"<hour>-<next hour>"`` label used for the activity heatmap."""
    if hour == 23:
        return "23-00"
    if hour == 0:
        return "00-1"
    return f"{hour}-{hour + 1}"


def preprocess(data):
    """Parse the text of a WhatsApp chat export into a DataFrame.

    Messages are recognised by a ``dd/mm/yyyy, HH:MM - `` (or 2-digit year)
    prefix. Text without a ``"<sender>: "`` part is attributed to the
    pseudo-user ``'group_notification'``.

    Args:
        data: Full text of the exported chat.

    Returns:
        DataFrame with one row per message and the columns ``date``,
        ``user``, ``message``, ``only_date``, ``year``, ``month_num``,
        ``month``, ``day``, ``day_name``, ``hour``, ``minute`` and
        ``period``.

    Raises:
        ValueError: If the timestamps match neither supported date format.
    """
    raw_messages = re.split(_DATE_PREFIX, data)[1:]
    raw_dates = re.findall(_DATE_PREFIX, data)

    users = []
    messages = []
    for raw in raw_messages:
        user, text = _split_user_message(raw)
        users.append(user)
        messages.append(text)

    df = pd.DataFrame({
        'date': _parse_message_dates(raw_dates),
        'user': users,
        'message': messages,
    })

    df['only_date'] = df['date'].dt.date
    df['year'] = df['date'].dt.year
    df['month_num'] = df['date'].dt.month
    df['month'] = df['date'].dt.month_name()
    df['day'] = df['date'].dt.day
    df['day_name'] = df['date'].dt.day_name()
    df['hour'] = df['date'].dt.hour
    df['minute'] = df['date'].dt.minute

    df['period'] = [_hour_period(hour) for hour in df['hour']]

    return df
preprocessor2.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ matplotlib
3
+ seaborn
4
+ urlextract
5
+ wordcloud
6
+ pandas
7
+ emoji
8
+ nltk
setup.sh ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Write the Streamlit server configuration used by the hosted deployment.
mkdir -p ~/.streamlit/

# A here-document writes real newlines in every shell; `echo "...\n..."`
# only does so in shells whose echo expands backslash escapes.
# Fall back to Streamlit's default port when $PORT is not provided.
cat > ~/.streamlit/config.toml <<EOF
[server]
port = ${PORT:-8501}
enableCORS = false
headless = true
EOF
stop_hinglish.txt ADDED
@@ -0,0 +1,1055 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .
2
+ ..
3
+ ...
4
+ ?
5
+ -
6
+ --
7
+ 1
8
+ 2
9
+ 3
10
+ 4
11
+ 5
12
+ 6
13
+ 7
14
+ 8
15
+ 9
16
+ 0
17
+ a
18
+ aadi
19
+ aaj
20
+ aap
21
+ aapne
22
+ aata
23
+ aati
24
+ aaya
25
+ aaye
26
+ ab
27
+ abbe
28
+ abbey
29
+ abe
30
+ abhi
31
+ able
32
+ about
33
+ above
34
+ accha
35
+ according
36
+ accordingly
37
+ acha
38
+ achcha
39
+ across
40
+ actually
41
+ after
42
+ afterwards
43
+ again
44
+ against
45
+ agar
46
+ ain
47
+ aint
48
+ ain't
49
+ aisa
50
+ aise
51
+ aisi
52
+ alag
53
+ all
54
+ allow
55
+ allows
56
+ almost
57
+ alone
58
+ along
59
+ already
60
+ also
61
+ although
62
+ always
63
+ am
64
+ among
65
+ amongst
66
+ an
67
+ and
68
+ andar
69
+ another
70
+ any
71
+ anybody
72
+ anyhow
73
+ anyone
74
+ anything
75
+ anyway
76
+ anyways
77
+ anywhere
78
+ ap
79
+ apan
80
+ apart
81
+ apna
82
+ apnaa
83
+ apne
84
+ apni
85
+ appear
86
+ are
87
+ aren
88
+ arent
89
+ aren't
90
+ around
91
+ arre
92
+ as
93
+ aside
94
+ ask
95
+ asking
96
+ at
97
+ aur
98
+ avum
99
+ aya
100
+ aye
101
+ baad
102
+ baar
103
+ bad
104
+ bahut
105
+ bana
106
+ banae
107
+ banai
108
+ banao
109
+ banaya
110
+ banaye
111
+ banayi
112
+ banda
113
+ bande
114
+ bandi
115
+ bane
116
+ bani
117
+ bas
118
+ bata
119
+ batao
120
+ bc
121
+ be
122
+ became
123
+ because
124
+ become
125
+ becomes
126
+ becoming
127
+ been
128
+ before
129
+ beforehand
130
+ behind
131
+ being
132
+ below
133
+ beside
134
+ besides
135
+ best
136
+ better
137
+ between
138
+ beyond
139
+ bhai
140
+ bheetar
141
+ bhi
142
+ bhitar
143
+ bht
144
+ bilkul
145
+ bohot
146
+ bol
147
+ bola
148
+ bole
149
+ boli
150
+ bolo
151
+ bolta
152
+ bolte
153
+ bolti
154
+ both
155
+ brief
156
+ bro
157
+ btw
158
+ but
159
+ by
160
+ came
161
+ can
162
+ cannot
163
+ cant
164
+ can't
165
+ cause
166
+ causes
167
+ certain
168
+ certainly
169
+ chahiye
170
+ chaiye
171
+ chal
172
+ chalega
173
+ chhaiye
174
+ clearly
175
+ c'mon
176
+ com
177
+ come
178
+ comes
179
+ could
180
+ couldn
181
+ couldnt
182
+ couldn't
183
+ d
184
+ de
185
+ dede
186
+ dega
187
+ degi
188
+ dekh
189
+ dekha
190
+ dekhe
191
+ dekhi
192
+ dekho
193
+ denge
194
+ dhang
195
+ di
196
+ did
197
+ didn
198
+ didnt
199
+ didn't
200
+ dijiye
201
+ diya
202
+ diyaa
203
+ diye
204
+ diyo
205
+ do
206
+ does
207
+ doesn
208
+ doesnt
209
+ doesn't
210
+ doing
211
+ done
212
+ dono
213
+ dont
214
+ don't
215
+ doosra
216
+ doosre
217
+ down
218
+ downwards
219
+ dude
220
+ dunga
221
+ dungi
222
+ during
223
+ dusra
224
+ dusre
225
+ dusri
226
+ dvaara
227
+ dvara
228
+ dwaara
229
+ dwara
230
+ each
231
+ edu
232
+ eg
233
+ eight
234
+ either
235
+ ek
236
+ else
237
+ elsewhere
238
+ enough
239
+ etc
240
+ even
241
+ ever
242
+ every
243
+ everybody
244
+ everyone
245
+ everything
246
+ everywhere
247
+ ex
248
+ exactly
249
+ example
250
+ except
251
+ far
252
+ few
253
+ fifth
254
+ fir
255
+ first
256
+ five
257
+ followed
258
+ following
259
+ follows
260
+ for
261
+ forth
262
+ four
263
+ from
264
+ further
265
+ furthermore
266
+ gaya
267
+ gaye
268
+ gayi
269
+ get
270
+ gets
271
+ getting
272
+ ghar
273
+ given
274
+ gives
275
+ go
276
+ goes
277
+ going
278
+ gone
279
+ good
280
+ got
281
+ gotten
282
+ greetings
283
+ guys
284
+ haan
285
+ had
286
+ hadd
287
+ hadn
288
+ hadnt
289
+ hadn't
290
+ hai
291
+ hain
292
+ hamara
293
+ hamare
294
+ hamari
295
+ hamne
296
+ han
297
+ happens
298
+ har
299
+ hardly
300
+ has
301
+ hasn
302
+ hasnt
303
+ hasn't
304
+ have
305
+ haven
306
+ havent
307
+ haven't
308
+ having
309
+ he
310
+ hello
311
+ help
312
+ hence
313
+ her
314
+ here
315
+ hereafter
316
+ hereby
317
+ herein
318
+ here's
319
+ hereupon
320
+ hers
321
+ herself
322
+ he's
323
+ hi
324
+ him
325
+ himself
326
+ his
327
+ hither
328
+ hm
329
+ hmm
330
+ ho
331
+ hoga
332
+ hoge
333
+ hogi
334
+ hona
335
+ honaa
336
+ hone
337
+ honge
338
+ hongi
339
+ honi
340
+ hopefully
341
+ hota
342
+ hotaa
343
+ hote
344
+ hoti
345
+ how
346
+ howbeit
347
+ however
348
+ hoyenge
349
+ hoyengi
350
+ hu
351
+ hua
352
+ hue
353
+ huh
354
+ hui
355
+ hum
356
+ humein
357
+ humne
358
+ hun
359
+ huye
360
+ huyi
361
+ i
362
+ i'd
363
+ idk
364
+ ie
365
+ if
366
+ i'll
367
+ i'm
368
+ imo
369
+ in
370
+ inasmuch
371
+ inc
372
+ inhe
373
+ inhi
374
+ inho
375
+ inka
376
+ inkaa
377
+ inke
378
+ inki
379
+ inn
380
+ inner
381
+ inse
382
+ insofar
383
+ into
384
+ inward
385
+ is
386
+ ise
387
+ isi
388
+ iska
389
+ iskaa
390
+ iske
391
+ iski
392
+ isme
393
+ isn
394
+ isne
395
+ isnt
396
+ isn't
397
+ iss
398
+ isse
399
+ issi
400
+ isski
401
+ it
402
+ it'd
403
+ it'll
404
+ itna
405
+ itne
406
+ itni
407
+ itno
408
+ its
409
+ it's
410
+ itself
411
+ ityaadi
412
+ ityadi
413
+ i've
414
+ ja
415
+ jaa
416
+ jab
417
+ jabh
418
+ jaha
419
+ jahaan
420
+ jahan
421
+ jaisa
422
+ jaise
423
+ jaisi
424
+ jata
425
+ jayega
426
+ jidhar
427
+ jin
428
+ jinhe
429
+ jinhi
430
+ jinho
431
+ jinhone
432
+ jinka
433
+ jinke
434
+ jinki
435
+ jinn
436
+ jis
437
+ jise
438
+ jiska
439
+ jiske
440
+ jiski
441
+ jisme
442
+ jiss
443
+ jisse
444
+ jitna
445
+ jitne
446
+ jitni
447
+ jo
448
+ just
449
+ jyaada
450
+ jyada
451
+ k
452
+ ka
453
+ kaafi
454
+ kab
455
+ kabhi
456
+ kafi
457
+ kaha
458
+ kahaa
459
+ kahaan
460
+ kahan
461
+ kahi
462
+ kahin
463
+ kahte
464
+ kaisa
465
+ kaise
466
+ kaisi
467
+ kal
468
+ kam
469
+ kar
470
+ kara
471
+ kare
472
+ karega
473
+ karegi
474
+ karen
475
+ karenge
476
+ kari
477
+ karke
478
+ karna
479
+ karne
480
+ karni
481
+ karo
482
+ karta
483
+ karte
484
+ karti
485
+ karu
486
+ karun
487
+ karunga
488
+ karungi
489
+ kaun
490
+ kaunsa
491
+ kayi
492
+ kch
493
+ ke
494
+ keep
495
+ keeps
496
+ keh
497
+ kehte
498
+ kept
499
+ khud
500
+ ki
501
+ kin
502
+ kine
503
+ kinhe
504
+ kinho
505
+ kinka
506
+ kinke
507
+ kinki
508
+ kinko
509
+ kinn
510
+ kino
511
+ kis
512
+ kise
513
+ kisi
514
+ kiska
515
+ kiske
516
+ kiski
517
+ kisko
518
+ kisliye
519
+ kisne
520
+ kitna
521
+ kitne
522
+ kitni
523
+ kitno
524
+ kiya
525
+ kiye
526
+ know
527
+ known
528
+ knows
529
+ ko
530
+ koi
531
+ kon
532
+ konsa
533
+ koyi
534
+ krna
535
+ krne
536
+ kuch
537
+ kuchch
538
+ kuchh
539
+ kul
540
+ kull
541
+ kya
542
+ kyaa
543
+ kyu
544
+ kyuki
545
+ kyun
546
+ kyunki
547
+ lagta
548
+ lagte
549
+ lagti
550
+ last
551
+ lately
552
+ later
553
+ le
554
+ least
555
+ lekar
556
+ lekin
557
+ less
558
+ lest
559
+ let
560
+ let's
561
+ li
562
+ like
563
+ liked
564
+ likely
565
+ little
566
+ liya
567
+ liye
568
+ ll
569
+ lo
570
+ log
571
+ logon
572
+ lol
573
+ look
574
+ looking
575
+ looks
576
+ ltd
577
+ lunga
578
+ m
579
+ maan
580
+ maana
581
+ maane
582
+ maani
583
+ maano
584
+ magar
585
+ mai
586
+ main
587
+ maine
588
+ mainly
589
+ mana
590
+ mane
591
+ mani
592
+ mano
593
+ many
594
+ mat
595
+ may
596
+ maybe
597
+ me
598
+ mean
599
+ meanwhile
600
+ mein
601
+ mera
602
+ mere
603
+ merely
604
+ meri
605
+ might
606
+ mightn
607
+ mightnt
608
+ mightn't
609
+ mil
610
+ mjhe
611
+ more
612
+ moreover
613
+ most
614
+ mostly
615
+ much
616
+ mujhe
617
+ must
618
+ mustn
619
+ mustnt
620
+ mustn't
621
+ my
622
+ myself
623
+ na
624
+ naa
625
+ naah
626
+ nahi
627
+ nahin
628
+ nai
629
+ name
630
+ namely
631
+ nd
632
+ ne
633
+ near
634
+ nearly
635
+ necessary
636
+ neeche
637
+ need
638
+ needn
639
+ neednt
640
+ needn't
641
+ needs
642
+ neither
643
+ never
644
+ nevertheless
645
+ new
646
+ next
647
+ nhi
648
+ nine
649
+ no
650
+ nobody
651
+ non
652
+ none
653
+ noone
654
+ nope
655
+ nor
656
+ normally
657
+ not
658
+ nothing
659
+ novel
660
+ now
661
+ nowhere
662
+ o
663
+ obviously
664
+ of
665
+ off
666
+ often
667
+ oh
668
+ ok
669
+ okay
670
+ old
671
+ on
672
+ once
673
+ one
674
+ ones
675
+ only
676
+ onto
677
+ or
678
+ other
679
+ others
680
+ otherwise
681
+ ought
682
+ our
683
+ ours
684
+ ourselves
685
+ out
686
+ outside
687
+ over
688
+ overall
689
+ own
690
+ par
691
+ pata
692
+ pe
693
+ pehla
694
+ pehle
695
+ pehli
696
+ people
697
+ per
698
+ perhaps
699
+ phla
700
+ phle
701
+ phli
702
+ placed
703
+ please
704
+ plus
705
+ poora
706
+ poori
707
+ provides
708
+ pura
709
+ puri
710
+ q
711
+ que
712
+ quite
713
+ raha
714
+ rahaa
715
+ rahe
716
+ rahi
717
+ rakh
718
+ rakha
719
+ rakhe
720
+ rakhen
721
+ rakhi
722
+ rakho
723
+ rather
724
+ re
725
+ really
726
+ reasonably
727
+ regarding
728
+ regardless
729
+ regards
730
+ rehte
731
+ rha
732
+ rhaa
733
+ rhe
734
+ rhi
735
+ ri
736
+ right
737
+ s
738
+ sa
739
+ saara
740
+ saare
741
+ saath
742
+ sab
743
+ sabhi
744
+ sabse
745
+ sahi
746
+ said
747
+ sakta
748
+ saktaa
749
+ sakte
750
+ sakti
751
+ same
752
+ sang
753
+ sara
754
+ sath
755
+ saw
756
+ say
757
+ saying
758
+ says
759
+ se
760
+ second
761
+ secondly
762
+ see
763
+ seeing
764
+ seem
765
+ seemed
766
+ seeming
767
+ seems
768
+ seen
769
+ self
770
+ selves
771
+ sensible
772
+ sent
773
+ serious
774
+ seriously
775
+ seven
776
+ several
777
+ shall
778
+ shan
779
+ shant
780
+ shan't
781
+ she
782
+ she's
783
+ should
784
+ shouldn
785
+ shouldnt
786
+ shouldn't
787
+ should've
788
+ si
789
+ sir
790
+ sir.
791
+ since
792
+ six
793
+ so
794
+ soch
795
+ some
796
+ somebody
797
+ somehow
798
+ someone
799
+ something
800
+ sometime
801
+ sometimes
802
+ somewhat
803
+ somewhere
804
+ soon
805
+ still
806
+ sub
807
+ such
808
+ sup
809
+ sure
810
+ t
811
+ tab
812
+ tabh
813
+ tak
814
+ take
815
+ taken
816
+ tarah
817
+ teen
818
+ teeno
819
+ teesra
820
+ teesre
821
+ teesri
822
+ tell
823
+ tends
824
+ tera
825
+ tere
826
+ teri
827
+ th
828
+ tha
829
+ than
830
+ thank
831
+ thanks
832
+ thanx
833
+ that
834
+ that'll
835
+ thats
836
+ that's
837
+ the
838
+ theek
839
+ their
840
+ theirs
841
+ them
842
+ themselves
843
+ then
844
+ thence
845
+ there
846
+ thereafter
847
+ thereby
848
+ therefore
849
+ therein
850
+ theres
851
+ there's
852
+ thereupon
853
+ these
854
+ they
855
+ they'd
856
+ they'll
857
+ they're
858
+ they've
859
+ thi
860
+ thik
861
+ thing
862
+ think
863
+ thinking
864
+ third
865
+ this
866
+ tho
867
+ thoda
868
+ thodi
869
+ thorough
870
+ thoroughly
871
+ those
872
+ though
873
+ thought
874
+ three
875
+ through
876
+ throughout
877
+ thru
878
+ thus
879
+ tjhe
880
+ to
881
+ together
882
+ toh
883
+ too
884
+ took
885
+ toward
886
+ towards
887
+ tried
888
+ tries
889
+ true
890
+ truly
891
+ try
892
+ trying
893
+ tu
894
+ tujhe
895
+ tum
896
+ tumhara
897
+ tumhare
898
+ tumhari
899
+ tune
900
+ twice
901
+ two
902
+ um
903
+ umm
904
+ un
905
+ under
906
+ unhe
907
+ unhi
908
+ unho
909
+ unhone
910
+ unka
911
+ unkaa
912
+ unke
913
+ unki
914
+ unko
915
+ unless
916
+ unlikely
917
+ unn
918
+ unse
919
+ until
920
+ unto
921
+ up
922
+ upar
923
+ upon
924
+ us
925
+ use
926
+ used
927
+ useful
928
+ uses
929
+ usi
930
+ using
931
+ uska
932
+ uske
933
+ usne
934
+ uss
935
+ usse
936
+ ussi
937
+ usually
938
+ vaala
939
+ vaale
940
+ vaali
941
+ vahaan
942
+ vahan
943
+ vahi
944
+ vahin
945
+ vaisa
946
+ vaise
947
+ vaisi
948
+ vala
949
+ vale
950
+ vali
951
+ various
952
+ ve
953
+ very
954
+ via
955
+ viz
956
+ vo
957
+ waala
958
+ waale
959
+ waali
960
+ wagaira
961
+ wagairah
962
+ wagerah
963
+ waha
964
+ wahaan
965
+ wahan
966
+ wahi
967
+ wahin
968
+ waisa
969
+ waise
970
+ waisi
971
+ wala
972
+ wale
973
+ wali
974
+ want
975
+ wants
976
+ was
977
+ wasn
978
+ wasnt
979
+ wasn't
980
+ way
981
+ we
982
+ we'd
983
+ well
984
+ we'll
985
+ went
986
+ were
987
+ we're
988
+ weren
989
+ werent
990
+ weren't
991
+ we've
992
+ what
993
+ whatever
994
+ what's
995
+ when
996
+ whence
997
+ whenever
998
+ where
999
+ whereafter
1000
+ whereas
1001
+ whereby
1002
+ wherein
1003
+ where's
1004
+ whereupon
1005
+ wherever
1006
+ whether
1007
+ which
1008
+ while
1009
+ who
1010
+ whoever
1011
+ whole
1012
+ whom
1013
+ who's
1014
+ whose
1015
+ why
1016
+ will
1017
+ willing
1018
+ with
1019
+ within
1020
+ without
1021
+ wo
1022
+ woh
1023
+ wohi
1024
+ won
1025
+ wont
1026
+ won't
1027
+ would
1028
+ wouldn
1029
+ wouldnt
1030
+ wouldn't
1031
+ y
1032
+ ya
1033
+ yadi
1034
+ yah
1035
+ yaha
1036
+ yahaan
1037
+ yahan
1038
+ yahi
1039
+ yahin
1040
+ ye
1041
+ yeah
1042
+ yeh
1043
+ yehi
1044
+ yes
1045
+ yet
1046
+ you
1047
+ you'd
1048
+ you'll
1049
+ your
1050
+ you're
1051
+ yours
1052
+ yourself
1053
+ yourselves
1054
+ you've
1055
+ yup