File size: 2,201 Bytes
f46aea0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55b58cc
 
f46aea0
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import streamlit as st
import pickle
from datetime import datetime
from sentence_transformers import SentenceTransformer
import numpy as np
import re

# Load the prediction artifacts once at import time; `featurize` and
# `getOutput` read these module-level names.
#
# NOTE(security): pickle.load can execute arbitrary code embedded in the file
# it reads. Only ship .sav files produced by a trusted training run.
# The files are opened with `with` so the handles are closed right after
# unpickling instead of being left for the garbage collector.
with open('minMaxScaler.sav', 'rb') as scaler_file:
    # presumably a fitted scikit-learn MinMaxScaler (from the file name) — confirm
    loaded_scaler = pickle.load(scaler_file)
with open('justin_rf_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# Sentence embedding model used to turn the tweet text into a vector.
vectorizer = SentenceTransformer('all-MiniLM-L6-v2')

def featurize(tweet, tweet_date, tweet_time):
    """Turn one tweet and its posting date/time into a single model input row.

    Args:
        tweet: Text of the tweet.
        tweet_date: Object exposing ``year``, ``month``, ``day`` and
            ``weekday()`` (e.g. ``datetime.date``).
        tweet_time: Object exposing ``hour`` and ``minute``
            (e.g. ``datetime.time``).

    Returns:
        A 2-D array with one row: the text embedding followed by the eight
        scaled numeric features.
    """
    # Numeric features, in the column order expected by the scaler:
    # mentions and hashtags are filled in below once the text is cleaned.
    calendar_features = [
        float(tweet_time.minute),
        float(tweet_time.hour),
        float(tweet_date.day),
        float(tweet_date.weekday()),
        float(tweet_date.month),
        float(tweet_date.year),
    ]

    # Collapse every link into the literal token 'url' before counting/embedding.
    cleaned = re.sub(r'http\S+', 'url', tweet)

    # Count tagged accounts (@name) and hashtags (#name) in the cleaned text.
    mention_total = float(len(re.findall(r"@(\w+)", cleaned)))
    hashtag_total = float(len(re.findall(r"#(\w+)", cleaned)))

    numeric_row = np.array([mention_total, hashtag_total] + calendar_features)
    scaled_row = loaded_scaler.transform(numeric_row.reshape(1, -1))

    # Embedding first, scaled numeric features appended after it.
    embedding = vectorizer.encode(cleaned)
    combined = np.concatenate([embedding, scaled_row.flatten()])
    return combined.reshape(1, -1)

def getOutput(inp_vec):
    """Return the model's prediction for the first row of ``inp_vec``.

    Args:
        inp_vec: Feature array passed straight to ``loaded_model.predict``.

    Returns:
        The first element of the prediction result.
    """
    predictions = loaded_model.predict(inp_vec)
    first_prediction = predictions[0]
    return first_prediction

def main():
    """Render the tweet engagement form and show the prediction on submit.

    The form collects the tweet text, a date between 2015-01-01 and
    2021-12-12, and a time. After submission the inputs are echoed back and
    the predicted class (1, 2 or 3) is translated into a readable message.
    """
    # Predicted class label -> message shown to the user.
    engagement_messages = (
        (1, 'Given tweet will get low engagement - less than 8800 retweets '),
        (2, 'Given tweet will get moderate engagement - retweets in the range of 8800 to 24000'),
        (3, 'Given tweet will get high engagement - more than 24000 retweets '),
    )

    st.title("Welcome to tweet engagement predictor")
    with st.form("my_form", clear_on_submit=True):
        tweet = st.text_input('Enter a tweet')
        tweet_date = st.date_input(
            "Enter the date of tweeting",
            value=datetime(2018, 1, 1, 0, 0),
            min_value=datetime(2015, 1, 1, 0, 0),
            max_value=datetime(2021, 12, 12, 23, 59),
        )
        tweet_time = st.time_input('Enter the time of tweeting')

        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")

    if not submitted:
        return

    # An empty tweet carries no text to score; ask for input instead of
    # running the model on a blank string.
    if not tweet or not tweet.strip():
        st.warning('Please enter a tweet before submitting.')
        return

    inp_vec = featurize(tweet, tweet_date, tweet_time)
    output = getOutput(inp_vec)
    st.write(tweet)
    st.write(tweet_date)
    st.write(tweet_time)

    # Compare with == (as opposed to a dict lookup) so numpy scalar labels
    # returned by the model match the plain-int keys.
    for label, message in engagement_messages:
        if output == label:
            st.write(message)
            break
    else:
        # Previously an unexpected label rendered nothing at all.
        st.error(f'Unexpected prediction value: {output!r}')

# Build the page only when this file is executed as the entry script.
if __name__ == '__main__':
    main()