rishikesh committed on
Commit
343b88e
·
1 Parent(s): bcc189a

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +67 -0
  3. justin_rf_model.sav +3 -0
  4. minMaxScaler.sav +0 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ justin_rf_model.sav filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ from datetime import datetime
4
+ from sentence_transformers import SentenceTransformer
5
+ import numpy as np
6
+ import re
7
+
8
# Load the pickled scaler and model once, at import time. The context
# managers guarantee the file handles are closed as soon as unpickling ends
# (the bare open() calls left them to the garbage collector).
# NOTE(review): pickle.load executes arbitrary code from the file; these
# .sav artifacts must only ever come from a trusted source.
with open('minMaxScaler.sav', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)
with open('justin_rf_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Sentence-embedding model used by featurize() to encode the tweet text.
vectorizer = SentenceTransformer('all-MiniLM-L6-v2')
11
+
12
def featurize(tweet, tweet_date, tweet_time):
    """Build the single-row model input for one tweet.

    Links in the text are replaced by the literal token ``url``; the cleaned
    text is encoded with the module-level ``vectorizer`` and eight numeric
    features are scaled with the module-level ``loaded_scaler``. The returned
    row holds the text embedding first, followed by the scaled features.

    Args:
        tweet: Tweet text.
        tweet_date: Object exposing ``year``, ``month``, ``day`` and
            ``weekday()``.
        tweet_time: Object exposing ``hour`` and ``minute``.

    Returns:
        A 2-D array with a single row (shape ``(1, n)``).
    """
    # Normalise links first so mentions/hashtags are counted on the same
    # text that gets embedded.
    cleaned = re.sub(r'http\S+', 'url', tweet)

    mentions = re.findall(r"@(\w+)", cleaned)
    hashtags = re.findall(r"#(\w+)", cleaned)

    # Order matters: tags, hashtags, minute, hour, day, weekday, month, year
    # (presumably the column order the scaler was fitted with -- confirm
    # against the training code before changing it).
    numeric = np.array([
        float(len(mentions)),
        float(len(hashtags)),
        float(tweet_time.minute),
        float(tweet_time.hour),
        float(tweet_date.day),
        float(tweet_date.weekday()),
        float(tweet_date.month),
        float(tweet_date.year),
    ])
    scaled = loaded_scaler.transform(numeric.reshape(1, -1))

    embedding = vectorizer.encode(cleaned)
    return np.concatenate([embedding, scaled.flatten()]).reshape(1, -1)
37
+
38
def getOutput(inp_vec):
    """Return the model's prediction for the first row of ``inp_vec``.

    Args:
        inp_vec: Input passed unchanged to ``loaded_model.predict``.

    Returns:
        The first element of the prediction result.
    """
    predictions = loaded_model.predict(inp_vec)
    first_prediction = predictions[0]
    return first_prediction
41
+
42
def main():
    """Render the Streamlit page: collect a tweet, predict, show the result.

    A form gathers the tweet text, date and time. On submit the inputs are
    featurized, passed to the model, and the predicted class (1, 2 or 3) is
    translated into a human-readable engagement message.
    """
    # Predicted class -> message shown to the user.
    engagement_messages = {
        1: 'Given tweet will get low engagement - less than 8800 retweets',
        2: 'Given tweet will get moderate engagement - retweets in the range of 8800 to 24000',
        3: 'Given tweet will get high engagement - more than 24000 retweets',
    }

    st.title("Welcome to tweet engagement predictor")
    with st.form("my_form"):
        tweet = st.text_input('Enter a tweet')
        tweet_date = st.date_input("Enter the date of tweeting",
                                   value=datetime(2018, 1, 1, 0, 0),
                                   min_value=datetime(2015, 1, 1, 0, 0),
                                   max_value=datetime(2021, 12, 12, 23, 59))
        tweet_time = st.time_input('Enter the time of tweeting')

        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")

    if not submitted:
        return

    # An empty tweet carries no text signal; ask for input instead of
    # predicting on an empty string.
    if not tweet.strip():
        st.warning('Please enter a tweet before submitting.')
        return

    inp_vec = featurize(tweet, tweet_date, tweet_time)
    output = getOutput(inp_vec)

    message = engagement_messages.get(output)
    if message is None:
        # Previously an unknown class produced no output at all.
        st.error(f'Unexpected model output: {output!r}')
    else:
        st.write(message)
65
+
66
# Run the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()
justin_rf_model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11704df4905726dbaf20ac6c7aa4bcec32046bdbf0b6b1ee332541aba2c55005
3
+ size 17903631
minMaxScaler.sav ADDED
Binary file (961 Bytes). View file