Klaus04's picture
Update app.py
7e76d84 verified
import joblib
import numpy as np
import re
import string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import TweetTokenizer
import nltk
import pickle
import gradio as gr
# Artifacts loaded once at import time and shared by the functions below.
#
# NOTE(security): pickle.load and joblib.load can execute arbitrary code while
# deserializing, so "freqs.pkl" and "sentiment1.pkl" must come from a trusted
# source (here they are expected to ship alongside this file).

# Frequency table queried by extract_features with (word, label) tuple keys,
# where label is 1 or 0.
with open("freqs.pkl", "rb") as fp:
    # The with-statement closes the file on exit; no explicit close() needed.
    freqs = pickle.load(fp)

# Object whose .predict() is called by predict() on the feature matrix.
model = joblib.load("sentiment1.pkl")

# Make sure the NLTK stop-word corpus is available before reading it.
nltk.download("stopwords")
stop = stopwords.words("english")

# NOTE: punc is a plain string, so `token in punc` is a substring test rather
# than a per-character set lookup.
punc = string.punctuation
def process_tweet(tweet):
    """Clean, tokenize, filter and stem a raw tweet.

    Steps, in order:
      1. Remove a leading "RT" retweet marker, http/https URLs and every
         "#" character (the hashtag word itself is kept).
      2. Tokenize with NLTK's TweetTokenizer: lower-cased, @handles
         stripped, exaggerated character runs shortened.
      3. Drop tokens found in the module-level ``stop`` list or matching the
         module-level ``punc`` string.
      4. Stem each remaining token with the Porter stemmer.

    Args:
        tweet: Raw tweet text.

    Returns:
        The stemmed tokens as a list, in their original order.
    """
    # Applied sequentially; the retweet marker is only matched at the very
    # start of the text.
    cleanup_rules = (
        (r'^RT[\s]+', ""),
        (r'https?://[^\s\n\r]+', ''),
        (r'#', ''),
    )
    cleaned = tweet
    for pattern, replacement in cleanup_rules:
        cleaned = re.sub(pattern, replacement, cleaned)

    words = TweetTokenizer(
        preserve_case=False,
        strip_handles=True,
        reduce_len=True,
    ).tokenize(cleaned)

    porter = PorterStemmer()
    # `punc` is a string, so the second test is a substring check: it also
    # rejects multi-character tokens that happen to be a contiguous slice of
    # string.punctuation. Kept as-is so the tokens stay aligned with the keys
    # of the pre-built frequency table.
    return [
        porter.stem(token)
        for token in words
        if token not in stop and token not in punc
    ]
def extract_features(tweets, freqs):
    """Build the feature matrix for a batch of tweets.

    Each tweet becomes one row ``[1, score_1, score_0]``:
      * column 0 is the constant 1,
      * column 1 is the sum of ``freqs[(word, 1)]`` over the processed tokens,
      * column 2 is the sum of ``freqs[(word, 0)]`` over the processed tokens.
    Tokens missing from ``freqs`` contribute 0.

    Negation heuristic: when the raw tweet text contains the substring
    ``"not"``, the currently smaller score is overwritten with the larger one
    plus a fixed boost, which swaps which of the two scores dominates. The
    check is a plain, case-sensitive substring test, so it also fires on
    words such as "nothing" or "note" and does not fire on "Not".

    The heuristic is applied to every row. For a single-tweet batch (the way
    predict() calls this function) the result is the same as before; for
    larger batches it no longer special-cases only the first tweet. An empty
    batch yields an empty ``(0, 3)`` array instead of failing on
    ``tweets[0]``.

    Args:
        tweets: Sequence of raw tweet strings.
        freqs: Mapping from ``(word, label)`` tuples to counts, label 1 or 0.

    Returns:
        Integer numpy array of shape ``(len(tweets), 3)``.
    """
    # Margin by which the losing score is pushed past the winning one when a
    # negation is detected.
    negation_boost = 50

    m = len(tweets)
    # Start every row as [bias=1, 0, 0]; np.tile yields shape (0, 3) for m == 0.
    x = np.tile(np.array([1, 0, 0]), (m, 1))

    for i, tweet in enumerate(tweets):
        for word in process_tweet(tweet):
            x[i, 1] += freqs.get((word, 1), 0)
            x[i, 2] += freqs.get((word, 0), 0)

        if "not" in tweet:
            if x[i, 1] > x[i, 2]:
                x[i, 2] = x[i, 1] + negation_boost
            else:
                x[i, 1] = x[i, 2] + negation_boost

    return x
def predict(tweet, freqs=freqs):
    """Classify one tweet and return a human-readable sentiment label.

    The tweet is wrapped in a one-element batch, converted to features with
    extract_features, and passed to the module-level ``model``.

    Args:
        tweet: Raw tweet text.
        freqs: Frequency table forwarded to extract_features. Defaults to the
            module-level table; the default is bound when this function is
            defined.

    Returns:
        "Negative comment" when the model's prediction equals 0, otherwise
        "Positive comment".
    """
    features = extract_features([tweet], freqs)
    # The batch has a single row, so the comparison result is used directly
    # as the truth value.
    label = model.predict(features)
    return "Negative comment" if (label == 0) else "Positive comment"
# Gradio UI: a tweet input box, a sentiment output box, a row of sample
# tweets, and a Submit button that runs predict() on the input text.
with gr.Blocks() as demo:
    Tweet = gr.Textbox(
        label="Tweet",
        placeholder="Enter your tweet here",
    )
    out = gr.Textbox(label="Sentiment")

    # Sample inputs wired to the same input/output components as the button.
    with gr.Row():
        gr.Markdown("## Text examples")
        gr.Examples(
            ["I am not good", "I am :)", "it is bad"],
            fn=predict,
            inputs=Tweet,
            outputs=out,
        )

    btn = gr.Button(value="Submit")
    btn.click(fn=predict, inputs=Tweet, outputs=out)

# Start the web app.
demo.launch()