Spaces:

srini047
/

text-based-sentiment-analyzer

Runtime error

App Files Files Community

srini047 committed on Jul 29, 2022

Commit

df00128

1 Parent(s): 074cf1f

added function file

Browse files

Files changed (1) hide show

sentiment.py +238 -0

sentiment.py ADDED Viewed

	@@ -0,0 +1,238 @@

+#!/usr/bin/env python
+# coding: utf-8
+# # Text Based Sentiment Analysis
+# # IMPORTING NECESSARY MODULES
+# In[1]:
+import numpy as np # For linear algebra
+import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv)
+import matplotlib.pyplot as plt  # For Visualisation
+# get_ipython().run_line_magic('matplotlib', 'inline')
+import seaborn as sns  # For Visualisation
+from bs4 import BeautifulSoup  # For Text Parsing
+# # IMPORTING DATASET
+# In[2]:
# Load the raw review table and discard every row that has a missing value
# in any column.
data = pd.read_csv('Reviews.csv').dropna()
# # DATA PREPROCESSING & VISUALISATION
# Distinct values found in the 'Score' column after cleaning.  Nothing in the
# visible part of this script reads it again; it is kept for inspection.
score_unique = data['Score'].unique()
+#print(score_unique)
+# In[7]:
+#   0-> NEGATIVE REVIEW
+#   1-> NEUTRAL REVIEW
+#   2-> POSITIVE REVIEW
# Translate each numeric score into a three-way sentiment label:
#   below 3 -> 0, exactly 3 -> 1, above 3 -> 2.
# The three tests are mutually exclusive, so a value that satisfies none of
# them (nothing is appended for it) is skipped.
a = []
for score in data['Score']:
    if score > 3:
        a.append(2)
    elif score == 3:
        a.append(1)
    elif score < 3:
        a.append(0)
+# In[8]:
# Tally how many reviews fall into each sentiment class.
r_0 = a.count(0)
r_1 = a.count(1)
# Everything that is neither 0 nor 1 is counted as class 2.
r_2 = len(a) - r_0 - r_1
+# print('Negative Reviews:',r_0)
+# print('Neutral Reviews:',r_1)
+# print('Positive Reviews:',r_2)
+# In[9]:
+# sns.countplot(a)
+# plt.xlabel('Reviews', color = 'red')
+# plt.ylabel('Count', color = 'red')
+# plt.xticks([0,1,2],['Negative','Neutral','Positive'])
+# plt.title('COUNT PLOT', color = 'r')
+# plt.show()
+# In[10]:
# Attach the three-way labels, then keep only the review text and its label.
data['sentiment'] = a
final_dataset = data[['Text', 'sentiment']]
# Separate the label-2 rows and the label-0 rows; label-1 rows are left out
# of both subsets.
data_p = final_dataset.loc[final_dataset['sentiment'] == 2]
data_n = final_dataset.loc[final_dataset['sentiment'] == 0]
+#len(data_p), len(data_n)
+# In[12]:
# Draw 5000 rows (with replacement) from each class so the two classes are
# equally represented.  The index range is derived from the actual frame
# length rather than a fixed row count, so the script keeps working when the
# CSV has a different number of rows, and position 0 is eligible as well
# (np.random.randint includes `low` and excludes `high`).
datap = data_p.iloc[np.random.randint(0, len(data_p), 5000), :]
datan = data_n.iloc[np.random.randint(0, len(data_n), 5000), :]
# Stack the positive sample on top of the negative sample; from here on
# `data` refers to this balanced subset instead of the full table.
data = pd.concat([datap, datan])
+# In[14]:
# Collapse the labels to a binary target: 0 stays 0 and 2 becomes 1.
# Any other value is skipped, i.e. contributes no entry to the list.
_BINARY_LABEL = {0: 0, 2: 1}
c = [
    _BINARY_LABEL[label]
    for label in data['sentiment']
    if label in _BINARY_LABEL
]
data['sentiment'] = c
+# In[15]:
+# sns.countplot(data['sentiment'])
+# plt.show()
+# In[16]:
def strip_html(text):
    """Return the text content of *text* with HTML markup removed.

    The input is parsed with BeautifulSoup's ``"html.parser"`` backend and
    the result of ``get_text()`` is returned.
    """
    return BeautifulSoup(text, "html.parser").get_text()
# Store the markup-free text in a new 'review' column, then remove the raw
# 'Text' column, which is no longer needed.
data['review'] = data['Text'].apply(strip_html)
data = data.drop(columns='Text')
+#data.head()
+# # MODEL BUILDING
+# In[17]:
+import nltk  #Natural Language Processing Toolkit
import string

# Translation table that deletes every ASCII punctuation character.  It is
# built once at import time instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def punc_clean(text):
    """Return *text* with all ASCII punctuation characters removed.

    The characters removed are exactly those in ``string.punctuation``;
    letters, digits, whitespace and non-ASCII symbols are left untouched.

    Args:
        text: The string to clean.

    Returns:
        A new string without the punctuation characters.
    """
    # A single C-level pass replaces the per-character Python filter.
    return text.translate(_PUNCTUATION_TABLE)
# Remove punctuation from every review and write the result back to the
# same column.
cleaned_reviews = data['review'].apply(punc_clean)
data['review'] = cleaned_reviews
+#data.head(2)
+# In[18]:
from functools import lru_cache


@lru_cache(maxsize=1)
def _review_stopwords():
    """Return the NLTK English stop words, minus 'not', as a frozenset.

    The list is loaded lazily on first use and cached, so the corpus is read
    only once no matter how many reviews are processed.
    """
    words = set(nltk.corpus.stopwords.words('english'))
    # 'not' is deliberately kept in the text (presumably because negation
    # matters for sentiment -- confirm with the author).
    words.discard('not')
    return frozenset(words)


def remove_stopword(text):
    """Return *text* with English stop words removed.

    The text is tokenized with ``nltk.word_tokenize``; tokens that exactly
    match an entry of the NLTK English stop-word list (except 'not') are
    dropped and the remaining tokens are joined with single spaces.

    Args:
        text: The string to filter.

    Returns:
        The filtered tokens joined by single spaces.
    """
    stopwords = _review_stopwords()
    return ' '.join(w for w in nltk.word_tokenize(text) if w not in stopwords)
+#data['review'] = data['review'].apply(remove_stopword)
+# In[19]:
+from sklearn.feature_extraction.text import TfidfVectorizer
# Build TF-IDF features over unigrams and bigrams; min_df=1 keeps every term
# that occurs in at least one review.  The vocabulary is learned from, and
# the feature matrix computed for, the same set of reviews.
vectr = TfidfVectorizer(ngram_range=(1, 2), min_df=1)
vect_X = vectr.fit_transform(data['review'])
+# In[20]:
+from sklearn.linear_model import LogisticRegression
# Train a logistic-regression classifier on the TF-IDF features.
model = LogisticRegression()
model.fit(vect_X, data['sentiment'])
# fit() returns the estimator itself, so `clf` and `model` are the same object.
clf = model
+#clf.score(vect_X,data['sentiment'])*100
+# # PREDICTION
+# In[21]:
# Run the trained classifier on three hand-written example reviews.  The
# predictions are not stored or printed; in the notebook this script was
# exported from, each call was the last expression of its own cell.
_SAMPLE_REVIEWS = (
    '''Nice look and build quality with moderately fast everything such as refresh rate, display quality, sound, processing, gaming experience and many more ..
I didn't find any lagging or heating issue..And battery health I won't say great but I'll take that
Only cons I can say about it is camera.. sharpening picture a little much at day light and low light photo you have to compromise.''',
    '''Phone has bugs , and screen quality is poor , Avoid realme. Gaming was just over hyped''',
    '''No lags found super speed and very good performance nice phone in this budget''',
)
for _sample in _SAMPLE_REVIEWS:
    clf.predict(vectr.transform([_sample]))
+# In[ ]: