Upload 2 files
Browse files- Review_Analyzer.py +70 -0
- main.py +33 -0
Review_Analyzer.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np # linear algebra
|
| 2 |
+
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
|
| 3 |
+
import nltk # Natural Language Processing Toolkit
|
| 4 |
+
from nltk.corpus import stopwords
|
| 5 |
+
import re # Natural Language Processing Toolkit
|
| 6 |
+
from textblob import TextBlob # Python library for Sentiment analysis
|
| 7 |
+
|
| 8 |
+
# Stopwords for preprocessing
# Download the NLTK corpora needed at import time: 'stopwords' supplies the
# filter list below and 'punkt' backs nltk.word_tokenize in text_preprocessor.
# NOTE(review): these downloads run on every import of this module — a
# network side effect; consider guarding or moving behind a setup step.
nltk.download('stopwords')
nltk.download('punkt')
# English stopword list; 'not' is kept because negation flips sentiment
# and dropping it would invert the meaning of negative reviews.
all_stopwords = stopwords.words('english')
all_stopwords.remove('not')
|
| 13 |
+
|
| 14 |
+
def text_preprocessor(text):
    """Normalize a raw review string for sentiment scoring.

    Replaces every non-alphabetic character with a space, lower-cases the
    result, and drops English stopwords (except 'not', which the module
    setup deliberately keeps).

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        Space-joined sequence of the surviving lower-case tokens.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', text).lower()
    kept = (word for word in nltk.word_tokenize(letters_only)
            if word not in all_stopwords)
    return ' '.join(kept)
|
| 21 |
+
|
| 22 |
+
def get_sentiment(text):
    """Score a single review with TextBlob.

    The review is preprocessed first (see text_preprocessor), then
    TextBlob's sentiment model is applied to the cleaned text.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    tuple
        (polarity, subjectivity, label): polarity and subjectivity are
        floats rounded to 2 decimals; label is one of 'Positive',
        'Negative' or 'Neutral'.
    """
    scores = TextBlob(text_preprocessor(text)).sentiment
    polarity = round(scores.polarity, 2)
    subjectivity = round(scores.subjectivity, 2)

    # Label thresholds mirror the original logic exactly:
    # >= 0.3 -> Positive, <= 0.0 -> Negative, otherwise Neutral.
    # NOTE(review): a polarity of exactly 0.0 is labelled 'Negative' —
    # confirm that is intended rather than 'Neutral'.
    if polarity >= 0.3:
        label = 'Positive'
    elif polarity <= 0.:
        label = 'Negative'
    else:
        label = 'Neutral'

    return polarity, subjectivity, label
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def analyze_data(file_path, file_format='csv'):
    """Read a tabular file of reviews and append sentiment columns.

    Parameters
    ----------
    file_path : str or file-like
        Path or buffer accepted by the matching pandas reader.
    file_format : str, default 'csv'
        One of 'csv', 'excel', or 'parquet'.

    Returns
    -------
    pandas.DataFrame
        The loaded data with added 'Polarity', 'Sentiment' and
        'Subjectivity' columns (one value per row of 'Review').

    Raises
    ------
    ValueError
        If `file_format` is not a supported format.  (The original code
        fell through and crashed later with a NameError on `data`.)
    KeyError
        If the data has no 'Review' column.
    """
    # Dispatch table instead of an if/elif chain; the explicit error
    # replaces the original silent fall-through for unknown formats.
    readers = {
        'csv': pd.read_csv,
        'excel': pd.read_excel,
        'parquet': pd.read_parquet,
    }
    try:
        reader = readers[file_format]
    except KeyError:
        raise ValueError(
            f"Unsupported file_format {file_format!r}; "
            "expected 'csv', 'excel' or 'parquet'"
        ) from None
    data = reader(file_path)

    # NOTE: get_sentiment() preprocesses each review internally, so no
    # separate pass is needed here.  The original
    # `preprocessed_data['Review'].apply(text_preprocessor)` discarded
    # its result (and aliased `data` without copying) — dead code, removed.
    polarity = []
    subjectivity = []
    sentiment = []
    for text in data['Review']:
        pol, subj, sent = get_sentiment(text)
        polarity.append(pol)
        subjectivity.append(subj)
        sentiment.append(sent)

    data['Polarity'] = polarity
    data['Sentiment'] = sentiment
    data['Subjectivity'] = subjectivity
    return data
|
main.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
import streamlit as st

# Explicit imports instead of `from Review_Analyzer import *` so the
# names this script depends on are visible to readers and linters.
from Review_Analyzer import analyze_data, get_sentiment

st.header('Review Analysis')

# --- Single-review analysis -------------------------------------------------
with st.expander('Analyze Text'):
    text = st.text_input('Text here: ')
    if text:
        # get_sentiment returns (polarity, subjectivity, label); unpack
        # instead of overwriting `text` with the tuple and indexing it.
        polarity, subjectivity, sentiment = get_sentiment(text)
        st.write('Polarity: ', polarity)
        st.write('Subjectivity: ', subjectivity)
        st.write('Sentiment: ', sentiment)

# --- Batch analysis of an uploaded file -------------------------------------
with st.expander('Analyze CSV'):
    upl = st.file_uploader('Upload file')
    if upl:
        file_format = st.selectbox('Select file format', ['csv', 'excel', 'parquet'])
        df = analyze_data(upl, file_format=file_format)
        st.write(df.head(10))

        @st.cache_data
        def convert_df(df):
            # IMPORTANT: Cache the conversion to prevent computation on every rerun
            return df.to_csv().encode('utf-8')

        # Keep the download section inside `if upl:` — `df` only exists
        # after a file was uploaded, otherwise this raised a NameError.
        csv = convert_df(df)

        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name='sentiment.csv',
            mime='text/csv',
        )
|