Spaces:
Sleeping
Sleeping
add application file
Browse files- app.py +75 -0
- pages/01_debug.py +37 -0
app.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022)
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import streamlit as st
|
| 16 |
+
from streamlit.logger import get_logger
|
| 17 |
+
import pandas as pd
|
| 18 |
+
from utils import plotly_line_chart
|
| 19 |
+
import plotly.graph_objects as go
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# Module-level logger obtained from Streamlit's logging helper.
# NOTE(review): not referenced anywhere else in the visible code.
LOGGER = get_logger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def run():
    """Render the restaurant review analysis dashboard.

    Sidebar controls select the first year to display, the rolling window
    size, and the aspect columns to plot. The page then shows:

    * rolling ``sentiment`` / ``emotion`` trends and the rolling mean
      ``rating``, built from the pickled ``reviews`` frame;
    * rolling aspect positivity for the selected aspects, built from the
      pickled ``stats`` frame;
    * a bar chart of the most frequently populated columns of ``stats``.

    Both pickle files are read from the current working directory on every
    rerun of the script.
    """
    st.set_page_config(
        page_title="Restaurant review analysis",
        page_icon="👋",
    )

    st.write("# Restaurant review analysis")
    start_year = st.sidebar.slider('Year', 2016, 2023, 2017)
    smooth = st.sidebar.selectbox('Rolling window', [7, 14, 30, 60, 90], index=2)

    # NOTE(review): pickle must only be loaded from trusted files; these are
    # presumably shipped with the app -- confirm.
    reviews = pd.read_pickle('reviews')

    # Group once and reuse; every series below is aggregated per period.
    by_period = reviews.groupby('period')
    # Numerators and denominators must share the same rolling window,
    # otherwise the ratios are only meaningful when the window happens to
    # be 30 (the previous hard-coded denominator window).
    sentiment_total = by_period['s'].sum().rolling(smooth).sum()
    # count() skips missing values; presumably 's' is missing for reviews
    # without a comment -- TODO confirm against the data preparation step.
    sentiment_count = by_period['s'].count().rolling(smooth).sum()
    response_count = by_period['starRating'].count().rolling(smooth).sum()
    trend = pd.DataFrame(
        {
            'sentiment': sentiment_total / sentiment_count,
            'emotion': sentiment_count / response_count,
            'rating': by_period['rating'].mean().rolling(smooth).mean(),
        }
    )[str(start_year):]
    trend.index = pd.to_datetime(trend.index.astype(str))

    with st.expander('Customer sentiment and emotions', expanded=True):
        st.write('''emotion defined as percentage of response with review comment''')
        plotly_line_chart(
            trend,
            columns=['sentiment', 'emotion'],
            styles={'emotion': dict(dash='dot', color=('rgb(128, 128, 128)'))},
        )
    with st.expander('score rating', expanded=False):
        # NOTE(review): the yaxis mapping names 'sentiment', which is not
        # plotted here; kept unchanged because plotly_line_chart's handling
        # of it is not visible from this file.
        plotly_line_chart(
            trend,
            columns=['rating'],
            yaxis={'sentiment': 'y2'},
        )
        # NOTE(review): source indentation was lost; confirm this table
        # belongs inside the expander rather than below it.
        st.write(trend)

    absa = pd.read_pickle('stats')
    cols = st.sidebar.multiselect(
        'customer view point',
        options=['food', 'service', 'atmosphere', 'staff', 'dish', 'price',
                 'restaurant', 'owner', 'cuisine', 'rice', 'drinks'],
        default=['food', 'service', 'price'],
    )

    absa_by_period = absa.groupby('Period')
    positivity = (
        absa_by_period.sum().rolling(smooth).sum()
        / absa_by_period.count().rolling(smooth).sum()
    )
    positivity = positivity[str(start_year):]
    # Only plot the selected aspects that actually exist as columns.
    new_cols = [c for c in cols if c in positivity]
    positivity.index = pd.to_datetime(positivity.index.astype(str))
    plotly_line_chart(positivity, columns=new_cols)

    top_words = st.slider('top X customer concerns', 5, 50, 30)
    # Non-null count per column, most populated columns first.
    top_mentions = absa.count().sort_values(ascending=False).head(top_words)
    with st.expander('Customer top mentions', expanded=True):
        fig = go.Figure()
        fig.add_trace(
            go.Bar(x=top_mentions.index, y=top_mentions.values)
        )
        st.plotly_chart(fig)
|
| 71 |
+
#st.write(top_mentions)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
if __name__ == "__main__":
    # Build the dashboard only when this file is executed as the main
    # script, not when it is imported.
    run()
|
pages/01_debug.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from streamlit.logger import get_logger
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from transformers import pipeline
|
| 5 |
+
from setfit import AbsaModel
|
| 6 |
+
import translators as ts
|
| 7 |
+
from config import base_model, senti_map, absa_model, senti_color
|
| 8 |
+
from annotated_text import annotated_text
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
LOGGER = get_logger(__name__)


@st.cache_resource
def _load_models():
    """Build the sentiment pipeline and the aspect-based sentiment model.

    Wrapped in ``st.cache_resource`` so the models are constructed once and
    then reused, instead of being rebuilt on every rerun of this page.

    Returns:
        A ``(sentiment_pipeline, aspect_model)`` tuple.
    """
    sentiment_pipeline = pipeline(
        "sentiment-analysis", model=base_model, tokenizer=base_model
    )
    aspect_model = AbsaModel.from_pretrained(absa_model[0], absa_model[1])
    return sentiment_pipeline, aspect_model


# Streamlit re-executes the page script on each widget interaction; the
# cached loader keeps these module-level names cheap to rebuild.
senti_task, absa = _load_models()
|
| 15 |
+
|
| 16 |
+
def _annotate_tokens(text, polarity_by_span, colors):
    """Build the ``annotated_text`` pieces for ``text``.

    ``text`` is split on single spaces. A token that matches a detected
    span becomes a ``(token, polarity, color)`` tuple; every other token is
    passed through as a plain string. Each token keeps a trailing space.

    Args:
        text: The text to annotate.
        polarity_by_span: Mapping of detected span -> polarity label.
        colors: Mapping of polarity label -> highlight color.

    Returns:
        A list mixing plain strings and annotation tuples.
    """
    edge_punctuation = ".,;:!?\"'()[]{}"
    pieces = []
    for token in text.split(' '):
        # Prefer the exact token; otherwise retry without surrounding
        # punctuation so that "food," still matches the span "food".
        span = token if token in polarity_by_span else token.strip(edge_punctuation)
        if span in polarity_by_span:
            polarity = polarity_by_span[span]
            pieces.append((token + ' ', polarity, colors[polarity]))
        else:
            pieces.append(token + ' ')
    return pieces


def run():
    """Interactively analyse one pasted customer review.

    Does nothing beyond showing the input box until non-blank text is
    entered. The text is then translated to the selected language, its
    overall sentiment is reported, and the aspect spans returned by the
    ABSA model are listed and highlighted inside the translated text.
    """
    st.write('Copy and paste comment into below text box.')
    txt = st.text_area('customer review')
    if not txt.strip():
        return

    lang = st.selectbox('pick output language', ['en', 'hi', 'zh'], index=0)
    with st.spinner(f'translate to {lang}'):
        # Holds the review in the selected language, not necessarily English.
        translated = ts.translate_text(txt, to_language=lang, translator='google')

    with st.spinner('working on comment sentiment, please wait...'):
        sentiment = senti_task(translated)
    top = sentiment[0]
    st.write(f"it's {senti_map[top['label']]} feedback with a confidence of {top['score']:.1%}")

    with st.spinner('detecting aspect sentiment...'):
        preds = absa(translated)
    st.write(f"the comment talks about: {','.join([t['span'] for t in preds])}, detailed sentiments as follow:")

    polarity_by_span = {p['span']: p['polarity'] for p in preds}
    # NOTE(review): multi-word spans still cannot match a single token.
    annotated_text(
        _annotate_tokens(translated, polarity_by_span, senti_color)
    )
|
| 35 |
+
|
| 36 |
+
if __name__ == "__main__":
    # Render the debug page only when this file is executed as the main
    # script, not when it is imported.
    run()
|