# Uploaded by jiandong with huggingface_hub (revision 043f774).
from typing import Literal
from pydantic import BaseModel, Field
from app_store_scraper import AppStore
from transformers import pipeline
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from workcell.integrations.types import PlotlyPlot
# Country codes accepted by the `country` field of `Input`.
COUNTRIES = Literal["us", "cn", "uk", "jp"]
# App names accepted by the `app_name` field of `Input`.
APPS = Literal["slack", "tiktok", "wechat", "tesla", "facebook", "instagram", "twitter"]
class Input(BaseModel):
    # Request schema consumed by `appstore_reviews`: both fields are required
    # and restricted to the literal values listed in COUNTRIES / APPS.
    country: COUNTRIES = Field(...)
    app_name: APPS = Field(...)
def get_appstore_reviews(country, app_name, how_many=100):
    """Fetch reviews for one app from the App Store.

    Args:
        country: Country code passed to the scraper, e.g. "cn", "us", "uk", "jp".
        app_name: App name passed to the scraper, e.g. "twitter", "tesla".
        how_many: Number of reviews requested from the scraper (default 100).

    Returns:
        The scraper's ``reviews`` attribute after the fetch. The caller in
        this file turns it into a DataFrame and reads ``review``, ``date``
        and ``rating`` from it, so it is presumably a list of dicts.
    """
    scraper = AppStore(country=country, app_name=app_name)
    # Trigger the fetch; the results are read back from `scraper.reviews`.
    scraper.review(how_many=how_many)
    return scraper.reviews
# Lazily-created sentiment pipeline, shared across calls so the underlying
# model is loaded once per process instead of once per request.
_SENTIMENT_PIPELINE = None


def get_sentiments(data):
    """Return sentiment predictions for a list of texts.

    Example:
        data = ["I love you", "I hate you"]
        scores = [{'label': 'POSITIVE', 'score': 0.9998},
                  {'label': 'NEGATIVE', 'score': 0.9991}]

    Args:
        data: List of strings to classify.

    Returns:
        A list with one ``{'label': ..., 'score': ...}`` dict per input text,
        or an empty list when ``data`` is empty.
    """
    global _SENTIMENT_PIPELINE

    if len(data) == 0:
        # Nothing to score, so avoid loading the model at all.
        return []

    if _SENTIMENT_PIPELINE is None:
        # Default sentiment model. An alternative is
        # pipeline(model="finiteautomata/bertweet-base-sentiment-analysis").
        _SENTIMENT_PIPELINE = pipeline("sentiment-analysis")

    # Reviews can exceed the model's maximum sequence length; truncate them
    # instead of letting the pipeline raise on over-long inputs.
    return _SENTIMENT_PIPELINE(data, truncation=True)
def process_score(data):
    """Turn one sentiment prediction into a signed score.

    Args:
        data: Dict with a ``'label'`` and a ``'score'`` entry.

    Returns:
        The score negated when the label is ``'NEGATIVE'``; the score
        unchanged for any other label.
    """
    is_negative = data['label'] == 'NEGATIVE'
    value = data['score']
    return -1 * value if is_negative else value
def process_df(df):
    """Aggregate a review DataFrame into weekly rows ready for plotting.

    The input frame is not modified: the signed scores are computed on a
    copy, so calling this function again on the same frame is safe.

    Args:
        df: DataFrame with a ``date`` column, a ``rating`` column and a
            ``scores`` column holding the prediction dicts accepted by
            ``process_score``.

    Returns:
        DataFrame with one row per weekly bin and the columns ``date``,
        ``counts`` (number of ratings), ``scores`` (mean signed sentiment),
        ``rating`` (mean rating) and ``color`` ('red' when the mean
        sentiment is below zero, otherwise 'green').
    """
    # assign() returns a new frame, leaving the caller's `scores` column intact.
    signed = df.assign(scores=df['scores'].apply(process_score))
    signed = signed.sort_values(by='date', ascending=True).reset_index(drop=True)

    weekly = signed[['date', 'scores', 'rating']].groupby(pd.Grouper(key='date', freq='W'))
    df_plot = weekly.agg(
        counts=('rating', 'count'),
        scores=('scores', 'mean'),
        rating=('rating', 'mean'),
    ).reset_index()

    df_plot['color'] = np.where(df_plot["scores"] < 0, 'red', 'green')
    return df_plot
def visualization(df, country, app_name):
    """Build a three-row bar chart from the aggregated review frame.

    Row 1 shows the review count, row 2 the rating and row 3 the sentiment
    score; all rows share the x axis (``df['date']``). The sentiment bars are
    coloured from ``df['color']``.

    Args:
        df: DataFrame with ``date``, ``counts``, ``rating``, ``scores`` and
            ``color`` columns.
        country: Country code, used only in the figure title.
        app_name: App name, used only in the figure title.

    Returns:
        The plotly figure created by ``make_subplots``.
    """
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True)

    # One entry per subplot row: (column plotted, y-axis title, extra Bar kwargs).
    panels = [
        ('counts', "review count", {}),
        ('rating', "rating", {}),
        ('scores', "sentiment", {'marker_color': df['color']}),
    ]
    for row, (column, axis_title, bar_kwargs) in enumerate(panels, start=1):
        fig.add_trace(go.Bar(x=df['date'], y=df[column], **bar_kwargs), row=row, col=1)
        fig.update_yaxes(title_text=axis_title, row=row, col=1)

    fig.update_layout(
        title='App store reviews for app: {}, country: {}'.format(app_name, country),
        hovermode="x",
        showlegend=False,
    )
    return fig
def appstore_reviews(input: Input) -> PlotlyPlot:
    """Scrape App Store reviews, score their sentiment and plot the result.

    The plot compares weekly review count, mean rating and mean sentiment.

    Args:
        input: Request carrying the ``country`` and ``app_name`` to analyse.

    Returns:
        A ``PlotlyPlot`` wrapping the generated figure.

    Raises:
        ValueError: If the scraper returned no reviews for the requested
            app and country.
    """
    # step1. get reviews
    reviews = get_appstore_reviews(input.country, input.app_name, how_many=100)
    if not reviews:
        # Without this guard the failure surfaces later as an opaque
        # AttributeError when the empty frame has no `review` column.
        raise ValueError(
            "No reviews were returned for app '{}' in country '{}'".format(
                input.app_name, input.country
            )
        )
    df = pd.DataFrame(reviews)

    # step2. score every review text
    df['scores'] = get_sentiments(df['review'].tolist())

    # step3. aggregate per week, then keep only rows dated after '2020'
    # (pandas compares the datetime column against the parsed string).
    df_plot = process_df(df)
    df_plot = df_plot[df_plot['date'] > '2020']

    # step4. visualization
    fig = visualization(df_plot, input.country, input.app_name)

    return PlotlyPlot(data=fig)