"""Scrape App Store reviews, score their sentiment, and plot weekly aggregates."""

import functools
from typing import Literal

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from app_store_scraper import AppStore
from plotly.subplots import make_subplots
from pydantic import BaseModel, Field
from transformers import pipeline
from workcell.integrations.types import PlotlyPlot

# Closed sets of accepted inputs; pydantic validates `Input` against them.
# NOTE(review): the UK storefront code may need to be "gb" rather than "uk"
# -- confirm against app_store_scraper before relying on that option.
COUNTRIES = Literal["us", "cn", "uk", "jp"]
APPS = Literal["slack", "tiktok", "wechat", "tesla", "facebook", "instagram", "twitter"]


class Input(BaseModel):
    """Request payload: the storefront country and the app to analyse."""

    country: COUNTRIES
    app_name: APPS


def get_appstore_reviews(country, app_name, how_many=100):
    """Fetch reviews for an app from the App Store.

    Args:
        country: Storefront code, e.g. "cn", "us", "uk", "jp".
        app_name: App name, e.g. "twitter", "facebook", "instagram", "tesla".
        how_many: Number of reviews requested from the scraper.

    Returns:
        The scraper's ``reviews`` attribute after the fetch.
    """
    appstore = AppStore(country=country, app_name=app_name)
    # `review()` populates `appstore.reviews` as a side effect.
    appstore.review(how_many=how_many)
    return appstore.reviews


@functools.lru_cache(maxsize=1)
def _get_sentiment_pipeline():
    """Build the default sentiment-analysis pipeline once and reuse it."""
    # A different checkpoint can be selected with `pipeline(model=...)`;
    # `process_score` only special-cases the 'NEGATIVE' label, so verify the
    # label set of any replacement model first.
    return pipeline("sentiment-analysis")


def get_sentiments(data):
    """Return sentiment scores for a list of texts.

    Example:
        data = ["I love you", "I hate you"]
        scores = [{'label': 'POSITIVE', 'score': 0.9998},
                  {'label': 'NEGATIVE', 'score': 0.9991}]

    Args:
        data: List of strings to classify.

    Returns:
        One ``{'label': ..., 'score': ...}`` dict per input text.
    """
    sentiment_pipeline = _get_sentiment_pipeline()
    # Truncate over-long reviews instead of failing on texts that exceed the
    # model's maximum sequence length.
    return sentiment_pipeline(data, truncation=True)


def process_score(data):
    """Convert a pipeline result into a signed score.

    Args:
        data: Dict with 'label' and 'score' keys.

    Returns:
        The score negated when the label is 'NEGATIVE', otherwise unchanged.
    """
    score = data['score']
    return -1 * score if data['label'] == 'NEGATIVE' else score


def process_df(df):
    """Aggregate a review dataframe into the weekly frame used for plotting.

    Args:
        df: DataFrame with 'date', 'rating' and 'scores' columns, where
            'scores' holds the raw pipeline result dicts.

    Returns:
        DataFrame with one row per weekly bin and the columns 'date',
        'counts' (number of ratings), 'scores' (mean signed sentiment),
        'rating' (mean rating) and 'color' ('red' when the mean sentiment is
        negative, otherwise 'green').
    """
    # `assign` returns a new frame, so the caller's dataframe keeps its raw
    # pipeline results instead of being overwritten in place.
    signed = df.assign(scores=df['scores'].apply(process_score))
    signed = signed.sort_values(by='date', ascending=True).reset_index(drop=True)
    df_plot = (
        signed[['date', 'scores', 'rating']]
        .groupby(pd.Grouper(key='date', freq='W'))
        .agg(
            counts=('rating', 'count'),
            scores=('scores', 'mean'),
            rating=('rating', 'mean'),
        )
        .reset_index()
    )
    df_plot['color'] = np.where(df_plot["scores"] < 0, 'red', 'green')
    return df_plot


def visualization(df, country, app_name):
    """Build a three-row bar figure: review count, rating and sentiment.

    Args:
        df: Frame produced by ``process_df`` ('date', 'counts', 'rating',
            'scores', 'color').
        country: Storefront code shown in the title.
        app_name: App name shown in the title.

    Returns:
        The Plotly figure with the three stacked subplots sharing an x axis.
    """
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True)
    # The line break inside the title is written as an explicit escape.
    # NOTE(review): Plotly text normally uses "<br>" for line breaks --
    # confirm whether that was the intent here.
    fig.update_layout(
        title='App store reviews for \napp: {}, country: {}'.format(app_name, country)
    )
    fig.add_trace(go.Bar(x=df.date, y=df.counts), row=1, col=1)
    fig.add_trace(go.Bar(x=df.date, y=df.rating), row=2, col=1)
    fig.add_trace(go.Bar(x=df.date, y=df.scores, marker_color=df['color']), row=3, col=1)
    fig.update_yaxes(title_text="review count", row=1, col=1)
    fig.update_yaxes(title_text="rating", row=2, col=1)
    fig.update_yaxes(title_text="sentiment", row=3, col=1)
    fig.update_layout(hovermode="x", showlegend=False)
    return fig


def appstore_reviews(input: Input) -> PlotlyPlot:
    """Scrape reviews from app store, calculate sentiment score, and compare
    it with review count & rating.

    Args:
        input: Validated request carrying ``country`` and ``app_name``.

    Returns:
        A ``PlotlyPlot`` wrapping the generated figure.

    Raises:
        ValueError: If the scraper returned no reviews.
    """
    # step1. get reviews
    reviews = get_appstore_reviews(input.country, input.app_name, how_many=100)
    if not reviews:
        # An empty result would otherwise surface as an opaque AttributeError
        # on `df.review` below.
        raise ValueError(
            "No reviews found for app {!r} in country {!r}".format(
                input.app_name, input.country
            )
        )
    df = pd.DataFrame(reviews)
    # step2. pipe reviews
    df['scores'] = get_sentiments(df.review.tolist())
    # step3. preprocess
    df_plot = process_df(df)
    # Keep only weekly bins dated after the start of 2020.
    df_plot = df_plot[df_plot.date > '2020']
    # step4. visualization
    fig = visualization(df_plot, input.country, input.app_name)
    return PlotlyPlot(data=fig)