Spaces:
Build error
Build error
| from typing import Literal | |
| from pydantic import BaseModel, Field | |
| from app_store_scraper import AppStore | |
| from transformers import pipeline | |
| import pandas as pd | |
| import numpy as np | |
| from plotly.subplots import make_subplots | |
| import plotly.graph_objects as go | |
| from workcell.integrations.types import PlotlyPlot | |
# Closed sets of values accepted by the request model.
# Storefront country codes that can be selected.
COUNTRIES = Literal[
    "us",
    "cn",
    "uk",
    "jp",
]
# App names that can be selected.
APPS = Literal[
    "slack",
    "tiktok",
    "wechat",
    "tesla",
    "facebook",
    "instagram",
    "twitter",
]
class Input(BaseModel):
    # Request payload for ``appstore_reviews``: which storefront and which
    # app to analyse.

    # Storefront country code; restricted to the COUNTRIES literals.
    country: COUNTRIES
    # App to look up; restricted to the APPS literals.
    app_name: APPS
def get_appstore_reviews(country, app_name, how_many=100):
    """Fetch user reviews for one app from the App Store.

    Args:
        country: Country code handed to ``AppStore``, e.g. "cn", "us",
            "uk" or "jp".
        app_name: App name handed to ``AppStore``, e.g. "twitter",
            "facebook", "instagram" or "tesla".
        how_many: Number of reviews requested from the scraper
            (default 100).

    Returns:
        The scraper's ``reviews`` attribute as it stands after the fetch.
    """
    scraper = AppStore(country=country, app_name=app_name)
    # The fetched reviews are read back from the scraper's ``reviews``
    # attribute rather than from the call's return value.
    scraper.review(how_many=how_many)
    return scraper.reviews
def get_sentiments(data):
    """Return one sentiment prediction per input text.

    Args:
        data: List of texts to classify, e.g.
            ``["I love you", "I hate you"]``.

    Returns:
        The pipeline output for ``data``, e.g.
        ``[{'label': 'POSITIVE', 'score': 0.9998},
        {'label': 'NEGATIVE', 'score': 0.9991}]``.
        An empty list or tuple yields ``[]`` without loading the model.
    """
    # Nothing to classify: skip the expensive model load entirely.
    if isinstance(data, (list, tuple)) and not data:
        return []

    sentiment_pipeline = pipeline("sentiment-analysis")
    # NOTE: a custom checkpoint can be used instead, e.g.
    #   pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")

    # truncation=True clips over-long reviews to the model's maximum input
    # length; without it a single very long review can raise inside the
    # model and abort the whole request.
    scores = sentiment_pipeline(data, truncation=True)
    return scores
def process_score(data):
    """Turn a sentiment prediction into a signed score.

    Args:
        data: Mapping with a ``'label'`` and a ``'score'`` entry.

    Returns:
        The score negated when the label is ``'NEGATIVE'``; the score
        unchanged for every other label.
    """
    is_negative = data['label'] == 'NEGATIVE'
    confidence = data['score']
    return -1 * confidence if is_negative else confidence
def process_df(df, freq='W'):
    """Aggregate scored reviews into fixed time bins ready for plotting.

    Args:
        df: Review frame with a datetime-like ``date`` column, a ``scores``
            column holding ``{'label': ..., 'score': ...}`` dicts and a
            ``rating`` column. The frame is not modified.
        freq: pandas offset alias for the bin width; defaults to ``'W'``
            (weekly), the value that was previously hard-coded.

    Returns:
        A new frame with one row per time bin and the columns ``date``,
        ``counts`` (number of reviews), ``scores`` (mean signed sentiment),
        ``rating`` (mean rating) and ``color`` (``'red'`` when the mean
        sentiment is below zero, otherwise ``'green'``).
    """
    # Work on a narrow copy so the caller's frame keeps its raw prediction
    # dicts; converting them in place would make a second call on the same
    # frame fail because the values would already be floats.
    scored = df[['date', 'scores', 'rating']].copy()
    scored['scores'] = scored['scores'].apply(process_score)
    scored = scored.sort_values(by='date', ascending=True).reset_index(drop=True)

    df_plot = (
        scored.groupby(pd.Grouper(key='date', freq=freq))
        .agg(
            counts=('rating', 'count'),
            scores=('scores', 'mean'),
            rating=('rating', 'mean'),
        )
        .reset_index()
    )
    # Bar colour for the sentiment panel: red for a negative mean.
    df_plot['color'] = np.where(df_plot['scores'] < 0, 'red', 'green')
    return df_plot
def visualization(df, country, app_name):
    """Draw review count, rating and sentiment as three stacked bar charts.

    Args:
        df: Frame providing the ``date``, ``counts``, ``rating``,
            ``scores`` and ``color`` columns.
        country: Country code shown in the figure title.
        app_name: App name shown in the figure title.

    Returns:
        The figure with one bar trace per row, all rows sharing the x axis.
    """
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True)
    heading = 'App store reviews for app: {}, country: {}'.format(app_name, country)
    fig.update_layout(title=heading)

    # (column, y-axis title, extra bar options), listed top to bottom.
    panels = [
        ('counts', "review count", {}),
        ('rating', "rating", {}),
        ('scores', "sentiment", {'marker_color': df['color']}),
    ]
    for row, (column, _, bar_options) in enumerate(panels, start=1):
        bars = go.Bar(x=df['date'], y=df[column], **bar_options)
        fig.add_trace(bars, row=row, col=1)
    for row, (_, axis_title, _) in enumerate(panels, start=1):
        fig.update_yaxes(title_text=axis_title, row=row, col=1)

    fig.update_layout(hovermode="x", showlegend=False)
    return fig
def appstore_reviews(input: Input) -> PlotlyPlot:
    """Scrape App Store reviews, score their sentiment and chart the result.

    The sentiment score is plotted alongside review count and rating.

    Args:
        input: The selected ``country`` and ``app_name``.

    Returns:
        A ``PlotlyPlot`` wrapping the figure built by ``visualization``.

    Raises:
        ValueError: If the scraper returns no reviews for the selection.
    """
    # step1. get reviews
    reviews = get_appstore_reviews(input.country, input.app_name, how_many=100)
    if not reviews:
        # An empty result produces a frame without a 'review' column, which
        # previously surfaced as an opaque AttributeError further down.
        raise ValueError(
            f"No App Store reviews found for app {input.app_name!r} "
            f"in country {input.country!r}"
        )
    df = pd.DataFrame(reviews)

    # step2. score every review text
    df['scores'] = get_sentiments(df['review'].tolist())

    # step3. aggregate into plot format
    df_plot = process_df(df)
    # Keep only bins dated after the start of 2020 (pandas parses the
    # partial date string '2020' for the comparison).
    df_plot = df_plot[df_plot['date'] > '2020']

    # step4. visualization
    fig = visualization(df_plot, input.country, input.app_name)
    return PlotlyPlot(data=fig)