Spaces:

weanalyze
/

appstore_reviews

Build error

App Files Files Community

appstore_reviews / app.py

jiandong

Upload with huggingface_hub

043f774 almost 3 years ago

raw

history blame contribute delete

3.4 kB

	from typing import Literal
	from pydantic import BaseModel, Field
	from app_store_scraper import AppStore
	from transformers import pipeline
	import pandas as pd
	import numpy as np
	from plotly.subplots import make_subplots
	import plotly.graph_objects as go
	from workcell.integrations.types import PlotlyPlot


	# Country codes a request may select; the value is handed to the scraper as-is.
	COUNTRIES = Literal[
	    "us",
	    "cn",
	    "uk",
	    "jp",
	]
	# App names a request may select; the value is handed to the scraper as-is.
	APPS = Literal[
	    "slack",
	    "tiktok",
	    "wechat",
	    "tesla",
	    "facebook",
	    "instagram",
	    "twitter",
	]

	class Input(BaseModel):
	    # Request payload for ``appstore_reviews``: which storefront and which app.
	    country: COUNTRIES  # restricted to the values listed in COUNTRIES
	    app_name: APPS  # restricted to the values listed in APPS


	def get_appstore_reviews(country, app_name, how_many=100):
	    """Fetch reviews for one app from the App Store.

	    Args:
	        country: storefront country code, e.g. "cn", "us", "uk", "jp".
	        app_name: app to look up, e.g. "twitter", "facebook", "instagram", "tesla".
	        how_many: number of reviews requested from the scraper (default 100).

	    Returns:
	        The scraper's ``reviews`` attribute after the fetch has run.
	    """
	    scraper = AppStore(country=country, app_name=app_name)
	    # Run the fetch; the results are then read back from ``scraper.reviews``.
	    scraper.review(how_many=how_many)
	    return scraper.reviews


	# Lazily-built sentiment pipeline, shared across calls so the model is only
	# loaded once per process instead of once per request.
	_SENTIMENT_PIPELINE = None


	def _load_sentiment_pipeline():
	    """Return the shared sentiment-analysis pipeline, creating it on first use."""
	    global _SENTIMENT_PIPELINE
	    if _SENTIMENT_PIPELINE is None:
	        _SENTIMENT_PIPELINE = pipeline("sentiment-analysis")
	        # Alternative model, kept for reference:
	        # pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
	    return _SENTIMENT_PIPELINE


	def get_sentiments(data):
	    """Return sentiment scores for a list of texts.

	    Example:
	        data = ["I love you", "I hate you"]
	        scores = [{'label': 'POSITIVE', 'score': 0.9998},
	                  {'label': 'NEGATIVE', 'score': 0.9991}]

	    Args:
	        data: list of strings to classify.

	    Returns:
	        A list with one ``{'label': ..., 'score': ...}`` dict per input text;
	        an empty list when ``data`` is empty.
	    """
	    # Nothing to classify: skip loading the model altogether.
	    if not data:
	        return []
	    sentiment_pipeline = _load_sentiment_pipeline()
	    # truncation=True clips reviews that exceed the model's maximum input
	    # length; without it a single very long review aborts the whole batch.
	    scores = sentiment_pipeline(data, truncation=True)
	    return scores


	def process_score(data):
	    """Turn one pipeline result into a signed score.

	    Args:
	        data: dict with a ``'label'`` and a ``'score'`` entry.

	    Returns:
	        The score negated when the label is ``'NEGATIVE'``, otherwise the
	        score unchanged.
	    """
	    is_negative = data['label'] == 'NEGATIVE'
	    return -1 * data['score'] if is_negative else data['score']


	def process_df(df):
	    """Aggregate a review dataframe into the weekly frame used for plotting.

	    The input frame is not modified, so it can safely be processed again.

	    Args:
	        df: dataframe with a ``date`` column, a ``rating`` column and a
	            ``scores`` column holding ``{'label': ..., 'score': ...}`` dicts.

	    Returns:
	        A dataframe with one row per weekly bin and the columns ``date``,
	        ``counts`` (number of ratings), ``scores`` (mean signed sentiment),
	        ``rating`` (mean rating) and ``color`` ('red' when the mean
	        sentiment is below zero, otherwise 'green').
	    """
	    # assign() returns a new frame; writing the signed scores back into ``df``
	    # would replace the dicts and break any later call on the same frame.
	    signed = df.assign(scores=df['scores'].apply(process_score))
	    signed = signed.sort_values(by='date', ascending=True).reset_index(drop=True)
	    weekly = (
	        signed[['date', 'scores', 'rating']]
	        .groupby(pd.Grouper(key='date', freq='W'))
	        .agg(
	            counts=('rating', 'count'),
	            scores=('scores', 'mean'),
	            rating=('rating', 'mean'),
	        )
	        .reset_index()
	    )
	    # Bins without reviews have a NaN mean, which compares False and so
	    # falls into the 'green' branch.
	    weekly['color'] = np.where(weekly["scores"] < 0, 'red', 'green')
	    return weekly


	def visualization(df, country, app_name):
	    """Build a three-row bar chart of review count, rating and sentiment.

	    Args:
	        df: frame with ``date``, ``counts``, ``rating``, ``scores`` and
	            ``color`` columns.
	        country: country code shown in the figure title.
	        app_name: app name shown in the figure title.

	    Returns:
	        The figure, with the three rows sharing one x axis.
	    """
	    fig = make_subplots(rows=3, cols=1, shared_xaxes=True)

	    # One bar trace per row, top to bottom; only the sentiment bars are coloured.
	    bars = [
	        go.Bar(x=df.date, y=df.counts),
	        go.Bar(x=df.date, y=df.rating),
	        go.Bar(x=df.date, y=df.scores, marker_color=df['color']),
	    ]
	    for row_no, bar in enumerate(bars, start=1):
	        fig.add_trace(bar, row=row_no, col=1)

	    axis_titles = ["review count", "rating", "sentiment"]
	    for row_no, axis_title in enumerate(axis_titles, start=1):
	        fig.update_yaxes(title_text=axis_title, row=row_no, col=1)

	    heading = 'App store reviews for app: {}, country: {}'.format(app_name, country)
	    fig.update_layout(title=heading, hovermode="x", showlegend=False)
	    return fig


	def appstore_reviews(input: Input) -> PlotlyPlot:
	    """Scrape App Store reviews, score their sentiment, and compare it with review count and rating.

	    Args:
	        input: request carrying the ``country`` and ``app_name`` to analyse.

	    Returns:
	        A ``PlotlyPlot`` wrapping the three-row figure. When the scraper
	        returns no reviews the figure is empty rather than the call failing.
	    """
	    # step1. get reviews
	    reviews = get_appstore_reviews(input.country, input.app_name, how_many=100)
	    df = pd.DataFrame(reviews)
	    if df.empty:
	        # No reviews came back, so ``df`` has no ``review`` column to score.
	        # Plot an empty frame with the columns the chart reads instead.
	        df_plot = pd.DataFrame({
	            'date': pd.Series(dtype='datetime64[ns]'),
	            'counts': pd.Series(dtype='int64'),
	            'scores': pd.Series(dtype='float64'),
	            'rating': pd.Series(dtype='float64'),
	            'color': pd.Series(dtype='object'),
	        })
	    else:
	        # step2. pipe reviews
	        df['scores'] = get_sentiments(df.review.tolist())
	        # step3. preprocess
	        df_plot = process_df(df)
	        # keep only the bins dated after the start of 2020
	        df_plot = df_plot[df_plot.date > '2020']
	    # step4. visualization
	    fig = visualization(df_plot, input.country, input.app_name)
	    # return
	    return PlotlyPlot(data=fig)