Spaces:

MukeshKapoor25
/

MobileAppInsights

Build error

App Files Files Community

MobileAppInsights / app.py

MukeshKapoor25

Update app.py

d656893 verified about 1 year ago

raw

history blame contribute delete

5.96 kB

	import streamlit as st
	import pandas as pd
	import nltk
	from sklearn.model_selection import train_test_split
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.naive_bayes import MultinomialNB
	from nltk.corpus import stopwords
	from app_store_scraper import AppStore
	from google_play_scraper import Sort, reviews as google_reviews
	from bokeh.plotting import figure
	from bokeh.models import ColumnDataSource

	# Build the English stop-word set, fetching the NLTK "stopwords" corpus only
	# when it is not already installed. Streamlit re-executes this script on
	# every widget interaction, so an unconditional nltk.download() would invoke
	# the downloader on every rerun.
	try:
	    nltk.data.find('corpora/stopwords')
	except LookupError:
	    # quiet=True keeps the downloader's progress output out of the app logs.
	    nltk.download('stopwords', quiet=True)
	stop_words = set(stopwords.words('english'))

	# Page heading.
	st.title("AppInsights")

	# Platform label shown in the dropdown -> store identifier.
	platform_options = {
	    "iOS": "app_store",
	    "Android": "google_play",
	}

	# Country label shown in the dropdown -> two-letter country code.
	# "All" maps to the placeholder code "all" rather than to a real country.
	country_options = {
	    "All": "all",
	    "India": "in",
	    "China": "cn",
	    "Singapore": "sg",
	    "Hong Kong": "hk",
	    "Malaysia": "my",
	    "UAE": "ae",
	    "Thailand": "th",
	}

	# Dropdowns are rendered in this order: platform first, then country.
	selected_platform = st.selectbox("Select a Platform", list(platform_options))
	selected_country = st.selectbox("Select a Country", list(country_options))

	# Start with no reviews; the fetch step that follows fills this in.
	reviews = []

	# Fetch reviews for the selected platform and country.
	#
	# `selected_country` holds the dropdown label (e.g. "All", "India"), while
	# `country_options` maps each label to its country code. "All" maps to the
	# placeholder code "all", which is not a real storefront and must never be
	# sent to a scraper.
	if selected_platform == "iOS":
	    if selected_country == "All":
	        # Query every real storefront in turn and pool the results.
	        for country_code in country_options.values():
	            if country_code == "all":
	                continue
	            app = AppStore(country=country_code, app_name="Straight2Bank", app_id=1169270682)
	            app.review(how_many=1000)
	            if app.reviews:  # Skip storefronts that returned nothing
	                reviews += app.reviews
	    else:
	        app = AppStore(country=country_options[selected_country], app_name="Straight2Bank", app_id=1169270682)
	        app.review(how_many=2000)
	        reviews = app.reviews if app.reviews else []

	elif selected_platform == "Android":
	    # Compare against the dropdown label "All"; the previous check against the
	    # lowercase code "all" could never match, so "All" was sent on as a
	    # language code.
	    if selected_country == "All":
	        reviews, _ = google_reviews("com.sc.s2b.ng.mobile", sort=Sort.NEWEST, count=1000)
	    else:
	        # The dropdown supplies country codes, which google_play_scraper takes
	        # through `country`; `lang` expects a language code instead.
	        reviews, _ = google_reviews(
	            "com.sc.s2b.ng.mobile",
	            country=country_options[selected_country],
	            sort=Sort.NEWEST,
	            count=1000,
	        )

	def clean_text(text):
	    """Normalise one review body for vectorisation.

	    Lower-cases the text, drops every character that is neither alphanumeric
	    nor whitespace, and removes the words contained in ``stop_words``.

	    Args:
	        text: The raw review body. Non-string values (for example a missing
	            review) are treated as empty text instead of raising.

	    Returns:
	        The cleaned text as a single space-separated string.
	    """
	    if not isinstance(text, str):
	        return ''
	    lowered = text.lower()
	    kept = ''.join(char for char in lowered if char.isalnum() or char.isspace())
	    return ' '.join(word for word in kept.split() if word not in stop_words)


	# Analyse the fetched reviews: label sentiment from the star rating, show the
	# best and worst feedback, and chart the monthly trend for the last six months.
	if reviews:
	    if selected_platform == "iOS":
	        df = pd.DataFrame(reviews)
	    else:  # Android
	        df = pd.DataFrame(reviews, columns=["reviewId", "userName", "userImage", "content", "score",
	                                            "thumbsUpCount", "reviewCreatedVersion", "at", "replyContent",
	                                            "repliedAt", "appVersion"])
	        # Expose the Android timestamp under the common 'date' column name
	        df['date'] = pd.to_datetime(df['at'])

	    # Column names differ between the two platforms
	    content_col = 'content' if selected_platform == "Android" else 'review'
	    rating_col = 'score' if selected_platform == "Android" else 'rating'

	    df['cleaned_content'] = df[content_col].apply(clean_text)

	    # Ratings of 4 and above count as positive (1), everything else as negative (0)
	    df['sentiment'] = (df[rating_col] >= 4).astype(int)

	    # Fit a TF-IDF + Naive Bayes model on the cleaned text. The fitted model is
	    # not used further down, so a failure here (too few reviews to split, or
	    # text that is empty after cleaning) must not stop the page from rendering.
	    X = df['cleaned_content']
	    y = df['sentiment']
	    try:
	        vectorizer = TfidfVectorizer(max_features=1000)
	        X_vectorized = vectorizer.fit_transform(X)
	        X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.3, random_state=42)
	        classifier = MultinomialNB()
	        classifier.fit(X_train, y_train)
	    except ValueError as err:
	        classifier = None
	        st.warning(f"Sentiment model could not be trained: {err}")

	    # Top 5 positive and negative feedback
	    top_positive = df[df['sentiment'] == 1].nlargest(5, rating_col)[[content_col, rating_col]]
	    top_negative = df[df['sentiment'] == 0].nsmallest(5, rating_col)[[content_col, rating_col]]

	    st.subheader("Top 5 Positive Feedback")
	    st.write(top_positive)

	    st.subheader("Top 5 Negative Feedback")
	    st.write(top_negative)

	    # Trend analysis for the last 6 months
	    if 'date' in df.columns:
	        df['date'] = pd.to_datetime(df['date'])

	        # Keep the last 6 months only; copy so that adding the 'month' column
	        # writes to an independent frame rather than a slice of the original.
	        six_months_ago = pd.Timestamp.now() - pd.DateOffset(months=6)
	        df = df[df['date'] >= six_months_ago].copy()

	        if df.empty:
	            # Nothing to plot; say so instead of drawing an empty chart
	            st.write("No reviews found in the last 6 months.")
	        else:
	            # Month label (YYYY-MM) used for grouping and as the x-axis category
	            df['month'] = df['date'].dt.to_period('M').astype(str)

	            # One row per month, one column per sentiment value (0 and/or 1)
	            trend_data = df.groupby(['month', 'sentiment']).size().unstack(fill_value=0).reset_index()
	            months = trend_data['month'].tolist()

	            # A sentiment column is absent when no review had that sentiment
	            zeros = [0] * len(trend_data)
	            source = ColumnDataSource(data={
	                'month': months,
	                'positive': trend_data.get(1, zeros),
	                'negative': trend_data.get(0, zeros),
	            })

	            # `height` replaces `plot_height`, which was removed in Bokeh 3.0
	            p = figure(title="Review Trend Over Time", x_axis_label='Month', y_axis_label='Number of Reviews',
	                       x_range=months, height=400)

	            p.line(x='month', y='positive', source=source, line_width=2, color='green',
	                   legend_label='Positive Reviews', line_dash='solid')
	            p.line(x='month', y='negative', source=source, line_width=2, color='red',
	                   legend_label='Negative Reviews', line_dash='solid')

	            # Rotate month labels so they do not overlap
	            p.xaxis.major_label_orientation = "vertical"

	            st.bokeh_chart(p)
	    else:
	        st.write("No date column found in the reviews.")
	else:
	    st.write("No reviews found.")