Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import nltk | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.naive_bayes import MultinomialNB | |
| from nltk.corpus import stopwords | |
| from app_store_scraper import AppStore | |
| from google_play_scraper import Sort, reviews as google_reviews | |
| from bokeh.plotting import figure | |
| from bokeh.models import ColumnDataSource | |
# Make the NLTK stop-word corpus available, then keep the English list as a
# set so the per-word membership checks during text cleaning are cheap.
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))
# Page heading.
st.title("AppInsights")

# Display label -> code used when querying the stores. The "All" entry maps
# to the placeholder "all", which the fetch logic treats as "every country"
# rather than as a real store code.
country_options = {
    "All": "all",
    "India": "in",
    "China": "cn",
    "Singapore": "sg",
    "Hong Kong": "hk",
    "Malaysia": "my",
    "UAE": "ae",
    "Thailand": "th",
}

# Display label -> internal platform identifier.
platform_options = {"iOS": "app_store", "Android": "google_play"}

# The selectboxes offer the labels in dict insertion order and return the
# chosen label (not the mapped code).
selected_platform = st.selectbox("Select a Platform", list(platform_options))
selected_country = st.selectbox("Select a Country", list(country_options))
# Fetch reviews for the selected platform/country into `reviews`
# (a list with one dict per review; left empty when nothing is returned).
IOS_APP_NAME = "Straight2Bank"
IOS_APP_ID = 1169270682
ANDROID_APP_ID = "com.sc.s2b.ng.mobile"

reviews = []

# `selected_country` holds the display label chosen in the selectbox, so the
# "all countries" case must be detected via the label "All", not the code "all".
fetch_all_countries = selected_country == "All"

if selected_platform == "iOS":
    if fetch_all_countries:
        # The App Store is queried one storefront at a time, so "All" means
        # looping over every real country code in the options.
        for country_code in country_options.values():
            if country_code == "all":
                continue  # placeholder entry, not a storefront
            app = AppStore(country=country_code, app_name=IOS_APP_NAME, app_id=IOS_APP_ID)
            app.review(how_many=1000)
            if app.reviews:  # skip storefronts that returned nothing
                reviews.extend(app.reviews)
    else:
        app = AppStore(
            country=country_options[selected_country],
            app_name=IOS_APP_NAME,
            app_id=IOS_APP_ID,
        )
        app.review(how_many=2000)
        reviews = app.reviews or []
elif selected_platform == "Android":
    if fetch_all_countries:
        # No country filter: rely on the scraper's defaults.
        reviews, _ = google_reviews(ANDROID_APP_ID, sort=Sort.NEWEST, count=1000)
    else:
        # The option values are country codes, so they are passed as `country`;
        # `lang` expects a language code and is left at the scraper's default.
        reviews, _ = google_reviews(
            ANDROID_APP_ID,
            country=country_options[selected_country],
            sort=Sort.NEWEST,
            count=1000,
        )
def clean_text(text):
    """Normalize one review body before vectorization.

    Lower-cases the text, drops every character that is neither alphanumeric
    nor whitespace, and removes English stop words.

    Args:
        text: Raw review text. Non-string values (for example ``None`` or
            ``NaN`` for a review without a body) are treated as empty text.

    Returns:
        The cleaned text as a single space-separated string.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = "".join(char for char in text if char.isalnum() or char.isspace())
    return " ".join(word for word in text.split() if word not in stop_words)


# Build the review table and run the analysis only when reviews were fetched.
if reviews:
    if selected_platform == "iOS":
        df = pd.DataFrame(reviews)
    else:  # Android
        df = pd.DataFrame(
            reviews,
            columns=[
                "reviewId", "userName", "userImage", "content", "score",
                "thumbsUpCount", "reviewCreatedVersion", "at", "replyContent",
                "repliedAt", "appVersion",
            ],
        )
        # The trend analysis below keys on 'date'; Android reviews carry
        # their timestamp in 'at'.
        df['date'] = pd.to_datetime(df['at'])

    # Column names differ between the two stores.
    content_col = 'content' if selected_platform == "Android" else 'review'
    rating_col = 'score' if selected_platform == "Android" else 'rating'

    df['cleaned_content'] = df[content_col].apply(clean_text)

    # Ratings of 4 and above count as positive (1), everything else as negative (0).
    df['sentiment'] = (df[rating_col] >= 4).astype(int)

    # Features and labels for the sentiment model.
    X = df['cleaned_content']
    y = df['sentiment']

    vectorizer = TfidfVectorizer(max_features=1000)
    X_vectorized = vectorizer.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_vectorized, y, test_size=0.3, random_state=42
    )

    # NOTE: the classifier is trained here but nothing in the visible script
    # uses it for prediction yet.
    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)

    # Top 5 positive and negative feedback, ranked by rating.
    top_positive = df[df['sentiment'] == 1].nlargest(5, rating_col)[[content_col, rating_col]]
    top_negative = df[df['sentiment'] == 0].nsmallest(5, rating_col)[[content_col, rating_col]]

    st.subheader("Top 5 Positive Feedback")
    st.write(top_positive)
    st.subheader("Top 5 Negative Feedback")
    st.write(top_negative)

    # Trend analysis for the last 6 months.
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

        six_months_ago = pd.Timestamp.now() - pd.DateOffset(months=6)
        # .copy() so the 'month' column is added to an independent frame
        # instead of a filtered slice of the full review table.
        df = df[df['date'] >= six_months_ago].copy()

        # Month-year label used for grouping and as the categorical x axis.
        df['month'] = df['date'].dt.to_period('M').astype(str)

        # One row per month, one column per sentiment value (0 / 1).
        trend_data = df.groupby(['month', 'sentiment']).size().unstack(fill_value=0).reset_index()

        # A sentiment column is absent when no review of that kind exists in
        # the window; fall back to zeros so both lines can still be drawn.
        source = ColumnDataSource(data={
            'month': trend_data['month'],
            'positive': trend_data.get(1, [0] * len(trend_data)),
            'negative': trend_data.get(0, [0] * len(trend_data)),
        })

        # `height` replaces `plot_height`, which Bokeh 3.0 removed.
        p = figure(
            title="Review Trend Over Time",
            x_axis_label='Month',
            y_axis_label='Number of Reviews',
            x_range=trend_data['month'].tolist(),
            height=400,
        )

        p.line(x='month', y='positive', source=source, line_width=2, color='green',
               legend_label='Positive Reviews', line_dash='solid')
        p.line(x='month', y='negative', source=source, line_width=2, color='red',
               legend_label='Negative Reviews', line_dash='solid')

        p.xaxis.major_label_orientation = "vertical"

        st.bokeh_chart(p)
    else:
        st.write("No date column found in the reviews.")
else:
    st.write("No reviews found.")