Spaces:

Bayhaqy
/

Play_Store_Analysis

Sleeping

App Files Files

Bayhaqy committed on May 10, 2025

Commit

d4d0d90

1 Parent(s): 766bf49

Add Gradio app files

Browse files

Files changed (7) hide show

.gradio/certificate.pem +31 -0
.gradio/flagged/dataset1.csv +2 -0
app.py +407 -0
data/app_reviews_1y_ex3.csv +0 -0
model/best_model.pkl +3 -0
model/vectorizer.pkl +3 -0
requirements.txt +11 -0

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

.gradio/flagged/dataset1.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Masukkan Ulasan,Prediksi Model,Prediksi Gemini,timestamp
2	+ test,tidak_puas,netral,2025-05-10 16:46:29.337667

app.py ADDED Viewed

	@@ -0,0 +1,407 @@

+import matplotlib.pyplot as plt
+import plotly.express as px
+import pandas as pd
+import gradio as gr
+from google_play_scraper import Sort, reviews, app
+from datetime import datetime, timedelta
+import io
+import google.generativeai as genai
+import re
+from nltk.corpus import stopwords
+from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
+from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
+import pickle
+import nltk
+# Fetch the NLTK stopwords corpus at import time; stopwords.words() below reads it.
+nltk.download('stopwords')
+# Paths of the stored files (review dataset, pickled model, pickled vectorizer)
+destination_file_1y_ex3 = 'data/app_reviews_1y_ex3.csv'
+model_file = 'model/best_model.pkl'
+vectorizer_file = 'model/vectorizer.pkl'
+# Global variables to store API key and model name.
+# Both are overwritten at runtime by update_api_credentials().
+api_key = None
+model_name = "gemini-2.0-flash"  # Default model name
+# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
+# this is only safe while both artifacts come from a trusted source.
+# best_model is later used through .predict(), vectorizer through .transform().
+with open(model_file, 'rb') as file:
+    best_model = pickle.load(file)
+with open(vectorizer_file, 'rb') as file:
+    vectorizer = pickle.load(file)
+# Cache stop words (NLTK's Indonesian list) once, so preprocess_text does not
+# reload them on every call
+indonesian_stopwords = stopwords.words('indonesian')
+# Create stemmer (Sastrawi)
+factory = StemmerFactory()
+stemmer = factory.create_stemmer()
+# Create stop word remover (Sastrawi); applied in addition to the NLTK list
+stopword_factory = StopWordRemoverFactory()
+stopword_remover = stopword_factory.create_stop_word_remover()
+def preprocess_text(text):
+    # 1. Handle None values
+    if text is None:
+        return ""  # Or any other suitable replacement
+    # Lowercase and remove punctuation & special characters in one step
+    text = re.sub(r'[^\w\s\d]+', '', text.lower())
+    # Remove extra whitespaces
+    text = re.sub(r'\s+', ' ', text).strip()
+    # Stemming and stop word removal using NLTK and list comprehension
+    text = stemmer.stem(text) # Indonesian stemming
+    text = stopword_remover.remove(text) # Indonesian stopword removal
+    words = text.split()
+    words = [word for word in words if word not in indonesian_stopwords] # Remove Indonesian stopwords
+    text = " ".join(words)
+    return text
+def predict_sentiment(text):
+    # Preprocess the input text
+    processed_text = preprocess_text(text)
+    # Transform the text using the loaded vectorizer
+    text_vectorized = vectorizer.transform([processed_text])
+    # Predict the sentiment
+    prediction = best_model.predict(text_vectorized)[0]
+    return prediction
+# Fungsi untuk melakukan labeling dengan gemini api
+def label_sentiment_with_gemini(text, api_key, model_name):
+    """Melakukan labeling sentimen menggunakan Gemini."""
+    prompt = f"""Klasifikasikan sentimen ulasan berikut menjadi: '1.puas', '2.tidak puas', '3.netral'.
+    Perhatikan sarkasme dan sindiran, atau ekspresi negatif/positif halus, serta bahasa yang digunakan.
+    **Ulasan:** {text}
+    **Tampilkan hanya Sentimen**
+    """
+    try:
+        genai.configure(api_key=api_key)  # Konfigurasi Gemini API di dalam fungsi
+        model = genai.GenerativeModel(model_name)
+        response = model.generate_content(prompt)
+        generated_content = response.text.strip().lower()
+        generated_content = re.sub(' ', '', generated_content)
+        if "1.puas" in generated_content:
+            return "puas"
+        elif "2.tidakpuas" in generated_content:
+            return "tidak puas"
+        else:
+            return "netral"
+    except genai.errors.ResourceExhaustedError:
+        print("Error: Rate limit exceeded. Please try again later.")
+        return "netral"  # or another appropriate default value
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return "netral"  # or another appropriate default value
+def predict_and_label(text):
+    try:
+        if not text:  # Check if the text input is empty
+            raise gr.Error("Please enter a review.")  # Raise a Gradio error with a message
+        prediction = predict_sentiment(text)
+        # Konversi np.str_ menjadi str
+        prediction = prediction.item()  # Atau prediction.astype(str)
+        label_gemini = label_sentiment_with_gemini(text, api_key, model_name)
+        return prediction, label_gemini
+    except (ValueError, TypeError, AttributeError) as e:
+        # Catch specific errors related to data types, empty inputs, and unexpected values
+        raise gr.Error(f"Error processing input: {type(e).__name__}. Please check your input.")
+    except genai.errors.ResourceExhaustedError:
+        # Handle rate limit exceeded error
+        raise gr.Error("Error: Rate limit exceeded for Gemini API/You forgot to update API_KEY. Please try again later.")
+    except Exception as e:
+        # Catch any other unexpected errors
+        raise gr.Error(f"An unexpected error occurred: {type(e).__name__}. Please try again later.")
+def update_api_credentials(new_api_key, new_model_name):
+    global api_key, model_name  # Access the global variables
+    api_key = str(new_api_key)
+    model_name = str(new_model_name)
+    #test api and show successfull if connected
+    try:
+        genai.configure(api_key=api_key)  # Konfigurasi Gemini API di dalam fungsi
+        model = genai.GenerativeModel(model_name)
+        response = model.generate_content("Test API Connection.Just say Yes if successfull")
+        generated_content = response.text.strip().lower()
+    except genai.errors.ResourceExhaustedError:
+        print("Error: Rate limit exceeded. Please try again later.")
+        return "Error: Rate limit exceeded. Please try again later."
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return "An unexpected error occurred. Please try again later."
+    print(f"API Key: {api_key}")
+    print(f"Model Name: {model_name}")
+    return generated_content, " API credentials updated successfully!"
+def scrape_and_show_data():
+  try:
+    # List App Packages
+    app_packages = [
+    'id.dana',             #Dana
+    'com.shopeepay.id',    #Shopeepay
+    'com.gojek.gopay',     #Gopay
+    'ovo.id',              #Ovo
+    ]
+    language = 'id'
+    country = 'id'
+    app_reviews = []
+    current_date = datetime.now()
+    one_year_ago = current_date - timedelta(days=365)
+    for ap in app_packages:
+        for score in [1, 2, 3, 4, 5]:  # Ambil semua rating (1-5)
+            rvs, _ = reviews(
+                ap,
+                lang=language,
+                country=country,
+                sort=Sort.NEWEST,  # Hanya ambil ulasan terbaru (newest)
+                count=10,  # Sesuaikan jumlah ulasan yang ingin di-scrape
+                filter_score_with=score
+            )
+            # Filter ulasan untuk satu tahun terakhir
+            for r in rvs:
+                review_date = datetime.strptime(r['at'].strftime("%Y-%m-%d"), "%Y-%m-%d")
+                if review_date >= one_year_ago:
+                    r['sortOrder'] = 'newest'  # Tetapkan sortOrder menjadi 'newest'
+                    r['appId'] = ap
+                    app_reviews.append(r)
+    df = pd.DataFrame(app_reviews)
+    # Buat label (misal: score 4-5 puas, 3 netral, 1-2 nggak puas)
+    def label_sentiment(score):
+        if score >= 4:
+            return 'puas'
+        elif score < 3:
+            return 'tidak_puas'
+        else:
+            return 'netral'
+    df['rating'] = df['score'].apply(label_sentiment)
+    # Load Apps Info
+    app_infos = []
+    for ap in app_packages:
+      info = app(ap, lang=language, country=country)
+      del info['comments']
+      app_infos.append(info)
+    app_infos_df = pd.DataFrame(app_infos)
+    df = pd.merge(df, app_infos_df[['appId', 'title']], on='appId', how='left')
+    df = df.sort_values(by='at', ascending=False).head(10)
+    # predict the data with predict_and_label. The result have 2 list, example is ('puas', 'netral'). Put to dataframe for column predict_model and predict_gemini
+    df['predict_model'], df['predict_gemini'] = zip(*df['content'].apply(predict_and_label))
+    # show only column at rename as date, content, rating, and order desc by date
+    df = df[['title','at', 'content', 'score', 'rating','predict_model','predict_gemini']].rename(columns={'at': 'date'})
+    return df
+  except Exception as e:
+    raise gr.Error(f"Error scraping data: {type(e).__name__}. Please check your app package names and connection.")
+def scrape_and_download_data(app_packages, language, country, sort, score, start_date, end_date, count):
+  try:
+    app_reviews = []
+    # Convert app_packages to a list if it's a string
+    if isinstance(app_packages, str):
+        app_packages = [app_packages]
+    # Convert date strings to datetime objects (if needed)
+    if isinstance(start_date, str):
+        start_date = datetime.strptime(start_date, "%Y-%m-%d")
+    if isinstance(end_date, str):
+        end_date = datetime.strptime(end_date, "%Y-%m-%d")
+    # Scrape data based on criteria
+    for ap in app_packages:
+        for scr in str(score):  # Ambil semua rating (1-5)
+          rvs, _ = reviews(
+              ap,
+              lang=str(language),  # Convert language to string
+              country=str(country),  # Convert country to string
+              sort=Sort.NEWEST if str(sort) == 'NEWEST' else Sort.MOST_RELEVANT,
+              count=int(count),
+              filter_score_with=scr,
+          )
+          # Filter reviews based on date range and other criteria
+          for r in rvs:
+              review_date = datetime.strptime(r['at'].strftime("%Y-%m-%d"), "%Y-%m-%d")
+              if start_date <= review_date <= end_date:  # Date range filter
+                  r['sortOrder'] = sort
+                  r['appId'] = ap
+                  app_reviews.append(r)
+    # Create DataFrame
+    df = pd.DataFrame(app_reviews)
+    # Check if DataFrame is empty
+    if df.empty:
+        # Handle empty DataFrame, e.g., return an empty DataFrame or raise an exception
+        print("DataFrame is empty. No reviews found for the specified criteria.")
+        return df # or: raise ValueError("No reviews found for the specified criteria.")
+    else:
+        # Load Apps Info
+        app_infos = []
+        for ap in app_packages:
+          info = app(ap, lang=language, country=country)
+          del info['comments']
+          app_infos.append(info)
+        app_infos_df = pd.DataFrame(app_infos)
+        df = pd.merge(df, app_infos_df[['appId', 'title']], on='appId', how='left')
+        # Create label if DataFrame is not empty
+        def label_sentiment(score):
+            if score >= 4:
+                return 'puas'
+            elif score < 3:
+                return 'tidak_puas'
+            else:
+                return 'netral'
+    df['rating'] = df['score'].apply(label_sentiment)
+    # show only column title, at, sortOrder, reviewId, userName, userImage, content, score, thumbsUpCount, replyContent, repliedAt, rating
+    df = df[['title','at', 'sortOrder', 'reviewId', 'userName', 'userImage', 'content', 'score', 'thumbsUpCount', 'replyContent', 'repliedAt', 'rating']].rename(columns={'at': 'date'})  # Rename 'at' to 'date
+    df = df.sort_values(by='date', ascending=False)
+    return df
+  except Exception as e:
+      raise gr.Error(f"Error scraping or processing data: {type(e).__name__}. Please check your inputs and connection.")
+# def create_charts():
+#     # 1. Rating Distribution Pie Chart
+#     df = scrape_and_show_data()
+#     rating_counts = df['rating'].value_counts()
+#     # Create the pie chart using Matplotlib
+#     fig_pie, ax_pie = plt.subplots()
+#     ax_pie.pie(rating_counts, labels=rating_counts.index, autopct='%1.1f%%', startangle=90)
+#     ax_pie.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
+#     plt.title("Rating Distribution")
+#     # Convert to gradio plot
+#     rating_pie_chart = gr.Plot(value=fig_pie)  # Using gr.Plot
+#     # 2. Daily Reviews Line Chart
+#     daily_reviews = df.groupby('date').size().reset_index(name='total_reviews')
+#     # Create line chart using Plotly
+#     fig_line = px.line(daily_reviews, x='date', y='total_reviews', title='Total Reviews per Day')
+#     fig_line.update_traces(mode='markers+lines')
+#     # Convert to gradio plot
+#     daily_reviews_chart = gr.Plot(value=fig_line)  # Using gr.Plot
+#     return rating_pie_chart, daily_reviews_chart  # Return both gradio plots
+with gr.Blocks() as apps:
+    with gr.Tabs():
+        with gr.TabItem("Prediction Existing Data"):
+            # Sentiment Prediction section
+            gr.Interface(
+                fn=predict_and_label,
+                inputs=[
+                    gr.Textbox(lines=5, label="Masukkan Ulasan"),
+                ],
+                outputs=[
+                    gr.Textbox(label="Prediksi Model",info="Prediksi Model Sentiment"),
+                    gr.Textbox(label="Prediksi Gemini",info="Prediksi Gemini Sentiment"),
+                ],
+                title="Prediksi Sentimen Ulasan Aplikasi Transportasi",
+                description="Masukkan ulasan Anda untuk memprediksi sentimen (puas, tidak puas).",
+                api_name="prediksi_sentimen"
+            )
+            gr.Interface(
+                fn=scrape_and_show_data,
+                inputs=None,
+                outputs=gr.Dataframe(label="Cleaned Reviews DataFrame",wrap=True),
+                description="Displaying the Latest the Data:",
+                api_name="prediksi_sentimen_latest"
+            )
+            # gr.Interface(
+            #     fn=create_charts,
+            #     inputs=None,
+            #     outputs=[
+            #         gr.Plot(label="Rating Distribution"),
+            #         gr.Plot(label="Daily Reviews"),
+            #     ],
+            #     description="Displaying Charts:",
+            # )
+        with gr.TabItem("Download New Data"):
+            with gr.Column():  # Place input elements in a column
+                app_packages_input = gr.Textbox(label="App Packages (comma-separated)", value="com.gojek.gopay",info="Enter app packages separated by commas")
+                language_input = gr.Textbox(label="Language", value="id", info="Enter language code")
+                country_input = gr.Textbox(label="Country", value="id", info="Enter country code")
+                sort_input = gr.Radio(["NEWEST", "MOST_RELEVANT"], label="Sort Order", value="NEWEST", info="Select sort order")
+                scores_input = gr.CheckboxGroup([1, 2, 3, 4, 5], label="Scores", value=[1, 2, 3, 4, 5], info="Select scores")
+                start_date_input = gr.Textbox(label="Start Date (YYYY-MM-DD)", value=(datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d"),info="Enter start date (YYYY-MM-DD)")
+                end_date_input = gr.Textbox(label="End Date (YYYY-MM-DD)", value=datetime.now().strftime("%Y-%m-%d"),info="Enter end date (YYYY-MM-DD)")
+                count = gr.Textbox(label="Count", value="10",info="Enter count")
+                generate_button = gr.Button("Generate Data")
+                download_button = gr.DownloadButton(label="Download Data")
+            # Place output elements below the input column
+            output_data = gr.Dataframe(label="Scraped Data", wrap=True)
+            generate_button.click(
+                fn=scrape_and_download_data,
+                inputs=[app_packages_input, language_input, country_input, sort_input, scores_input, start_date_input, end_date_input, count],
+                outputs=[output_data],
+                api_name="generate_data"
+            )
+            download_button.click(
+                fn=lambda df: io.StringIO(df.to_csv(index=False)),  # Convert DataFrame to CSV in memory
+                inputs=output_data,
+                outputs=download_button,
+                api_name="download_data"
+            )
+        with gr.TabItem("API Settings"):  # New tab for API settings
+            with gr.Row():
+                api_key_input = gr.Textbox(label="API Key", value="", info="Enter your API key")
+                model_name_input = gr.Textbox(label="Model Name", value="gemini-2.0-flash", info="Enter the model name")
+            update_button = gr.Button("Check and Update API Credentials")
+            update_button.click(
+                fn=update_api_credentials,
+                inputs=[api_key_input, model_name_input],
+                outputs=gr.Textbox(label="Status"),
+                api_name="update_api_credentials"
+            )
+            # information to get API Key on https://aistudio.google.com/app/apikey
+            gr.Markdown("Get API Key on https://aistudio.google.com/app/apikey")
+apps.launch(share=True,debug=True, auth=("admin", "admin"))

data/app_reviews_1y_ex3.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

model/best_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b055a653bcf592c0d1a2b11a45b0200b736b5988ed1020853b3114ffaa03e485
+size 184548

model/vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdd9ff7d4c16e716bee257c28b5b3012dbad7db084897843feea28cbe867d25a
+size 119777

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+pandas
+gradio
+google-play-scraper
+pySastrawi
+google-generativeai
+openpyxl
+nltk
+plotly
+matplotlib
+seaborn
+scikit-learn