Spaces:

ktara
/

Session5

Build error

App Files Files Community

ktara committed on Mar 23

Commit

3e30f6d

verified ·

1 Parent(s): ebea030

Upload 5 files

Browse files

Files changed (3) hide show

README.md +13 -2
app.py +145 -57
requirements.txt +1 -2

README.md CHANGED Viewed

@@ -1,12 +1,23 @@
 ---
 title: Book Analytics Dashboard
 emoji: 📊
 sdk: gradio
 app_file: app.py
 ---
 # Book Analytics Dashboard
-Run full analytics on fixed datasets.
-Click "Run Analysis" to generate results.

 ---
 title: Book Analytics Dashboard
 emoji: 📊
+colorFrom: blue
+colorTo: green
 sdk: gradio
 app_file: app.py
+pinned: false
 ---
 # Book Analytics Dashboard
+This Space runs the notebook workflow on two fixed datasets bundled in the repository.
+## Files expected in the repo
+- `synthetic_book_reviews.csv`
+- `synthetic_sales_data.csv`
+## What the app shows
+- Sales trends chart
+- Sentiment distribution chart
+- Pricing decisions table
+- Downloadable export ZIP

app.py CHANGED Viewed

@@ -2,73 +2,161 @@
 # Generated with The App Builder. ESCP coursework.
 # Student must be able to explain all code when asked.
 import gradio as gr
-import pandas as pd
 import matplotlib.pyplot as plt
-from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-from statsmodels.tsa.arima.model import ARIMA
 def load_data():
-    reviews = pd.read_csv("synthetic_book_reviews.csv")
-    sales = pd.read_csv("synthetic_sales_data.csv")
     return reviews, sales
-def run_sentiment_analysis(df):
-    analyzer = SentimentIntensityAnalyzer()
-    def get_sentiment(text):
-        score = analyzer.polarity_scores(str(text))["compound"]
-        if score >= 0.05:
-            return "Positive"
-        elif score <= -0.05:
-            return "Negative"
-        return "Neutral"
-    df["sentiment"] = df["review"].apply(get_sentiment)
-    return df
-def create_sentiment_plot(df):
-    fig, ax = plt.subplots()
-    df["sentiment"].value_counts().plot(kind="bar", ax=ax)
-    ax.set_title("Sentiment Distribution")
-    filepath = "sentiment_plot.png"
-    fig.savefig(filepath)
     plt.close(fig)
-    return filepath
-def forecast_sales(df):
-    df["date"] = pd.to_datetime(df["date"])
-    df = df.sort_values("date")
-    model = ARIMA(df["sales"], order=(1, 1, 1))
-    model_fit = model.fit()
-    return model_fit.forecast(steps=5)
-def pricing_decision(sentiment_df, forecast):
-    sentiment_score = sentiment_df["sentiment"].value_counts(normalize=True)
-    positive_ratio = sentiment_score.get("Positive", 0)
-    avg_forecast = forecast.mean()
-    decision = "Increase Price" if positive_ratio > 0.6 else "Keep Price"
-    result = pd.DataFrame({
-        "Positive Sentiment Ratio": [positive_ratio],
-        "Avg Forecast Sales": [avg_forecast],
-        "Decision": [decision]
-    })
-    result.to_csv("pricing_decision.csv", index=False)
-    return result
-def run_full_analysis():
     reviews, sales = load_data()
-    reviews = run_sentiment_analysis(reviews)
-    plot_path = create_sentiment_plot(reviews)
-    forecast = forecast_sales(sales)
-    decision_df = pricing_decision(reviews, forecast)
-    return plot_path, decision_df, "pricing_decision.csv"
 with gr.Blocks() as demo:
-    gr.Markdown("# 📊 Automated Book Analytics Dashboard")
-    run_button = gr.Button("Run Analysis")
-    plot_output = gr.Image()
-    table_output = gr.Dataframe()
-    file_output = gr.File()
-    run_button.click(fn=run_full_analysis, inputs=[], outputs=[plot_output, table_output, file_output])
 if __name__ == "__main__":
     demo.launch()

 # Generated with The App Builder. ESCP coursework.
 # Student must be able to explain all code when asked.
+"""Gradio Space that runs the fixed notebook workflow on bundled CSV files."""
+from pathlib import Path
+import shutil
+import warnings
 import gradio as gr
 import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+import statsmodels.api as sm
+from itertools import product
+from zipfile import ZipFile
+DATA_REVIEWS = "synthetic_book_reviews.csv"
+DATA_SALES = "synthetic_sales_data.csv"
+ART_DIR = Path("artifacts")
+FIG_DIR = ART_DIR / "figures"
+TAB_DIR = ART_DIR / "tables"
+def ensure_dirs():
+    """Make sure the figure and table output folders exist.
+
+    Missing parent folders are created too, and folders that are already
+    present are left as they are.
+    """
+    for folder in (FIG_DIR, TAB_DIR):
+        folder.mkdir(parents=True, exist_ok=True)
 def load_data():
+    """Load and validate the two fixed datasets bundled with the Space.
+
+    Returns:
+        A ``(reviews, sales)`` tuple of DataFrames read from ``DATA_REVIEWS``
+        and ``DATA_SALES``.
+
+    Raises:
+        ValueError: If either file lacks a column that the pipeline reads.
+    """
+    reviews = pd.read_csv(DATA_REVIEWS)
+    sales = pd.read_csv(DATA_SALES)
+    # "sentiment_label" is grouped on by the sentiment chart and the pricing
+    # table, so check for it here rather than failing later with a KeyError.
+    required_reviews = {"title", "review_text", "rating", "popularity_score", "sentiment_label"}
+    required_sales = {"title", "month", "units_sold"}
+    if not required_reviews.issubset(reviews.columns):
+        raise ValueError(f"Missing review columns: {required_reviews - set(reviews.columns)}")
+    if not required_sales.issubset(sales.columns):
+        raise ValueError(f"Missing sales columns: {required_sales - set(sales.columns)}")
     return reviews, sales
+def build_sample_titles(reviews):
+    """Pick up to 5 titles from each popularity score, without repeats.
+
+    Args:
+        reviews: DataFrame with "popularity_score" and "title" columns.
+
+    Returns:
+        A list of titles ordered by ascending popularity score and, within a
+        score, by first appearance in ``reviews``. Each title occurs once.
+    """
+    sampled_titles = []
+    seen = set()
+    for score in sorted(reviews["popularity_score"].dropna().unique()):
+        titles = reviews.loc[reviews["popularity_score"] == score, "title"].dropna().unique().tolist()
+        for title in titles[:5]:
+            # A title reviewed under several scores would otherwise be listed
+            # once per score, and so plotted twice with a duplicated legend entry.
+            if title not in seen:
+                seen.add(title)
+                sampled_titles.append(title)
+    return sampled_titles
+def save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles):
+    """Plot units sold over time for each sampled title and save it as a PNG.
+
+    Args:
+        sampled_sales: DataFrame with "title", "month" and "units_sold" columns.
+        sampled_books: DataFrame with "title" and "popularity_score" columns;
+            the first row found for a title decides its line color.
+        sampled_titles: Titles to draw, one line per title.
+
+    Returns:
+        The path of the saved figure as a string.
+    """
+    popularity_colors = {1: "darkred", 2: "orangered", 3: "gold", 4: "mediumseagreen", 5: "royalblue"}
+    fig, ax = plt.subplots(figsize=(14, 6))
+    try:
+        for title in sampled_titles:
+            row = sampled_books[sampled_books["title"] == title].iloc[0]
+            # Sort by month so each line runs left to right even when the
+            # CSV rows are not stored in chronological order.
+            subset = sampled_sales[sampled_sales["title"] == title].sort_values("month")
+            ax.plot(subset["month"], subset["units_sold"], label=title,
+                    color=popularity_colors.get(row["popularity_score"], "gray"))
+        ax.set_title("Sales Trends Over Time")
+        ax.set_xlabel("Month")
+        ax.set_ylabel("Units Sold")
+        ax.tick_params(axis="x", rotation=45)
+        ax.grid(True, alpha=0.3)
+        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
+        fig.tight_layout()
+        out = FIG_DIR / "sales_trends_sampled_titles.png"
+        fig.savefig(out, dpi=150, bbox_inches="tight")
+    finally:
+        # Release the figure even if plotting or saving fails.
+        plt.close(fig)
+    return str(out)
+def save_sentiment_chart(sampled_reviews):
+    """Draw the stacked sentiment bar chart and export its counts.
+
+    The input is copied before the helper "grouped_title" column is added.
+    The per-title label counts are written to a CSV in ``TAB_DIR`` and the
+    chart is saved as a PNG in ``FIG_DIR``.
+
+    Args:
+        sampled_reviews: DataFrame with "rating", "title" and
+            "sentiment_label" columns.
+
+    Returns:
+        The path of the saved figure as a string.
+    """
+    label_order = ["negative", "neutral", "positive"]
+    palette = {"negative": "royalblue", "neutral": "lightgray", "positive": "crimson"}
+
+    labelled = sampled_reviews.copy()
+    star_prefix = labelled["rating"].astype(str) + "★ | "
+    labelled["grouped_title"] = star_prefix + labelled["title"]
+
+    per_title = labelled.groupby(["grouped_title", "sentiment_label"]).size()
+    counts = per_title.unstack(fill_value=0).reindex(columns=label_order, fill_value=0)
+    csv_path = TAB_DIR / "sentiment_counts_sampled.csv"
+    counts.reset_index().to_csv(csv_path, index=False)
+
+    fig, ax = plt.subplots(figsize=(12, 12))
+    counts.plot.barh(stacked=True, ax=ax, color=palette)
+    ax.set_title("Sentiment Distribution in Reviews")
+    ax.set_xlabel("Number of Reviews")
+    ax.set_ylabel("Book Title")
+    ax.grid(axis="x", linestyle="--", alpha=0.4)
+    fig.tight_layout()
+    png_path = FIG_DIR / "sentiment_distribution_sampled_titles.png"
+    fig.savefig(png_path, dpi=150, bbox_inches="tight")
     plt.close(fig)
+    return str(png_path)
+def pricing_action(row):
+    """Return the pricing recommendation for one title.
+
+    Args:
+        row: Mapping-like record providing "avg_units_sold" and, optionally,
+            "positive_ratio" and "negative_ratio" (each treated as 0 if absent).
+
+    Returns:
+        "increase price", "decrease price" or "keep price".
+    """
+    units = row["avg_units_sold"]
+    if units >= 120:
+        if row.get("positive_ratio", 0) >= 0.6:
+            return "increase price"
+    if units <= 60:
+        if row.get("negative_ratio", 0) >= 0.4:
+            return "decrease price"
+    return "keep price"
+def save_decision_table(reviews, sales):
+    """Build the per-title pricing table, write it to CSV and return it.
+
+    Args:
+        reviews: DataFrame with "title" and "sentiment_label" columns.
+        sales: DataFrame with "title" and "units_sold" columns.
+
+    Returns:
+        DataFrame sorted by title with the average units sold, the positive
+        and negative review ratios, and the resulting pricing action.
+    """
+    mean_units = sales.groupby("title", as_index=False)["units_sold"].mean()
+    avg_sales = mean_units.rename(columns={"units_sold": "avg_units_sold"})
+
+    label_counts = reviews.groupby(["title", "sentiment_label"]).size()
+    sentiment = label_counts.unstack(fill_value=0)
+    # The total must be taken before the ratio columns are added to the frame.
+    sentiment["total"] = sentiment.sum(axis=1)
+    sentiment["positive_ratio"] = sentiment.get("positive", 0) / sentiment["total"]
+    sentiment["negative_ratio"] = sentiment.get("negative", 0) / sentiment["total"]
+
+    # Left merge keeps every title that has sales; missing values become 0.
+    merged = avg_sales.merge(sentiment, on="title", how="left")
+    decisions = merged.fillna(0)
+    decisions["pricing_action"] = decisions.apply(pricing_action, axis=1)
+
+    final_cols = ["title", "avg_units_sold", "positive_ratio", "negative_ratio", "pricing_action"]
+    selected = decisions[final_cols]
+    final_df = selected.sort_values("title").reset_index(drop=True)
+    final_df.to_csv(TAB_DIR / "pricing_decisions.csv", index=False)
+    return final_df
+def save_dashboard_export(sales):
+    """Write total units sold per month to ``df_dashboard.csv`` in ``TAB_DIR``.
+
+    Args:
+        sales: DataFrame with "month" and "units_sold" columns.
+    """
+    monthly_totals = sales.groupby("month", as_index=False).agg(total_units_sold=("units_sold", "sum"))
+    dashboard = monthly_totals.sort_values("month")
+    target = TAB_DIR / "df_dashboard.csv"
+    dashboard.to_csv(target, index=False)
+def bundle_exports():
+    """Pack every generated figure and table into one ZIP archive.
+
+    Returns:
+        The path of the archive (``exports.zip`` inside ``ART_DIR``) as a string.
+    """
+    zip_path = ART_DIR / "exports.zip"
+    with ZipFile(zip_path, "w") as archive:
+        members = [*FIG_DIR.glob("*"), *TAB_DIR.glob("*")]
+        for member in members:
+            # Store entries relative to ART_DIR so the archive keeps the
+            # figures/ and tables/ folder layout.
+            archive.write(member, arcname=member.relative_to(ART_DIR))
+    return str(zip_path)
+def run_analysis():
+    """Execute the whole workflow and hand back only the final outputs.
+
+    Returns:
+        A tuple of the sales chart path, the sentiment chart path, the
+        pricing decision DataFrame and the export ZIP path.
+    """
+    ensure_dirs()
     reviews, sales = load_data()
+    sales["month"] = pd.to_datetime(sales["month"])
+    sampled_titles = build_sample_titles(reviews)
+
+    sales_in_sample = sales["title"].isin(sampled_titles)
+    reviews_in_sample = reviews["title"].isin(sampled_titles)
+    sampled_sales = sales[sales_in_sample].copy()
+    sampled_reviews = reviews[reviews_in_sample].copy()
+    sampled_books = reviews[reviews_in_sample].copy()
+
+    sales_png = save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles)
+    sentiment_png = save_sentiment_chart(sampled_reviews)
+    decision_df = save_decision_table(reviews, sales)
+    save_dashboard_export(sales)
+    return sales_png, sentiment_png, decision_df, bundle_exports()
+# Page layout: heading and description, a run button, then the four output
+# components in the order they are created.
 with gr.Blocks() as demo:
+    gr.Markdown("# Book Analytics Dashboard")
+    gr.Markdown("Runs the fixed notebook workflow on the bundled review and sales datasets.")
+    run_btn = gr.Button("Run analysis")
+    sales_chart = gr.Image(label="Sales trends")
+    sentiment_chart = gr.Image(label="Sentiment distribution")
+    decision_table = gr.Dataframe(label="Pricing decisions")
+    exports = gr.File(label="Download all exports")
+    # run_analysis takes no inputs; its four return values map to the four
+    # output components in the order listed here.
+    run_btn.click(fn=run_analysis, inputs=None, outputs=[sales_chart, sentiment_chart, decision_table, exports])
+# Start the app only when this file is run as a script.
 if __name__ == "__main__":
     demo.launch()

requirements.txt CHANGED Viewed

@@ -1,5 +1,4 @@
-gradio==4.36.1
 pandas==2.2.2
 matplotlib==3.8.4
-vaderSentiment==3.3.2
 statsmodels==0.14.2

 pandas==2.2.2
 matplotlib==3.8.4
+seaborn==0.13.2
 statsmodels==0.14.2