ktara committed on
Commit
3e30f6d
·
verified ·
1 Parent(s): ebea030

Upload 5 files

Browse files
Files changed (3) hide show
  1. README.md +13 -2
  2. app.py +145 -57
  3. requirements.txt +1 -2
README.md CHANGED
@@ -1,12 +1,23 @@
1
  ---
2
  title: Book Analytics Dashboard
3
  emoji: 📊
 
 
4
  sdk: gradio
5
  app_file: app.py
 
6
  ---
7
 
8
  # Book Analytics Dashboard
9
 
10
- Run full analytics on fixed datasets.
11
 
12
- Click "Run Analysis" to generate results.
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Book Analytics Dashboard
3
  emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
  app_file: app.py
8
+ pinned: false
9
  ---
10
 
11
  # Book Analytics Dashboard
12
 
13
+ This Space runs the notebook workflow on two fixed datasets bundled in the repository.
14
 
15
+ ## Files expected in the repo
16
+ - `synthetic_book_reviews.csv`
17
+ - `synthetic_sales_data.csv`
18
+
19
+ ## What the app shows
20
+ - Sales trends chart
21
+ - Sentiment distribution chart
22
+ - Pricing decisions table
23
+ - Downloadable export ZIP
app.py CHANGED
@@ -2,73 +2,161 @@
2
  # Generated with The App Builder. ESCP coursework.
3
  # Student must be able to explain all code when asked.
4
 
 
 
 
 
 
5
  import gradio as gr
6
- import pandas as pd
7
  import matplotlib.pyplot as plt
8
- from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
9
- from statsmodels.tsa.arima.model import ARIMA
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def load_data():
12
- reviews = pd.read_csv("synthetic_book_reviews.csv")
13
- sales = pd.read_csv("synthetic_sales_data.csv")
 
 
 
 
 
 
 
14
  return reviews, sales
15
 
16
- def run_sentiment_analysis(df):
17
- analyzer = SentimentIntensityAnalyzer()
18
- def get_sentiment(text):
19
- score = analyzer.polarity_scores(str(text))["compound"]
20
- if score >= 0.05:
21
- return "Positive"
22
- elif score <= -0.05:
23
- return "Negative"
24
- return "Neutral"
25
- df["sentiment"] = df["review"].apply(get_sentiment)
26
- return df
27
-
28
- def create_sentiment_plot(df):
29
- fig, ax = plt.subplots()
30
- df["sentiment"].value_counts().plot(kind="bar", ax=ax)
31
- ax.set_title("Sentiment Distribution")
32
- filepath = "sentiment_plot.png"
33
- fig.savefig(filepath)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  plt.close(fig)
35
- return filepath
36
-
37
- def forecast_sales(df):
38
- df["date"] = pd.to_datetime(df["date"])
39
- df = df.sort_values("date")
40
- model = ARIMA(df["sales"], order=(1, 1, 1))
41
- model_fit = model.fit()
42
- return model_fit.forecast(steps=5)
43
-
44
- def pricing_decision(sentiment_df, forecast):
45
- sentiment_score = sentiment_df["sentiment"].value_counts(normalize=True)
46
- positive_ratio = sentiment_score.get("Positive", 0)
47
- avg_forecast = forecast.mean()
48
- decision = "Increase Price" if positive_ratio > 0.6 else "Keep Price"
49
- result = pd.DataFrame({
50
- "Positive Sentiment Ratio": [positive_ratio],
51
- "Avg Forecast Sales": [avg_forecast],
52
- "Decision": [decision]
53
- })
54
- result.to_csv("pricing_decision.csv", index=False)
55
- return result
56
-
57
- def run_full_analysis():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  reviews, sales = load_data()
59
- reviews = run_sentiment_analysis(reviews)
60
- plot_path = create_sentiment_plot(reviews)
61
- forecast = forecast_sales(sales)
62
- decision_df = pricing_decision(reviews, forecast)
63
- return plot_path, decision_df, "pricing_decision.csv"
 
 
 
 
 
 
 
64
 
65
  with gr.Blocks() as demo:
66
- gr.Markdown("# 📊 Automated Book Analytics Dashboard")
67
- run_button = gr.Button("Run Analysis")
68
- plot_output = gr.Image()
69
- table_output = gr.Dataframe()
70
- file_output = gr.File()
71
- run_button.click(fn=run_full_analysis, inputs=[], outputs=[plot_output, table_output, file_output])
 
 
72
 
73
  if __name__ == "__main__":
74
  demo.launch()
 
2
  # Generated with The App Builder. ESCP coursework.
3
  # Student must be able to explain all code when asked.
4
 
5
+ """Gradio Space that runs the fixed notebook workflow on bundled CSV files."""
6
+
7
+ from pathlib import Path
8
+ import shutil
9
+ import warnings
10
  import gradio as gr
 
11
  import matplotlib.pyplot as plt
12
+ import pandas as pd
13
+ import seaborn as sns
14
+ import statsmodels.api as sm
15
+ from itertools import product
16
+ from zipfile import ZipFile
17
+
18
+ DATA_REVIEWS = "synthetic_book_reviews.csv"
19
+ DATA_SALES = "synthetic_sales_data.csv"
20
+ ART_DIR = Path("artifacts")
21
+ FIG_DIR = ART_DIR / "figures"
22
+ TAB_DIR = ART_DIR / "tables"
23
+
24
+
25
def ensure_dirs():
    """Make sure the figure and table output folders exist before anything is written."""
    for folder in (FIG_DIR, TAB_DIR):
        # parents=True builds missing parent folders; exist_ok=True makes repeat runs a no-op.
        folder.mkdir(parents=True, exist_ok=True)
29
+
30
 
31
  def load_data():
32
+ """Load the two fixed datasets bundled with the Space."""
33
+ reviews = pd.read_csv(DATA_REVIEWS)
34
+ sales = pd.read_csv(DATA_SALES)
35
+ required_reviews = {"title", "review_text", "rating", "popularity_score"}
36
+ required_sales = {"title", "month", "units_sold"}
37
+ if not required_reviews.issubset(reviews.columns):
38
+ raise ValueError(f"Missing review columns: {required_reviews - set(reviews.columns)}")
39
+ if not required_sales.issubset(sales.columns):
40
+ raise ValueError(f"Missing sales columns: {required_sales - set(sales.columns)}")
41
  return reviews, sales
42
 
43
+
44
def build_sample_titles(reviews, per_score=5):
    """Pick up to ``per_score`` titles from each popularity score, like in the notebook.

    Args:
        reviews: DataFrame with ``popularity_score`` and ``title`` columns.
        per_score: Maximum number of titles taken per popularity score.
            Defaults to 5, the value the notebook used.

    Returns:
        list: Titles ordered by ascending popularity score, then by first
        appearance in ``reviews``. Each title appears at most once.
    """
    sampled_titles = []
    for score in sorted(reviews["popularity_score"].dropna().unique()):
        titles = reviews.loc[reviews["popularity_score"] == score, "title"].dropna().unique().tolist()
        sampled_titles.extend(titles[:per_score])
    # A title listed under several scores would otherwise be returned (and
    # later plotted) more than once; dict.fromkeys keeps first-seen order.
    return list(dict.fromkeys(sampled_titles))
51
+
52
+
53
def save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles):
    """Create the sampled sales trend figure and save it as a PNG.

    Args:
        sampled_sales: DataFrame with ``title``, ``month`` and ``units_sold``.
        sampled_books: DataFrame with ``title`` and ``popularity_score``; the
            first row per title decides the line colour.
        sampled_titles: Titles to draw, one line each.

    Returns:
        str: Path of the saved image inside ``FIG_DIR``.
    """
    popularity_colors = {1: "darkred", 2: "orangered", 3: "gold", 4: "mediumseagreen", 5: "royalblue"}
    # First recorded score per title. Looking it up with .get() means a title
    # with no book row falls back to gray instead of raising IndexError.
    score_by_title = sampled_books.drop_duplicates("title").set_index("title")["popularity_score"]
    out = FIG_DIR / "sales_trends_sampled_titles.png"
    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        for title in sampled_titles:
            # Sort by month so each line runs chronologically even when the
            # rows arrive unordered.
            subset = sampled_sales[sampled_sales["title"] == title].sort_values("month")
            color = popularity_colors.get(score_by_title.get(title), "gray")
            ax.plot(subset["month"], subset["units_sold"], label=title, color=color)
        ax.set_title("Sales Trends Over Time")
        ax.set_xlabel("Month")
        ax.set_ylabel("Units Sold")
        ax.tick_params(axis="x", rotation=45)
        ax.grid(True, alpha=0.3)
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
        fig.tight_layout()
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
    return str(out)
73
+
74
+
75
def save_sentiment_chart(sampled_reviews):
    """Draw the stacked sentiment bar chart and write the underlying counts to CSV.

    Args:
        sampled_reviews: DataFrame with ``rating``, ``title`` and
            ``sentiment_label`` columns. It is copied, not modified.

    Returns:
        str: Path of the saved image inside ``FIG_DIR``.
    """
    label_order = ["negative", "neutral", "positive"]
    palette = {"negative": "royalblue", "neutral": "lightgray", "positive": "crimson"}

    frame = sampled_reviews.copy()
    # Prefix each title with its rating so a bar is identified by both.
    frame["grouped_title"] = frame["rating"].astype(str) + "★ | " + frame["title"]

    per_label = frame.groupby(["grouped_title", "sentiment_label"]).size()
    counts = per_label.unstack(fill_value=0)
    # Always expose the three label columns, in a fixed order, even if one is absent.
    counts = counts.reindex(columns=label_order, fill_value=0)
    counts.reset_index().to_csv(TAB_DIR / "sentiment_counts_sampled.csv", index=False)

    fig, ax = plt.subplots(figsize=(12, 12))
    counts.plot.barh(stacked=True, ax=ax, color=palette)
    ax.set_title("Sentiment Distribution in Reviews")
    ax.set_xlabel("Number of Reviews")
    ax.set_ylabel("Book Title")
    ax.grid(axis="x", linestyle="--", alpha=0.4)
    fig.tight_layout()

    target = FIG_DIR / "sentiment_distribution_sampled_titles.png"
    fig.savefig(target, dpi=150, bbox_inches="tight")
    plt.close(fig)
    return str(target)
93
+
94
+
95
def pricing_action(row, *, high_sales=120, high_positive=0.6, low_sales=60, high_negative=0.4):
    """Apply the notebook pricing rules to one title.

    With the default thresholds this reproduces the notebook rules exactly.

    Args:
        row: Mapping-like record (dict or pandas Series) with
            ``avg_units_sold`` and, optionally, ``positive_ratio`` and
            ``negative_ratio``. A missing ratio counts as 0.
        high_sales: Minimum average units sold to consider a price increase.
        high_positive: Minimum positive-review ratio for a price increase.
        low_sales: Maximum average units sold to consider a price decrease.
        high_negative: Minimum negative-review ratio for a price decrease.

    Returns:
        str: ``"increase price"``, ``"decrease price"`` or ``"keep price"``.
        The increase rule is checked first.
    """
    avg_units = row["avg_units_sold"]
    if avg_units >= high_sales and row.get("positive_ratio", 0) >= high_positive:
        return "increase price"
    if avg_units <= low_sales and row.get("negative_ratio", 0) >= high_negative:
        return "decrease price"
    return "keep price"
102
+
103
+
104
def save_decision_table(reviews, sales):
    """Build the per-title pricing decision table, save it as CSV and return it.

    Args:
        reviews: DataFrame with ``title`` and ``sentiment_label`` columns.
        sales: DataFrame with ``title`` and ``units_sold`` columns.

    Returns:
        DataFrame sorted by title with the average units sold, the positive
        and negative review ratios, and the resulting pricing action.
    """
    # Mean units sold per title.
    mean_units = sales.groupby("title", as_index=False)["units_sold"].mean()
    avg_sales = mean_units.rename(columns={"units_sold": "avg_units_sold"})

    # Review counts per title and sentiment label, one column per label.
    label_counts = reviews.groupby(["title", "sentiment_label"]).size().unstack(fill_value=0)
    label_counts["total"] = label_counts.sum(axis=1)
    # .get(..., 0) keeps the ratio at 0 when a label never occurs in the data.
    label_counts["positive_ratio"] = label_counts.get("positive", 0) / label_counts["total"]
    label_counts["negative_ratio"] = label_counts.get("negative", 0) / label_counts["total"]

    # The left join keeps every title that has sales; titles with no reviews get zeros.
    merged = avg_sales.merge(label_counts, on="title", how="left")
    merged = merged.fillna(0)
    merged["pricing_action"] = merged.apply(pricing_action, axis=1)

    keep = ["title", "avg_units_sold", "positive_ratio", "negative_ratio", "pricing_action"]
    table = merged[keep].sort_values("title").reset_index(drop=True)
    table.to_csv(TAB_DIR / "pricing_decisions.csv", index=False)
    return table
117
+
118
+
119
def save_dashboard_export(sales):
    """Write the month-by-month total of units sold to ``df_dashboard.csv`` in ``TAB_DIR``.

    Args:
        sales: DataFrame with ``month`` and ``units_sold`` columns.
    """
    by_month = sales.groupby("month", as_index=False)
    monthly_totals = by_month.agg(total_units_sold=("units_sold", "sum"))
    monthly_totals = monthly_totals.sort_values("month")
    monthly_totals.to_csv(TAB_DIR / "df_dashboard.csv", index=False)
123
+
124
+
125
def bundle_exports():
    """Pack every generated figure and table into one ZIP archive for download.

    Returns:
        str: Path of the archive, written directly under ``ART_DIR``.
    """
    archive = ART_DIR / "exports.zip"
    with ZipFile(archive, "w") as bundle:
        members = [*FIG_DIR.glob("*"), *TAB_DIR.glob("*")]
        for member in members:
            # Store paths relative to ART_DIR so the archive keeps the folder layout.
            bundle.write(member, arcname=member.relative_to(ART_DIR))
    return str(archive)
132
+
133
+
134
def run_analysis():
    """Run the whole pipeline and return only the final outputs.

    Returns:
        tuple: ``(sales chart path, sentiment chart path, pricing decision
        DataFrame, export ZIP path)``, in the order the UI outputs expect.
    """
    ensure_dirs()
    reviews, sales = load_data()
    sales["month"] = pd.to_datetime(sales["month"])

    sampled_titles = build_sample_titles(reviews)
    sales_in_sample = sales["title"].isin(sampled_titles)
    reviews_in_sample = reviews["title"].isin(sampled_titles)
    sampled_sales = sales[sales_in_sample].copy()
    sampled_reviews = reviews[reviews_in_sample].copy()
    # Same rows as sampled_reviews; used for the per-title popularity lookup.
    sampled_books = reviews[reviews_in_sample].copy()

    sales_png = save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles)
    sentiment_png = save_sentiment_chart(sampled_reviews)
    # The decision table and the dashboard export use the full data, not the sample.
    decision_df = save_decision_table(reviews, sales)
    save_dashboard_export(sales)
    archive_path = bundle_exports()
    return sales_png, sentiment_png, decision_df, archive_path
149
+
150
 
151
  with gr.Blocks() as demo:
152
+ gr.Markdown("# Book Analytics Dashboard")
153
+ gr.Markdown("Runs the fixed notebook workflow on the bundled review and sales datasets.")
154
+ run_btn = gr.Button("Run analysis")
155
+ sales_chart = gr.Image(label="Sales trends")
156
+ sentiment_chart = gr.Image(label="Sentiment distribution")
157
+ decision_table = gr.Dataframe(label="Pricing decisions")
158
+ exports = gr.File(label="Download all exports")
159
+ run_btn.click(fn=run_analysis, inputs=None, outputs=[sales_chart, sentiment_chart, decision_table, exports])
160
 
161
  if __name__ == "__main__":
162
  demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
- gradio==4.36.1
2
  pandas==2.2.2
3
  matplotlib==3.8.4
4
- vaderSentiment==3.3.2
5
  statsmodels==0.14.2
 
 
1
  pandas==2.2.2
2
  matplotlib==3.8.4
3
+ seaborn==0.13.2
4
  statsmodels==0.14.2