Danielleeee committed on
Commit
4501e16
·
verified ·
1 Parent(s): 2e3dfea

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +36 -5
  2. app.py +261 -0
  3. requirements.txt +18 -0
  4. style.css +33 -0
README.md CHANGED
@@ -1,12 +1,43 @@
1
  ---
2
- title: '123'
3
- emoji: 🏢
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 6.9.0
 
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: ESCP Notebook Runner
3
+ emoji: 📊
4
+ colorFrom: indigo
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 6.9.0
8
+ python_version: 3.10.13
9
  app_file: app.py
10
  pinned: false
11
  ---
12
 
13
+ # ESCP Notebook Runner
14
+
15
+ This Hugging Face Space runs one bundled Jupyter notebook on two bundled CSV datasets and shows the outputs in a Gradio dashboard.
16
+
17
+ ## Included files
18
+
19
+ - `app.py` — Gradio app
20
+ - `analysis.ipynb` — bundled notebook
21
+ - `synthetic_book_reviews.csv` — bundled reviews dataset
22
+ - `synthetic_sales_data.csv` — bundled sales dataset
23
+ - `requirements.txt` — Python dependencies
24
+ - `style.css` — clean styling with no background images
25
+ - `artifacts/` — where notebook outputs are saved
26
+ - `runs/` — executed notebook copies
27
+
28
+ ## Why this update was needed
29
+
30
+ The previous version built the image, but the app could still fail at runtime because:
31
+ 1. CSS was passed to `demo.launch(...)` instead of `gr.Blocks(..., css=...)`
32
+ 2. the Space image used a newer Gradio runtime than the app code expected
33
+ 3. Python 3.13 can break notebook/data-science dependencies more often than Python 3.10
34
+
35
+ ## How to use it
36
+
37
+ 1. Create a new Hugging Face **Gradio** Space.
38
+ 2. Upload all files from this folder.
39
+ 3. Wait for the build to finish.
40
+ 4. Open the Space and click **Run Full Pipeline**.
41
+ 5. Open **Dashboard** and click **Refresh Dashboard**.
42
+
43
+ You can leave the three upload fields empty to use the bundled notebook and CSV files.
app.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI-Assisted Code — Academic Integrity Notice
2
+ # Generated with The App Builder. ESCP coursework.
3
+ # Student must be able to explain all code when asked.
4
+
5
+ import shutil
6
+ import time
7
+ import traceback
8
+ from pathlib import Path
9
+
10
+ import gradio as gr
11
+ import pandas as pd
12
+ import papermill as pm
13
+ import plotly.graph_objects as go
14
+
15
# Filesystem layout: every folder is resolved relative to this script.
BASE_DIR = Path(__file__).resolve().parent
RUNS_DIR = BASE_DIR.joinpath("runs")
ART_DIR = BASE_DIR.joinpath("artifacts")
FIG_DIR = ART_DIR.joinpath("py", "figures")
TAB_DIR = ART_DIR.joinpath("py", "tables")

# Bundled inputs used whenever the matching upload field is left empty.
DEFAULT_NOTEBOOK = BASE_DIR.joinpath("analysis.ipynb")
DEFAULT_REVIEWS = BASE_DIR.joinpath("synthetic_book_reviews.csv")
DEFAULT_SALES = BASE_DIR.joinpath("synthetic_sales_data.csv")

# Forwarded to papermill as ``execution_timeout``.
PAPERMILL_TIMEOUT = 1800
# Row cap applied when previewing CSV artifacts.
MAX_PREVIEW_ROWS = 50
27
+
28
+
29
def ensure_dirs() -> None:
    """Make sure the runs, figures and tables folders exist."""
    for folder in (RUNS_DIR, FIG_DIR, TAB_DIR):
        folder.mkdir(parents=True, exist_ok=True)
33
+
34
+
35
def load_css() -> str:
    """Return the text of ``style.css`` next to this script, or "" if it is absent."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
39
+
40
+
41
def timestamp() -> str:
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    return time.strftime("%Y%m%d-%H%M%S", time.localtime())
43
+
44
+
45
+ def copy_input(source_path: str | None, fallback: Path, target: Path) -> None:
46
+ """Copy the uploaded file or reuse the bundled default file."""
47
+ source = Path(source_path) if source_path else fallback
48
+ if not source.exists():
49
+ raise FileNotFoundError(f"Missing required file: {source.name}")
50
+ shutil.copy2(source, target)
51
+
52
+
53
def prepare_inputs(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> None:
    """Copy the notebook and both CSVs to fixed filenames inside the app folder."""
    plan = (
        (notebook_path, DEFAULT_NOTEBOOK, "analysis.ipynb"),
        (reviews_path, DEFAULT_REVIEWS, "synthetic_book_reviews.csv"),
        (sales_path, DEFAULT_SALES, "synthetic_sales_data.csv"),
    )
    for uploaded, bundled, fixed_name in plan:
        copy_input(uploaded, bundled, BASE_DIR / fixed_name)
58
+
59
+
60
def run_pipeline(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> str:
    """Run the notebook through papermill and describe the outcome as text.

    On success the message names the executed notebook copy and lists the
    files found in the figures and tables folders. Any exception is turned
    into a failure message followed by the last 5000 characters of the
    traceback, so this function does not raise for pipeline errors.
    """
    ensure_dirs()
    try:
        prepare_inputs(notebook_path, reviews_path, sales_path)
        executed_copy = RUNS_DIR / f"run_{timestamp()}_analysis.ipynb"
        papermill_options = {
            "input_path": str(BASE_DIR / "analysis.ipynb"),
            "output_path": str(executed_copy),
            "cwd": str(BASE_DIR),
            "log_output": True,
            "progress_bar": False,
            "request_save_on_cell_execute": True,
            "execution_timeout": PAPERMILL_TIMEOUT,
        }
        pm.execute_notebook(**papermill_options)
        figures = sorted(entry.name for entry in FIG_DIR.glob("*") if entry.is_file())
        tables = sorted(entry.name for entry in TAB_DIR.glob("*") if entry.is_file())
        return (
            "Pipeline completed successfully.\n\n"
            f"Notebook output: {executed_copy.name}\n"
            f"Figures: {', '.join(figures) or '(none)'}\n"
            f"Tables: {', '.join(tables) or '(none)'}"
        )
    except Exception as exc:
        tail = traceback.format_exc()[-5000:]
        return f"Pipeline failed: {exc}\n\n{tail}"
85
+
86
+
87
def read_json(path: Path) -> dict:
    """Decode the UTF-8 JSON file at ``path`` and return the parsed object."""
    import json

    return json.loads(path.read_text(encoding="utf-8"))
91
+
92
+
93
def load_table(path: Path) -> pd.DataFrame:
    """Load a CSV or JSON artifact as a preview of at most ``MAX_PREVIEW_ROWS`` rows.

    Args:
        path: Artifact to preview. A ``.json`` suffix (any case) selects the
            JSON reader; everything else is read as CSV.

    Returns:
        The preview frame. If reading fails, a one-row frame whose ``error``
        column holds the exception text is returned instead of raising.
    """
    try:
        if path.suffix.lower() == ".json":
            obj = read_json(path)
            # A single JSON object becomes one row; anything else is handed to pandas as-is.
            frame = pd.DataFrame([obj]) if isinstance(obj, dict) else pd.DataFrame(obj)
            # Apply the same row cap the CSV branch gets through ``nrows``.
            return frame.head(MAX_PREVIEW_ROWS)
        return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    except Exception as exc:
        return pd.DataFrame([{"error": str(exc)}])
102
+
103
+
104
def list_tables() -> list[str]:
    """Return the sorted names of ``.csv`` and ``.json`` entries in the tables folder."""
    previewable = {".csv", ".json"}
    names = [entry.name for entry in TAB_DIR.glob("*") if entry.suffix.lower() in previewable]
    names.sort()
    return names
106
+
107
+
108
def gallery_items() -> list[tuple[str, str]]:
    """Pair each PNG in the figures folder with a title-cased caption built from its stem."""
    items = []
    for image in sorted(FIG_DIR.glob("*.png")):
        caption = image.stem.replace("_", " ").title()
        items.append((str(image), caption))
    return items
110
+
111
+
112
def load_kpis() -> dict:
    """Return the KPI mapping from the first usable ``kpis.json``.

    The tables folder is checked first, then the figures folder. A candidate
    that is missing, unreadable, invalid JSON, or not a JSON object is skipped
    so that a bad first file no longer hides a good second one.

    Returns:
        The parsed KPI dict, or ``{}`` when no usable file is found.
    """
    for candidate in (TAB_DIR / "kpis.json", FIG_DIR / "kpis.json"):
        if not candidate.exists():
            continue
        try:
            data = read_json(candidate)
        except (OSError, ValueError):
            # ValueError covers both JSON syntax errors and bad UTF-8.
            continue
        if isinstance(data, dict):
            return data
    return {}
120
+
121
+
122
def kpi_cards_html() -> str:
    """Render the known KPIs as HTML cards.

    Returns:
        A ``card-grid`` div with one card per recognised KPI key that is
        present, or a single "No data yet" card when no KPI mapping is
        available. Numeric values with magnitude >= 100 are shown with
        thousands separators and no decimals.
    """
    from html import escape

    kpis = load_kpis()
    if not kpis or not isinstance(kpis, dict):
        return '<div class="card-grid"><div class="card"><b>No data yet</b><br>Run the pipeline first.</div></div>'
    config = [
        ("n_titles", "Book Titles"),
        ("n_months", "Time Periods"),
        ("total_units_sold", "Units Sold"),
        ("total_revenue", "Revenue"),
    ]
    cards = []
    for key, label in config:
        if key not in kpis:
            continue
        value = kpis[key]
        if isinstance(value, (int, float)) and abs(value) >= 100:
            value = f"{value:,.0f}"
        # Values come from a JSON file written by the (possibly uploaded)
        # notebook, so escape them before placing them in markup.
        shown = escape(str(value))
        cards.append(f'<div class="card"><div class="label">{label}</div><div class="value">{shown}</div></div>')
    return '<div class="card-grid">' + "".join(cards) + "</div>"
141
+
142
+
143
def empty_chart(title: str) -> go.Figure:
    """Build a blank white figure titled ``title`` with a centred "Run the pipeline first" note."""
    note = dict(text="Run the pipeline first", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
    layout_options = {
        "title": title,
        "template": "plotly_white",
        "height": 420,
        "paper_bgcolor": "white",
        "plot_bgcolor": "white",
        "annotations": [note],
    }
    placeholder = go.Figure()
    placeholder.update_layout(**layout_options)
    return placeholder
154
+
155
+
156
def build_sales_chart() -> go.Figure:
    """Plot every numeric column of ``df_dashboard.csv`` against its date column.

    The date column is the first whose name contains "month" or "date"
    (case-insensitive). The placeholder chart is returned when the file is
    missing, no date column is found, or there is no numeric column to plot.
    """
    chart_title = "Monthly Overview"
    source = TAB_DIR / "df_dashboard.csv"
    if not source.exists():
        return empty_chart(chart_title)
    frame = pd.read_csv(source)

    date_col = None
    for column in frame.columns:
        lowered = column.lower()
        if "month" in lowered or "date" in lowered:
            date_col = column
            break
    val_cols = [
        column
        for column in frame.columns
        if column != date_col and pd.api.types.is_numeric_dtype(frame[column])
    ]
    if not (date_col and val_cols):
        return empty_chart(chart_title)

    # Unparseable dates become NaT instead of raising.
    frame[date_col] = pd.to_datetime(frame[date_col], errors="coerce")
    fig = go.Figure()
    for column in val_cols:
        series_name = column.replace("_", " ").title()
        fig.add_trace(go.Scatter(x=frame[date_col], y=frame[column], mode="lines+markers", name=series_name))
    fig.update_layout(
        title=chart_title,
        template="plotly_white",
        height=450,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    return fig
171
+
172
+
173
def build_sentiment_chart() -> go.Figure:
    """Draw stacked horizontal bars from ``sentiment_counts_sampled.csv``.

    The first column supplies the bar labels; the "negative", "neutral" and
    "positive" columns that exist are stacked in that order. The placeholder
    chart is returned when the file is missing.
    """
    chart_title = "Sentiment Distribution"
    source = TAB_DIR / "sentiment_counts_sampled.csv"
    if not source.exists():
        return empty_chart(chart_title)
    frame = pd.read_csv(source)
    label_col = frame.columns[0]

    fig = go.Figure()
    for sentiment in ("negative", "neutral", "positive"):
        if sentiment not in frame.columns:
            continue
        fig.add_trace(go.Bar(y=frame[label_col], x=frame[sentiment], orientation="h", name=sentiment.title()))
    fig.update_layout(
        title=chart_title,
        barmode="stack",
        template="plotly_white",
        # Grow with the number of rows, but never below 420 px.
        height=max(420, len(frame) * 28),
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    fig.update_yaxes(autorange="reversed")
    return fig
185
+
186
+
187
def build_top_sellers_chart() -> go.Figure:
    """Draw horizontal bars from the first 15 rows of ``top_titles_by_units_sold.csv``.

    Labels come from the first column whose name contains "title" (else the
    first column); values from the first column whose name contains "unit" or
    "sold" (else the last column). The placeholder chart is returned when the
    file is missing.
    """
    chart_title = "Top Sellers"
    source = TAB_DIR / "top_titles_by_units_sold.csv"
    if not source.exists():
        return empty_chart(chart_title)
    frame = pd.read_csv(source).head(15)

    first_col = frame.columns[0]
    title_col = next((name for name in frame.columns if "title" in name.lower()), first_col)
    last_col = frame.columns[-1]
    value_col = next(
        (name for name in frame.columns if "unit" in name.lower() or "sold" in name.lower()),
        last_col,
    )

    bars = go.Bar(y=frame[title_col], x=frame[value_col], orientation="h")
    fig = go.Figure(bars)
    fig.update_layout(
        title=chart_title,
        template="plotly_white",
        height=max(420, len(frame) * 28),
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    fig.update_yaxes(autorange="reversed")
    return fig
198
+
199
+
200
def refresh_table(choice: str | None) -> pd.DataFrame:
    """Preview the table named ``choice`` from the tables folder, or return a hint row if none is chosen."""
    if choice:
        return load_table(TAB_DIR / choice)
    return pd.DataFrame([{"hint": "Choose a table first."}])
204
+
205
+
206
def refresh_dashboard() -> tuple:
    """Rebuild every dashboard output.

    Returns, in order: KPI HTML, the sales, sentiment and top-sellers figures,
    the gallery items, a dropdown update preselecting the first table, and
    that table's preview (an empty frame when there are no tables).
    """
    choices = list_tables()
    selected = choices[0] if choices else None
    table_df = pd.DataFrame() if not selected else refresh_table(selected)

    kpi_html = kpi_cards_html()
    sales_fig = build_sales_chart()
    sentiment_fig = build_sentiment_chart()
    top_fig = build_top_sellers_chart()
    figures = gallery_items()
    dropdown_update = gr.update(choices=choices, value=selected)
    return (kpi_html, sales_fig, sentiment_fig, top_fig, figures, dropdown_update, table_df)
219
+
220
+
221
# Create the output folders up front so the dashboard can glob them safely.
ensure_dirs()

# NOTE(review): the Gradio 6 migration guide appears to move `css` from
# gr.Blocks() to launch() - confirm the stylesheet is still applied on the
# pinned Gradio version.
with gr.Blocks(title="Notebook Runner Space", css=load_css()) as demo:
    gr.Markdown(
        "# ESCP Notebook Runner\n"
        "Run the bundled notebook on the two bundled CSV datasets, or replace them with your own files."
    )

    with gr.Tab("1. Run Notebook"):
        gr.Markdown(
            "Default project files already included in the Space:\n"
            "- `analysis.ipynb`\n"
            "- `synthetic_book_reviews.csv`\n"
            "- `synthetic_sales_data.csv`\n\n"
            "You can leave all upload fields empty to use the bundled files."
        )
        notebook_file = gr.File(label="Optional notebook (.ipynb)", file_types=[".ipynb"], type="filepath")
        reviews_file = gr.File(label="Optional reviews CSV", file_types=[".csv"], type="filepath")
        sales_file = gr.File(label="Optional sales CSV", file_types=[".csv"], type="filepath")
        run_button = gr.Button("Run Full Pipeline", variant="primary")
        run_log = gr.Textbox(label="Execution Log", lines=18, interactive=False)
        run_button.click(run_pipeline, inputs=[notebook_file, reviews_file, sales_file], outputs=run_log)

    with gr.Tab("2. Dashboard"):
        # KPI cards are rendered once at build time; the charts stay empty
        # until the user clicks the refresh button.
        kpis = gr.HTML(value=kpi_cards_html())
        refresh_button = gr.Button("Refresh Dashboard", variant="primary")
        chart_sales = gr.Plot(label="Monthly Overview")
        chart_sentiment = gr.Plot(label="Sentiment Distribution")
        chart_top = gr.Plot(label="Top Sellers")
        gallery = gr.Gallery(label="Generated Figures", columns=2, height=420, object_fit="contain")
        table_name = gr.Dropdown(label="Generated Tables", choices=[], interactive=True)
        table_preview = gr.Dataframe(label="Table Preview", interactive=False)
        refresh_button.click(
            refresh_dashboard,
            outputs=[kpis, chart_sales, chart_sentiment, chart_top, gallery, table_name, table_preview],
        )
        table_name.change(refresh_table, inputs=table_name, outputs=table_preview)

    with gr.Tab("3. Project Files"):
        gr.Markdown(
            "The package includes the notebook, the two CSV datasets, `requirements.txt`, `style.css`, and the `artifacts/` folders."
        )

# Only generated artifacts (the gallery images under artifacts/) are served by
# path, so expose that folder rather than the whole app directory, which also
# holds the source code and datasets.
demo.launch(allowed_paths=[str(ART_DIR)])
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==6.9.0
2
+ pandas==2.2.3
3
+ papermill==2.6.0
4
+ plotly==6.0.1
5
+ matplotlib==3.10.1
6
+ seaborn==0.13.2
7
+ numpy==2.2.4
8
+ statsmodels==0.14.4
9
+ vaderSentiment==3.3.2
10
+ textblob==0.19.0
11
+ faker==37.1.0
12
+ transformers==4.49.0
13
+ huggingface_hub==0.30.2
14
+ requests==2.32.3
15
+ nbformat==5.10.4
16
+ nbclient==0.10.2
17
+ ipykernel==6.29.5
18
+ jupyter-client==8.6.3
style.css ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ font-family: Arial, sans-serif;
3
+ }
4
+
5
+ .card-grid {
6
+ display: grid;
7
+ grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
8
+ gap: 12px;
9
+ margin-bottom: 16px;
10
+ }
11
+
12
+ .card {
13
+ background: #ffffff;
14
+ border: 1px solid #e5e7eb;
15
+ border-radius: 14px;
16
+ padding: 14px;
17
+ text-align: center;
18
+ box-shadow: 0 1px 4px rgba(0,0,0,0.06);
19
+ }
20
+
21
+ .card .label {
22
+ font-size: 12px;
23
+ color: #6b7280;
24
+ margin-bottom: 6px;
25
+ text-transform: uppercase;
26
+ letter-spacing: 0.04em;
27
+ }
28
+
29
+ .card .value {
30
+ font-size: 22px;
31
+ font-weight: 700;
32
+ color: #111827;
33
+ }