Spaces:

Danielleeee
/

123

Build error

App Files Files Community

123 / app.py

Danielleeee

Upload 4 files

4501e16 verified 22 days ago

raw

history blame contribute delete

10.3 kB

	# AI-Assisted Code — Academic Integrity Notice
	# Generated with The App Builder. ESCP coursework.
	# Student must be able to explain all code when asked.

	import shutil
	import time
	import traceback
	from pathlib import Path

	import gradio as gr
	import pandas as pd
	import papermill as pm
	import plotly.graph_objects as go

	# Every path below is anchored at the folder that contains this file.
	BASE_DIR = Path(__file__).resolve().parent
	# Executed notebook copies (one per pipeline run) are written here.
	RUNS_DIR = BASE_DIR / "runs"
	# Root of the artifact folders that the dashboard reads from.
	ART_DIR = BASE_DIR / "artifacts"
	# Images listed in the gallery; presumably written by the notebook - confirm.
	FIG_DIR = ART_DIR / "py" / "figures"
	# CSV/JSON tables listed in the dashboard; presumably written by the notebook - confirm.
	TAB_DIR = ART_DIR / "py" / "tables"

	# Bundled inputs used whenever the matching upload field is left empty.
	DEFAULT_NOTEBOOK = BASE_DIR / "analysis.ipynb"
	DEFAULT_REVIEWS = BASE_DIR / "synthetic_book_reviews.csv"
	DEFAULT_SALES = BASE_DIR / "synthetic_sales_data.csv"

	# Passed to papermill as execution_timeout; presumably seconds - confirm against papermill docs.
	PAPERMILL_TIMEOUT = 1800
	# Maximum number of rows read when previewing a CSV artifact.
	MAX_PREVIEW_ROWS = 50


	def ensure_dirs() -> None:
	    """Make sure the run, figure, and table folders exist.

	    Missing parent folders are created as well; folders that already
	    exist are left untouched.
	    """
	    required = (RUNS_DIR, FIG_DIR, TAB_DIR)
	    for folder in required:
	        folder.mkdir(parents=True, exist_ok=True)


	def load_css() -> str:
	    """Return the text of ``style.css`` in ``BASE_DIR``, or ``""`` when it is absent."""
	    stylesheet = BASE_DIR / "style.css"
	    if not stylesheet.exists():
	        return ""
	    return stylesheet.read_text(encoding="utf-8")


	def timestamp() -> str:
	    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
	    now = time.localtime()
	    return time.strftime("%Y%m%d-%H%M%S", now)


	def copy_input(source_path: str \| None, fallback: Path, target: Path) -> None:
	"""Copy the uploaded file or reuse the bundled default file."""
	source = Path(source_path) if source_path else fallback
	if not source.exists():
	raise FileNotFoundError(f"Missing required file: {source.name}")
	shutil.copy2(source, target)


	def prepare_inputs(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> None:
	    """Place the notebook and both CSV files at the fixed paths inside ``BASE_DIR``.

	    Each argument is the path of an uploaded file, or ``None`` to use the
	    bundled default of the same name.

	    NOTE(review): the target paths are the bundled default paths, so an
	    upload overwrites the bundled file and later runs without an upload
	    will reuse it.

	    Raises:
	        FileNotFoundError: Propagated from ``copy_input`` when a source is missing.
	    """
	    # (uploaded path, bundled default); the default path doubles as the target.
	    plan = (
	        (notebook_path, DEFAULT_NOTEBOOK),
	        (reviews_path, DEFAULT_REVIEWS),
	        (sales_path, DEFAULT_SALES),
	    )
	    for uploaded, default in plan:
	        copy_input(uploaded, default, default)


	def run_pipeline(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> str:
	    """Execute the notebook with papermill and return a readable log.

	    Args:
	        notebook_path: Uploaded notebook path, or ``None`` for the bundled one.
	        reviews_path: Uploaded reviews CSV path, or ``None`` for the bundled one.
	        sales_path: Uploaded sales CSV path, or ``None`` for the bundled one.

	    Returns:
	        A success summary naming the output notebook and the files currently
	        present in the figure and table folders, or a failure message that
	        ends with the last 5000 characters of the traceback. Exceptions are
	        reported in the returned text rather than raised.
	    """
	    ensure_dirs()
	    try:
	        prepare_inputs(notebook_path, reviews_path, sales_path)
	        output_nb = RUNS_DIR / f"run_{timestamp()}_analysis.ipynb"
	        pm.execute_notebook(
	            input_path=str(BASE_DIR / "analysis.ipynb"),
	            output_path=str(output_nb),
	            cwd=str(BASE_DIR),
	            log_output=True,
	            progress_bar=False,
	            request_save_on_cell_execute=True,
	            execution_timeout=PAPERMILL_TIMEOUT,
	        )
	        # These listings cover everything in the folders, including files
	        # left over from earlier runs.
	        figures = sorted(p.name for p in FIG_DIR.glob("*") if p.is_file())
	        tables = sorted(p.name for p in TAB_DIR.glob("*") if p.is_file())
	        return (
	            "Pipeline completed successfully.\n\n"
	            f"Notebook output: {output_nb.name}\n"
	            f"Figures: {', '.join(figures) or '(none)'}\n"
	            f"Tables: {', '.join(tables) or '(none)'}"
	        )
	    except Exception as exc:
	        # UI boundary: show the failure in the log box instead of raising.
	        return f"Pipeline failed: {exc}\n\n{traceback.format_exc()[-5000:]}"


	def read_json(path: Path) -> dict:
	    """Parse the UTF-8 JSON file at ``path`` and return the decoded value."""
	    import json

	    raw = path.read_text(encoding="utf-8")
	    return json.loads(raw)


	def load_table(path: Path) -> pd.DataFrame:
	    """Load a CSV or JSON artifact as a preview of at most ``MAX_PREVIEW_ROWS`` rows.

	    A ``.json`` file holding a single object becomes a one-row frame; any
	    other JSON value is handed to ``pd.DataFrame`` as is. Every other
	    suffix is read as CSV.

	    Returns:
	        The preview frame, or a one-row frame with an ``error`` column
	        when loading fails for any reason.
	    """
	    try:
	        if path.suffix.lower() == ".json":
	            obj = read_json(path)
	            frame = pd.DataFrame([obj]) if isinstance(obj, dict) else pd.DataFrame(obj)
	            # Apply the same row cap the CSV branch gets through nrows.
	            return frame.head(MAX_PREVIEW_ROWS)
	        return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
	    except Exception as exc:
	        # Best effort: the dashboard shows the message instead of crashing.
	        return pd.DataFrame([{"error": str(exc)}])


	def list_tables() -> list[str]:
	    """Return the sorted names of the ``.csv`` and ``.json`` entries in ``TAB_DIR``."""
	    wanted = {".csv", ".json"}
	    names = [entry.name for entry in TAB_DIR.glob("*") if entry.suffix.lower() in wanted]
	    names.sort()
	    return names


	def gallery_items() -> list[tuple[str, str]]:
	    """Return ``(path, caption)`` pairs for every PNG in ``FIG_DIR``, sorted by path.

	    The caption is the file stem with underscores turned into spaces, title-cased.
	    """
	    items = []
	    for image in sorted(FIG_DIR.glob("*.png")):
	        caption = image.stem.replace("_", " ").title()
	        items.append((str(image), caption))
	    return items


	def load_kpis() -> dict:
	    """Return the KPI mapping from the first usable ``kpis.json``.

	    ``TAB_DIR`` is checked before ``FIG_DIR``. A candidate that is missing,
	    cannot be read or parsed, or does not hold a JSON object is skipped so
	    the next one still gets a chance.

	    Returns:
	        The decoded mapping, or ``{}`` when no candidate is usable.
	    """
	    for candidate in (TAB_DIR / "kpis.json", FIG_DIR / "kpis.json"):
	        if not candidate.exists():
	            continue
	        try:
	            data = read_json(candidate)
	        except Exception:
	            # Best effort: a broken file must not hide a valid fallback.
	            continue
	        # Callers index the result by key, so only a mapping is acceptable.
	        if isinstance(data, dict):
	            return data
	    return {}


	def kpi_cards_html() -> str:
	    """Render the KPI cards as an HTML fragment.

	    Only the four known KPI keys are shown, in a fixed order. Numeric
	    values whose magnitude is at least 100 are shown with thousands
	    separators and no decimals; everything else is shown as is.

	    Returns:
	        A ``card-grid`` div, or a single "No data yet" card when no KPIs
	        are available.
	    """
	    import html

	    kpis = load_kpis()
	    if not kpis:
	        return '<div class="card-grid"><div class="card"><b>No data yet</b><br>Run the pipeline first.</div></div>'
	    config = [
	        ("n_titles", "Book Titles"),
	        ("n_months", "Time Periods"),
	        ("total_units_sold", "Units Sold"),
	        ("total_revenue", "Revenue"),
	    ]
	    cards = []
	    for key, label in config:
	        if key not in kpis:
	            continue
	        value = kpis[key]
	        if isinstance(value, (int, float)) and abs(value) >= 100:
	            value = f"{value:,.0f}"
	        # The value comes from a generated JSON file; escape it so markup
	        # in the data cannot end up in the page.
	        shown = html.escape(str(value))
	        cards.append(f'<div class="card"><div class="label">{label}</div><div class="value">{shown}</div></div>')
	    return '<div class="card-grid">' + "".join(cards) + "</div>"


	def empty_chart(title: str) -> go.Figure:
	    """Return a blank white figure titled ``title`` with a centered "Run the pipeline first" note."""
	    placeholder = dict(
	        text="Run the pipeline first",
	        x=0.5,
	        y=0.5,
	        xref="paper",
	        yref="paper",
	        showarrow=False,
	    )
	    layout = dict(
	        title=title,
	        template="plotly_white",
	        height=420,
	        paper_bgcolor="white",
	        plot_bgcolor="white",
	        annotations=[placeholder],
	    )
	    blank = go.Figure()
	    blank.update_layout(**layout)
	    return blank


	def build_sales_chart() -> go.Figure:
	    """Plot every numeric column of ``df_dashboard.csv`` against its date column.

	    The date column is the first column whose name contains "month" or
	    "date" (case-insensitive). A placeholder chart is returned when the
	    file, the date column, or the numeric columns are missing.
	    """
	    csv_path = TAB_DIR / "df_dashboard.csv"
	    if not csv_path.exists():
	        return empty_chart("Monthly Overview")
	    data = pd.read_csv(csv_path)

	    time_col = None
	    for name in data.columns:
	        lowered = name.lower()
	        if "month" in lowered or "date" in lowered:
	            time_col = name
	            break
	    series_cols = [
	        name
	        for name in data.columns
	        if name != time_col and pd.api.types.is_numeric_dtype(data[name])
	    ]
	    if not (time_col and series_cols):
	        return empty_chart("Monthly Overview")

	    # Unparseable dates become NaT instead of raising.
	    data[time_col] = pd.to_datetime(data[time_col], errors="coerce")
	    chart = go.Figure()
	    for name in series_cols:
	        legend = name.replace("_", " ").title()
	        line = go.Scatter(x=data[time_col], y=data[name], mode="lines+markers", name=legend)
	        chart.add_trace(line)
	    chart.update_layout(
	        title="Monthly Overview",
	        template="plotly_white",
	        height=450,
	        paper_bgcolor="white",
	        plot_bgcolor="white",
	    )
	    return chart


	def build_sentiment_chart() -> go.Figure:
	    """Draw stacked horizontal bars from ``sentiment_counts_sampled.csv``.

	    The first CSV column supplies the bar labels. One series is added for
	    each of the "negative", "neutral" and "positive" columns that exists.
	    A placeholder chart is returned when the file is missing.
	    """
	    csv_path = TAB_DIR / "sentiment_counts_sampled.csv"
	    if not csv_path.exists():
	        return empty_chart("Sentiment Distribution")
	    counts = pd.read_csv(csv_path)
	    label_col = counts.columns[0]

	    chart = go.Figure()
	    for sentiment in ("negative", "neutral", "positive"):
	        if sentiment not in counts.columns:
	            continue
	        bars = go.Bar(y=counts[label_col], x=counts[sentiment], orientation="h", name=sentiment.title())
	        chart.add_trace(bars)
	    chart.update_layout(
	        title="Sentiment Distribution",
	        barmode="stack",
	        template="plotly_white",
	        height=max(420, len(counts) * 28),  # grow with the number of rows
	        paper_bgcolor="white",
	        plot_bgcolor="white",
	    )
	    # Reverse the axis so the first CSV row ends up at the top.
	    chart.update_yaxes(autorange="reversed")
	    return chart


	def build_top_sellers_chart() -> go.Figure:
	    """Draw horizontal bars for the first 15 rows of ``top_titles_by_units_sold.csv``.

	    Labels come from the first column whose name contains "title", else
	    the first column. Values come from the first column whose name contains
	    "unit" or "sold", else the last column. A placeholder chart is returned
	    when the file is missing.
	    """
	    csv_path = TAB_DIR / "top_titles_by_units_sold.csv"
	    if not csv_path.exists():
	        return empty_chart("Top Sellers")
	    top = pd.read_csv(csv_path).head(15)

	    title_like = [name for name in top.columns if "title" in name.lower()]
	    label_col = title_like[0] if title_like else top.columns[0]
	    sales_like = [name for name in top.columns if "unit" in name.lower() or "sold" in name.lower()]
	    amount_col = sales_like[0] if sales_like else top.columns[-1]

	    chart = go.Figure()
	    chart.add_trace(go.Bar(y=top[label_col], x=top[amount_col], orientation="h"))
	    chart.update_layout(
	        title="Top Sellers",
	        template="plotly_white",
	        height=max(420, len(top) * 28),  # grow with the number of rows
	        paper_bgcolor="white",
	        plot_bgcolor="white",
	    )
	    # Reverse the axis so the first CSV row ends up at the top.
	    chart.update_yaxes(autorange="reversed")
	    return chart


	def refresh_table(choice: str \| None) -> pd.DataFrame:
	if not choice:
	return pd.DataFrame([{"hint": "Choose a table first."}])
	return load_table(TAB_DIR / choice)


	def refresh_dashboard() -> tuple:
	    """Rebuild every dashboard output from the files currently on disk.

	    Returns:
	        A 7-tuple: KPI HTML, sales chart, sentiment chart, top-sellers
	        chart, gallery items, a dropdown update carrying the table names
	        with the first one selected, and the preview of that first table
	        (an empty frame when there are no tables).
	    """
	    available = list_tables()
	    first = available[0] if available else None
	    if first:
	        preview = refresh_table(first)
	    else:
	        preview = pd.DataFrame()
	    return (
	        kpi_cards_html(),
	        build_sales_chart(),
	        build_sentiment_chart(),
	        build_top_sellers_chart(),
	        gallery_items(),
	        gr.update(choices=available, value=first),
	        preview,
	    )


	# Make sure the output folders exist before the interface is assembled.
	ensure_dirs()

	with gr.Blocks(title="Notebook Runner Space", css=load_css()) as demo:
	    gr.Markdown(
	        "# ESCP Notebook Runner\n"
	        "Run the bundled notebook on the two bundled CSV datasets, or replace them with your own files."
	    )

	    # Tab 1: optional uploads plus the button that runs the notebook.
	    with gr.Tab("1. Run Notebook"):
	        gr.Markdown(
	            "Default project files already included in the Space:\n"
	            "- `analysis.ipynb`\n"
	            "- `synthetic_book_reviews.csv`\n"
	            "- `synthetic_sales_data.csv`\n\n"
	            "You can leave all upload fields empty to use the bundled files."
	        )
	        notebook_file = gr.File(label="Optional notebook (.ipynb)", file_types=[".ipynb"], type="filepath")
	        reviews_file = gr.File(label="Optional reviews CSV", file_types=[".csv"], type="filepath")
	        sales_file = gr.File(label="Optional sales CSV", file_types=[".csv"], type="filepath")
	        run_button = gr.Button("Run Full Pipeline", variant="primary")
	        run_log = gr.Textbox(label="Execution Log", lines=18, interactive=False)
	        pipeline_inputs = [notebook_file, reviews_file, sales_file]
	        run_button.click(fn=run_pipeline, inputs=pipeline_inputs, outputs=run_log)

	    # Tab 2: KPI cards, charts, figure gallery, and table preview.
	    with gr.Tab("2. Dashboard"):
	        kpis = gr.HTML(value=kpi_cards_html())
	        refresh_button = gr.Button("Refresh Dashboard", variant="primary")
	        chart_sales = gr.Plot(label="Monthly Overview")
	        chart_sentiment = gr.Plot(label="Sentiment Distribution")
	        chart_top = gr.Plot(label="Top Sellers")
	        gallery = gr.Gallery(label="Generated Figures", columns=2, height=420, object_fit="contain")
	        table_name = gr.Dropdown(label="Generated Tables", choices=[], interactive=True)
	        table_preview = gr.Dataframe(label="Table Preview", interactive=False)
	        # Same order as the tuple returned by refresh_dashboard.
	        dashboard_outputs = [
	            kpis,
	            chart_sales,
	            chart_sentiment,
	            chart_top,
	            gallery,
	            table_name,
	            table_preview,
	        ]
	        refresh_button.click(fn=refresh_dashboard, outputs=dashboard_outputs)
	        table_name.change(fn=refresh_table, inputs=table_name, outputs=table_preview)

	    # Tab 3: static description of the packaged files.
	    with gr.Tab("3. Project Files"):
	        gr.Markdown(
	            "The package includes the notebook, the two CSV datasets, `requirements.txt`, `style.css`, and the `artifacts/` folders."
	        )

	demo.launch(allowed_paths=[str(BASE_DIR)])