Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| import os | |
| import shutil | |
| import tempfile | |
| import traceback | |
| import zipfile | |
| from pathlib import Path | |
| from typing import List, Tuple | |
| import gradio as gr | |
| import nbformat | |
| import pandas as pd | |
| from nbclient import NotebookClient | |
# Directory that contains this script; the bundled default inputs are looked up beside it.
ROOT = Path(__file__).resolve().parent

# Notebook and CSV inputs used when the "bundled files" option is selected.
DEFAULT_NOTEBOOK = ROOT.joinpath("2a_Python_Analysis_Charlotte_Gers.ipynb")
DEFAULT_REVIEWS = ROOT.joinpath("synthetic_book_reviews.csv")
DEFAULT_SALES = ROOT.joinpath("synthetic_sales_data.csv")
def _safe_copy(src: Path, dst: Path) -> Path:
    """Copy the file ``src`` into the directory ``dst`` under its own name.

    The copy is made with ``shutil.copy2``.

    Returns:
        The path of the copy, i.e. ``dst / src.name``.
    """
    destination = dst.joinpath(src.name)
    shutil.copy2(src, destination)
    return destination
def _find_files(workdir: Path) -> dict:
    """Collect the artifact files found under ``workdir/artifacts/py``.

    Returns:
        A dict with three sorted lists of paths:
        ``"figures"`` (``*.png`` in ``figures``), ``"tables"`` (``*.csv`` in
        ``tables``) and ``"json"`` (``*.json`` in ``figures``).
    """
    artifact_root = workdir / "artifacts" / "py"
    figure_dir = artifact_root / "figures"
    table_dir = artifact_root / "tables"
    return {
        "figures": sorted(figure_dir.glob("*.png")),
        "tables": sorted(table_dir.glob("*.csv")),
        # JSON artifacts are searched for in the figures directory, not in one of their own.
        "json": sorted(figure_dir.glob("*.json")),
    }
def _zip_results(workdir: Path) -> Path:
    """Bundle every file under ``workdir`` into ``workdir/hf_space_outputs.zip``.

    Files are stored with paths relative to ``workdir``, in sorted order so the
    archive layout is deterministic. Only the archive itself is left out; a
    nested file that merely shares its name is still included.

    Returns:
        The path of the written ZIP archive.
    """
    zip_path = workdir / "hf_space_outputs.zip"
    # Snapshot the members before the archive is (re)created, and compare full
    # paths rather than bare names so that only the archive itself is skipped.
    members = sorted(
        path for path in workdir.rglob("*") if path.is_file() and path != zip_path
    )
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for path in members:
            zf.write(path, arcname=path.relative_to(workdir))
    return zip_path
def _read_csv_preview(csv_path: Path, rows: int = 15) -> pd.DataFrame:
    """Return the first ``rows`` rows of the CSV file at ``csv_path``.

    Args:
        csv_path: CSV file to read.
        rows: Number of leading rows to keep (passed to ``DataFrame.head``).

    Returns:
        The preview as a DataFrame. A CSV with no data at all (for example a
        zero-byte file) yields an empty DataFrame instead of raising.
    """
    try:
        frame = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        # A file without any columns is a valid, if useless, export; show an
        # empty preview rather than failing the whole UI callback.
        return pd.DataFrame()
    return frame.head(rows)
def _run_notebook(notebook_path: Path, workdir: Path) -> Tuple[Path, List[str]]:
    """Execute a notebook with ``workdir`` as its resource path and save the result.

    Code cells whose source starts with ``!pip install`` (after leading
    whitespace) are replaced by a single ``print`` statement before execution.
    The notebook is run through ``NotebookClient`` with the ``python3`` kernel,
    ``timeout=600`` and ``allow_errors=False``.

    Args:
        notebook_path: Notebook file to read (nbformat version 4).
        workdir: Directory passed to the client as ``metadata.path``; the
            executed copy is written here as ``executed_<notebook name>``.

    Returns:
        A tuple of the executed notebook's path and a list of
        ``"Cell <index>: <text>"`` entries, one per non-empty stream output of
        a code cell. ``<index>`` counts all cells, not only code cells.
    """
    skip_notice = 'print("Skipping notebook package install cell because dependencies are handled by requirements.txt")'

    with notebook_path.open("r", encoding="utf-8") as handle:
        notebook = nbformat.read(handle, as_version=4)

    for cell in notebook.cells:
        is_install_cell = cell.cell_type == "code" and cell.source.lstrip().startswith("!pip install")
        if is_install_cell:
            cell.source = skip_notice

    # execute() fills in the outputs of ``notebook`` in place.
    NotebookClient(
        notebook,
        timeout=600,
        kernel_name="python3",
        allow_errors=False,
        resources={"metadata": {"path": str(workdir)}},
    ).execute()

    executed_path = workdir / f"executed_{notebook_path.name}"
    with executed_path.open("w", encoding="utf-8") as handle:
        nbformat.write(notebook, handle)

    stream_logs = []
    for position, cell in enumerate(notebook.cells):
        if cell.cell_type != "code":
            continue
        for output in cell.get("outputs", []):
            if output.get("output_type") != "stream":
                continue
            stripped = output.get("text", "").strip()
            if stripped:
                stream_logs.append(f"Cell {position}: {stripped}")
    return executed_path, stream_logs
def execute_pipeline(use_default_files, notebook_file, reviews_file, sales_file):
    """Run the notebook on the chosen inputs and gather its results for the UI.

    The inputs are copied into a fresh ``hf_space_run_*`` temporary directory,
    the notebook is executed there, and the exported artifacts are collected.

    Args:
        use_default_files: When truthy, use the bundled notebook and CSV files
            and ignore the uploads.
        notebook_file: Path of the uploaded notebook, or ``None``.
        reviews_file: Path of the uploaded reviews CSV, or ``None``.
        sales_file: Path of the uploaded sales CSV, or ``None``.

    Returns:
        A 5-tuple matching the UI outputs: status text, gallery items as
        ``(path, caption)`` pairs, a ``gr.update`` carrying the table choices,
        a preview DataFrame of the first table, and the list of downloadable
        file paths. If anything fails, the status text carries the traceback
        and the other four values are empty.
    """
    temp_root = Path(tempfile.mkdtemp(prefix="hf_space_run_"))
    try:
        if use_default_files:
            if not (DEFAULT_NOTEBOOK.exists() and DEFAULT_REVIEWS.exists() and DEFAULT_SALES.exists()):
                raise FileNotFoundError(
                    "Bundled default files are missing. Upload the notebook and both CSV files, or add them to the Space repository root."
                )
            notebook_src = DEFAULT_NOTEBOOK
            reviews_src = DEFAULT_REVIEWS
            sales_src = DEFAULT_SALES
        else:
            if notebook_file is None or reviews_file is None or sales_file is None:
                raise ValueError("Please upload the notebook file and both CSV files.")
            notebook_src = Path(notebook_file)
            reviews_src = Path(reviews_file)
            sales_src = Path(sales_file)

        # The notebook and both CSV files are placed side by side in the run directory.
        notebook_local = _safe_copy(notebook_src, temp_root)
        _safe_copy(reviews_src, temp_root)
        _safe_copy(sales_src, temp_root)

        executed_path, logs = _run_notebook(notebook_local, temp_root)
        found = _find_files(temp_root)
        zip_path = _zip_results(temp_root)

        gallery = [(str(p), p.name) for p in found["figures"]]
        table_choices = [p.name for p in found["tables"]]
        first_table = _read_csv_preview(found["tables"][0]) if found["tables"] else pd.DataFrame()

        summary_lines = [
            "Execution completed successfully.",
            f"Notebook: {notebook_local.name}",
            f"Figures generated: {len(found['figures'])}",
            f"Tables generated: {len(found['tables'])}",
            f"JSON artifacts: {len(found['json'])}",
        ]
        if logs:
            summary_lines.append("\nExecution log highlights:")
            # Only the first 20 log entries are shown in the status box.
            summary_lines.extend(logs[:20])

        json_text = ""
        if found["json"]:
            # Only the first JSON artifact is inlined into the status text.
            json_text = "\n\nKPI JSON:\n" + found["json"][0].read_text(encoding="utf-8")

        all_downloads = (
            [str(executed_path), str(zip_path)]
            + [str(p) for p in found["tables"]]
            + [str(p) for p in found["json"]]
        )
        # On success the run directory is kept on purpose: the gallery, the
        # downloads and later table previews all point at files inside it.
        return (
            "\n".join(summary_lines) + json_text,
            gallery,
            gr.update(choices=table_choices, value=table_choices[0] if table_choices else None),
            first_table,
            all_downloads,
        )
    except Exception:
        # UI boundary: report the failure in the status box instead of raising.
        error = traceback.format_exc()
        # Nothing returned below references the run directory, so remove it.
        # This avoids leaking one directory per failed run and keeps partial
        # artifacts out of later table-preview lookups.
        shutil.rmtree(temp_root, ignore_errors=True)
        return (
            f"Execution failed.\n\n{error}",
            [],
            gr.update(choices=[], value=None),
            pd.DataFrame(),
            [],
        )
def load_selected_table(table_name, use_default_files, notebook_file, reviews_file, sales_file):
    """Return a preview of the generated table called ``table_name``.

    The ``hf_space_run_*`` directories in the system temp directory are
    searched newest first (by modification time) for
    ``artifacts/py/tables/<table_name>``; the first match is previewed.

    Args:
        table_name: Bare file name of the table to preview.
        use_default_files: Unused; accepted because the UI wiring passes it.
        notebook_file: Unused; accepted because the UI wiring passes it.
        reviews_file: Unused; accepted because the UI wiring passes it.
        sales_file: Unused; accepted because the UI wiring passes it.

    Returns:
        The preview DataFrame, or an empty DataFrame when ``table_name`` is
        empty, is not a bare file name, or no matching file is found.
    """
    if not table_name:
        return pd.DataFrame()
    # Table names are plain file names; refuse anything with directory
    # components so the lookup cannot leave the tables directory.
    if Path(table_name).name != table_name:
        return pd.DataFrame()
    candidate_roots = sorted(
        Path(tempfile.gettempdir()).glob("hf_space_run_*"),
        key=os.path.getmtime,
        reverse=True,
    )
    for root in candidate_roots:
        candidate = root / "artifacts" / "py" / "tables" / table_name
        # is_file() rather than exists(): a directory is not a readable table.
        if candidate.is_file():
            return _read_csv_preview(candidate)
    return pd.DataFrame()
# Gradio UI: input widgets, result widgets, and the event wiring between them.
# NOTE(review): the original indentation was lost; this layout assumes the
# checkbox sits alone in the first row, the three file inputs share the second
# row, and every other component is stacked directly under the Blocks root.
with gr.Blocks(title="Notebook Runner for Book Analytics") as demo:
    gr.Markdown(
        """
# Notebook Runner for Book Analytics
Upload a Jupyter notebook and two CSV files, or run the bundled defaults.
The app executes the notebook, collects exported figures and tables, and returns a ZIP with all results.
"""
    )

    # --- Inputs ---
    with gr.Row():
        use_default_files = gr.Checkbox(value=True, label="Use bundled notebook and CSV files")
    with gr.Row():
        notebook_file = gr.File(label="Notebook (.ipynb)", file_count="single", type="filepath")
        reviews_file = gr.File(label="Reviews CSV", file_count="single", type="filepath")
        sales_file = gr.File(label="Sales CSV", file_count="single", type="filepath")
    run_btn = gr.Button("Run notebook", variant="primary")

    # --- Outputs ---
    status_box = gr.Textbox(label="Run status", lines=18)
    gallery = gr.Gallery(label="Generated figures", columns=1, height="auto")
    table_selector = gr.Dropdown(label="Preview generated table")
    table_preview = gr.Dataframe(label="Generated table preview")
    downloads = gr.Files(label="Download outputs")

    # Both callbacks receive the same four run inputs, in this order.
    pipeline_inputs = [use_default_files, notebook_file, reviews_file, sales_file]
    pipeline_outputs = [status_box, gallery, table_selector, table_preview, downloads]

    # Running the notebook refreshes every output component at once.
    run_btn.click(
        fn=execute_pipeline,
        inputs=pipeline_inputs,
        outputs=pipeline_outputs,
    )
    # Picking another table in the dropdown only refreshes the preview.
    table_selector.change(
        fn=load_selected_table,
        inputs=[table_selector, *pipeline_inputs],
        outputs=table_preview,
    )

if __name__ == "__main__":
    demo.launch()