SE21AppTemplate / app.py
RimAlMoatassime's picture
Update app.py
4d15078 verified
raw
history blame
8.12 kB
# AI-Assisted Code — Academic Integrity Notice
# Generated with The App Builder. ESCP coursework.
# Student must be able to explain all code when asked.
"""Interactive Hugging Face Space for running a notebook on two CSV files."""
from __future__ import annotations
import shutil
import tempfile
import traceback
import zipfile
from pathlib import Path
import gradio as gr
import nbformat
import pandas as pd
from nbclient import NotebookClient
APP_DIR = Path(__file__).resolve().parent
DEFAULT_NOTEBOOK = APP_DIR / "analysis_notebook.ipynb"
DEFAULT_REVIEWS = APP_DIR / "synthetic_book_reviews.csv"
DEFAULT_SALES = APP_DIR / "synthetic_sales_data.csv"
def save_uploaded_file(file_obj, destination: Path) -> None:
    """Copy an uploaded Gradio file to the destination path.

    Args:
        file_obj: The uploaded file. Either a plain ``str``/``Path`` pointing
            at the file, or an object exposing the file location through a
            ``name`` attribute (temporary-file style wrapper).
        destination: Path the upload is copied to.

    Raises:
        ValueError: If ``file_obj`` is empty (for example ``None``).
        OSError: If the source cannot be read or the destination cannot be
            written (propagated from ``shutil.copy``).
    """
    if not file_obj:
        raise ValueError("No uploaded file was provided.")

    # Plain paths must be handled before looking at ``.name``: on a ``Path``
    # that attribute is only the final component, not the full location.
    if isinstance(file_obj, (str, Path)):
        source = Path(file_obj)
    else:
        source = Path(file_obj.name)

    shutil.copy(source, destination)
def preview_csv(file_obj, fallback_path: Path):
    """Return a small preview table for the selected CSV file.

    Args:
        file_obj: The uploaded CSV, given either as a plain ``str``/``Path``
            or as an object whose ``name`` attribute holds the file location.
            A falsy value means "nothing uploaded".
        fallback_path: CSV to preview when nothing was uploaded.

    Returns:
        A DataFrame with at most the first 10 rows of the CSV. If the file
        does not exist, a one-cell ``message`` table is returned; if reading
        fails for any reason, a one-cell ``error`` table with the exception
        text is returned instead of raising.
    """
    try:
        if not file_obj:
            path = fallback_path
        elif isinstance(file_obj, (str, Path)):
            # ``Path.name`` is only the basename, so paths are used directly.
            path = Path(file_obj)
        else:
            path = Path(file_obj.name)

        if not path.exists():
            return pd.DataFrame({"message": ["No file available. Upload one first."]})

        # Only the first rows are displayed, so avoid parsing the whole file.
        return pd.read_csv(path, nrows=10)
    except Exception as error:
        # Deliberate best effort: the preview shows the problem in the table
        # rather than raising out of the UI callback.
        return pd.DataFrame({"error": [str(error)]})
def prepare_run_directory(notebook_file, reviews_file, sales_file):
    """Create a temporary run folder and standardise filenames.

    Each input is copied into a fresh temporary directory under a fixed name
    (``analysis_notebook.ipynb``, ``synthetic_book_reviews.csv``,
    ``synthetic_sales_data.csv``). A falsy argument selects the bundled
    default file for that slot.

    Args:
        notebook_file: Uploaded notebook, or a falsy value to use the default.
        reviews_file: Uploaded reviews CSV, or a falsy value for the default.
        sales_file: Uploaded sales CSV, or a falsy value for the default.
            Uploads may be plain ``str``/``Path`` values or objects exposing
            the file location through a ``name`` attribute.

    Returns:
        A ``(run_dir, notebook_path)`` tuple of ``Path`` objects: the new
        temporary directory and the notebook copy inside it.

    Raises:
        FileNotFoundError: If any selected source file does not exist.
    """

    def as_path(candidate) -> Path:
        # Plain paths are used directly; ``Path.name`` would only yield the
        # basename. Anything else is expected to carry its location in
        # ``.name``.
        if isinstance(candidate, (str, Path)):
            return Path(candidate)
        return Path(candidate.name)

    # ``or`` falls back to the bundled default only when nothing was uploaded.
    planned = [
        (as_path(notebook_file or DEFAULT_NOTEBOOK), "analysis_notebook.ipynb"),
        (as_path(reviews_file or DEFAULT_REVIEWS), "synthetic_book_reviews.csv"),
        (as_path(sales_file or DEFAULT_SALES), "synthetic_sales_data.csv"),
    ]

    # Validate before creating the directory so a bad request leaves nothing
    # behind on disk.
    if not all(source.is_file() for source, _ in planned):
        raise FileNotFoundError(
            "Missing input file. Upload a notebook and two CSV files, "
            "or place default files in the Space root."
        )

    run_dir = Path(tempfile.mkdtemp(prefix="hf_notebook_run_"))
    try:
        for source, target_name in planned:
            shutil.copy(source, run_dir / target_name)
    except Exception:
        # Do not leak a half-populated run directory when a copy fails.
        shutil.rmtree(run_dir, ignore_errors=True)
        raise

    return run_dir, run_dir / "analysis_notebook.ipynb"
def remove_pip_install_cells(notebook_node) -> int:
    """Drop code cells whose source begins with ``!pip`` and count them.

    The notebook's ``cells`` attribute is replaced with a new list holding
    only the cells that were kept; leading whitespace in a cell's source is
    ignored when checking for the ``!pip`` prefix.

    Args:
        notebook_node: Notebook object exposing a ``cells`` sequence whose
            items have ``cell_type`` and ``source`` attributes.

    Returns:
        The number of cells that were removed.
    """

    def is_pip_cell(cell) -> bool:
        # Non-code cells are never inspected further, whatever their text.
        return cell.cell_type == "code" and cell.source.strip().startswith("!pip")

    total_before = len(notebook_node.cells)
    notebook_node.cells = [
        cell for cell in notebook_node.cells if not is_pip_cell(cell)
    ]
    return total_before - len(notebook_node.cells)
def collect_text_outputs(notebook_node) -> str:
    """Gather the textual outputs of all code cells into one string.

    Stream text, the ``text/plain`` form of execute results and display
    data, and error tracebacks are picked up in cell order. Empty pieces
    are skipped, each remaining piece is stripped, and the pieces are
    joined with a blank line between them.

    Args:
        notebook_node: Notebook object whose ``cells`` support both
            attribute access and ``dict``-style ``get``.

    Returns:
        The combined text, or a fixed notice when nothing was captured.
    """

    def render(output):
        # Map one output record to its text, or a falsy value if it has none.
        kind = output.output_type
        if kind == "stream":
            return output.get("text", "")
        if kind in {"execute_result", "display_data"}:
            return output.get("data", {}).get("text/plain")
        if kind == "error":
            return "\n".join(output.get("traceback", []))
        return None

    pieces = [
        render(output)
        for cell in notebook_node.cells
        if cell.cell_type == "code"
        for output in cell.get("outputs", [])
    ]
    stripped = [piece.strip() for piece in pieces if piece]
    combined = "\n\n".join(stripped).strip()
    if combined:
        return combined
    return "Notebook executed. No text output was captured."
def zip_outputs(run_dir: Path) -> str:
    """Create a downloadable ZIP file of run outputs.

    Every regular file below ``run_dir`` is stored under its path relative
    to ``run_dir``. The archive itself (``run_outputs.zip``, written inside
    ``run_dir``) is excluded so it is never added to itself.

    Args:
        run_dir: Directory whose files are archived.

    Returns:
        The path of the created archive as a string.
    """
    zip_path = run_dir / "run_outputs.zip"

    # Snapshot the file list before the archive is opened for writing;
    # otherwise the walk would pick up the half-written archive. Sorting
    # keeps the member order stable between runs.
    members = sorted(
        path
        for path in run_dir.rglob("*")
        if path.is_file() and path != zip_path
    )

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for path in members:
            archive.write(path, arcname=path.relative_to(run_dir).as_posix())
    return str(zip_path)
def run_analysis(notebook_file, reviews_file, sales_file, use_defaults):
    """Execute the notebook and return logs plus downloadable files.

    Args:
        notebook_file: Uploaded notebook (ignored when ``use_defaults``).
        reviews_file: Uploaded reviews CSV (ignored when ``use_defaults``).
        sales_file: Uploaded sales CSV (ignored when ``use_defaults``).
        use_defaults: When true, the bundled default files are used instead
            of the uploads.

    Returns:
        A 4-tuple ``(log, text_output, executed_notebook_path, zip_path)``.
        On failure nothing is raised: the log holds ``"Execution failed."``
        followed by the traceback, the text output is empty and both paths
        are ``None``.
    """
    # NOTE(review): this executes whatever code the uploaded notebook
    # contains inside the app's environment. Only expose it to trusted users.
    run_dir = None
    try:
        if not use_defaults and not all([notebook_file, reviews_file, sales_file]):
            raise ValueError(
                "Please upload a notebook and both CSV files, or tick "
                "'Use bundled default files'."
            )

        # Passing ``None`` makes prepare_run_directory pick the bundled files.
        selected_notebook = None if use_defaults else notebook_file
        selected_reviews = None if use_defaults else reviews_file
        selected_sales = None if use_defaults else sales_file
        run_dir, notebook_path = prepare_run_directory(
            selected_notebook, selected_reviews, selected_sales
        )

        with open(notebook_path, "r", encoding="utf-8") as notebook_handle:
            notebook_node = nbformat.read(notebook_handle, as_version=4)

        # Runtime installs are stripped before execution.
        removed_count = remove_pip_install_cells(notebook_node)

        client = NotebookClient(
            notebook_node,
            timeout=900,
            kernel_name="python3",
            # Run the kernel inside the run folder so the notebook finds the
            # CSV copies under their standard names.
            resources={"metadata": {"path": str(run_dir)}},
        )
        client.execute()

        executed_notebook = run_dir / "executed_analysis_notebook.ipynb"
        with open(executed_notebook, "w", encoding="utf-8") as notebook_handle:
            nbformat.write(notebook_node, notebook_handle)

        log_lines = [
            "Execution successful.",
            f"Working directory: {run_dir}",
            f"Removed !pip cells: {removed_count}",
        ]
        text_outputs = collect_text_outputs(notebook_node)
        zip_file = zip_outputs(run_dir)
        return "\n".join(log_lines), text_outputs, str(executed_notebook), zip_file
    except Exception:
        # Top-level UI boundary: report the failure instead of raising.
        error_text = traceback.format_exc()
        if run_dir is not None:
            # Nothing from a failed run is offered for download, so do not
            # leave its temporary directory behind.
            shutil.rmtree(run_dir, ignore_errors=True)
        return f"Execution failed.\n\n{error_text}", "", None, None
# Build the Gradio interface. Components are laid out in the order they are
# created inside the ``gr.Blocks`` context.
with gr.Blocks(title="Interactive Notebook Runner") as demo:
    gr.Markdown(
        """
# Interactive Notebook Runner
Upload a Jupyter notebook and two CSV files, preview the data, then run
the notebook directly inside the Hugging Face Space.
"""
    )

    # When ticked, uploads are ignored and the bundled files are used.
    use_defaults = gr.Checkbox(
        label="Use bundled default notebook and CSV files",
        value=False,
    )

    # Inputs: one notebook and two CSV files.
    with gr.Row():
        notebook_input = gr.File(label="Notebook (.ipynb)", file_types=[".ipynb"])
        reviews_input = gr.File(label="CSV 1: Book reviews", file_types=[".csv"])
        sales_input = gr.File(label="CSV 2: Sales data", file_types=[".csv"])

    # Preview controls and their result tables.
    with gr.Row():
        preview_reviews_btn = gr.Button("Preview CSV 1")
        preview_sales_btn = gr.Button("Preview CSV 2")
    with gr.Row():
        preview_reviews = gr.Dataframe(label="Preview: Book reviews")
        preview_sales = gr.Dataframe(label="Preview: Sales data")

    # Execution controls and outputs.
    run_button = gr.Button("Run notebook", variant="primary")
    run_log = gr.Textbox(label="Execution log", lines=8)
    notebook_output = gr.Textbox(label="Captured notebook text output", lines=14)
    executed_notebook = gr.File(label="Download executed notebook")
    output_zip = gr.File(label="Download ZIP of outputs")

    # Each preview falls back to the matching bundled CSV when nothing has
    # been uploaded for that slot.
    preview_reviews_btn.click(
        fn=lambda uploaded: preview_csv(uploaded, DEFAULT_REVIEWS),
        inputs=reviews_input,
        outputs=preview_reviews,
    )
    preview_sales_btn.click(
        fn=lambda uploaded: preview_csv(uploaded, DEFAULT_SALES),
        inputs=sales_input,
        outputs=preview_sales,
    )

    # The four outputs match the 4-tuple returned by run_analysis.
    run_button.click(
        fn=run_analysis,
        inputs=[notebook_input, reviews_input, sales_input, use_defaults],
        outputs=[run_log, notebook_output, executed_notebook, output_zip],
    )

# Start the server only when the file is run as a script, so importing the
# module (for example to test the helpers) has no side effects.
if __name__ == "__main__":
    demo.launch()