# Source: SE21AppTemplate / app.py — uploaded to a Hugging Face Space
# (commit a16ca13, "Upload 6 files", by RimAlMoatassime)
# AI-Assisted Code — Academic Integrity Notice
# Generated with The App Builder. ESCP coursework.
# Student must be able to explain all code when asked.
"""Interactive Hugging Face Space to execute a Jupyter notebook on uploaded CSV files."""
import io
import re
import json
import shutil
import zipfile
import tempfile
from pathlib import Path
import gradio as gr
import nbformat
import pandas as pd
from nbclient import NotebookClient
from nbclient.exceptions import CellExecutionError
# Bundled fallback assets, resolved relative to this file so the app works
# regardless of the working directory the Space starts in.
APP_DIR = Path(__file__).resolve().parent
DEFAULT_NOTEBOOK = APP_DIR / "analysis_notebook.ipynb"
# Datasets used when the user leaves the CSV upload slots empty.
DEFAULT_CSVS = [
    APP_DIR / "synthetic_book_reviews.csv",
    APP_DIR / "synthetic_sales_data.csv",
]
# Filenames the notebook reads; uploads are copied into the run directory
# under these exact names so the notebook code needs no changes.
EXPECTED_DATASET_NAMES = [
    "synthetic_book_reviews.csv",
    "synthetic_sales_data.csv",
]
def _display_name(file_obj):
"""Return a readable filename from a Gradio upload object."""
if file_obj is None:
return ""
if isinstance(file_obj, str):
return Path(file_obj).name
name = getattr(file_obj, "name", "")
return Path(name).name if name else ""
def _resolve_uploaded_path(file_obj):
"""Convert Gradio file input into a local Path."""
if file_obj is None:
return None
if isinstance(file_obj, str):
return Path(file_obj)
file_name = getattr(file_obj, "name", None)
return Path(file_name) if file_name else None
def _preview_csv(file_obj):
    """Build a small DataFrame preview (first 10 rows) for the UI.

    Falls back to a one-row info/error table when no file was uploaded
    or the CSV cannot be parsed, so the UI always has something to show.
    """
    path = _resolve_uploaded_path(file_obj)
    if path is None:
        return pd.DataFrame({"Info": ["No file uploaded yet."]})
    try:
        return pd.read_csv(path).head(10)
    except Exception as error:  # best-effort: surface the problem in the table
        return pd.DataFrame({"Error": [f"Could not preview {path.name}: {error}"]})
def preview_datasets(csv_one, csv_two):
    """Return the two preview tables plus a short status line for the UI."""
    status = (
        f"Preview ready. File 1: {_display_name(csv_one) or 'default / missing'} | "
        f"File 2: {_display_name(csv_two) or 'default / missing'}"
    )
    return _preview_csv(csv_one), _preview_csv(csv_two), status
def _remove_runtime_install_cells(notebook_node):
"""Remove shell install cells because Hugging Face installs from requirements.txt."""
cleaned_cells = []
removed_count = 0
install_pattern = re.compile(r"^\s*!pip\s+install|^\s*%pip\s+install", re.IGNORECASE)
for cell in notebook_node.cells:
if cell.get("cell_type") != "code":
cleaned_cells.append(cell)
continue
source = cell.get("source", "")
if install_pattern.search(source.strip()):
removed_count += 1
continue
cleaned_cells.append(cell)
notebook_node.cells = cleaned_cells
return removed_count
def _prepare_run_directory(notebook_file, csv_one, csv_two):
    """Stage a fresh temp directory holding the notebook and both datasets.

    Uploads are copied in under the filenames the notebook expects;
    missing uploads fall back to the bundled default files.

    Returns
    -------
    tuple
        ``(run_dir, notebook_target)`` — both ``Path`` objects.

    Raises
    ------
    FileNotFoundError
        If the notebook or either dataset cannot be located.
    """
    run_dir = Path(tempfile.mkdtemp(prefix="hf_notebook_run_"))

    notebook_source = _resolve_uploaded_path(notebook_file) or DEFAULT_NOTEBOOK
    if not notebook_source.exists():
        raise FileNotFoundError("No notebook found. Upload one or include analysis_notebook.ipynb.")
    notebook_target = run_dir / "analysis_notebook.ipynb"
    shutil.copy2(notebook_source, notebook_target)

    csv_sources = (
        _resolve_uploaded_path(csv_one) or DEFAULT_CSVS[0],
        _resolve_uploaded_path(csv_two) or DEFAULT_CSVS[1],
    )
    for csv_source, expected_name in zip(csv_sources, EXPECTED_DATASET_NAMES):
        csv_path = Path(csv_source)
        if not csv_path.exists():
            raise FileNotFoundError(
                f"Missing dataset: {expected_name}. Upload it or keep the bundled default file."
            )
        # Standardized name so the notebook's hard-coded reads succeed.
        shutil.copy2(csv_path, run_dir / expected_name)

    return run_dir, notebook_target
def _extract_notebook_outputs(executed_notebook):
"""Collect text and tables from executed notebook cells."""
text_chunks = []
tables = []
for cell in executed_notebook.cells:
if cell.get("cell_type") != "code":
continue
for output in cell.get("outputs", []):
if output.get("output_type") == "stream":
text_chunks.append(output.get("text", ""))
elif output.get("output_type") in {"execute_result", "display_data"}:
data = output.get("data", {})
if "text/plain" in data:
text_chunks.append(str(data["text/plain"]))
if "text/html" in data:
try:
tables.append(pd.read_html(io.StringIO(data["text/html"]))[0])
except Exception:
pass
elif output.get("output_type") == "error":
traceback_text = "\n".join(output.get("traceback", []))
text_chunks.append(traceback_text)
combined_text = "\n\n".join(chunk.strip() for chunk in text_chunks if str(chunk).strip())
if not combined_text:
combined_text = "Notebook executed, but no text output was captured."
return combined_text, tables
def _save_output_bundle(run_dir):
"""Zip everything created during execution for download."""
zip_path = run_dir / "execution_outputs.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
for item in run_dir.rglob("*"):
if item == zip_path:
continue
if item.is_file():
zip_file.write(item, item.relative_to(run_dir))
return zip_path
def run_analysis(notebook_file, csv_one, csv_two):
    """Execute the notebook against the chosen files and package the results.

    Returns a 7-tuple matching the UI outputs in order: status line,
    execution log, two detected tables, executed-notebook path, ZIP path,
    and run metadata as a JSON string. Failures are reported through the
    same tuple instead of raising, so the Gradio callback never crashes.
    """
    try:
        run_dir, notebook_path = _prepare_run_directory(notebook_file, csv_one, csv_two)

        with notebook_path.open("r", encoding="utf-8") as notebook_handle:
            notebook_node = nbformat.read(notebook_handle, as_version=4)
        removed_cells = _remove_runtime_install_cells(notebook_node)

        # Run every cell inside the staged directory; stop on first error.
        client = NotebookClient(
            notebook_node,
            timeout=900,
            kernel_name="python3",
            resources={"metadata": {"path": str(run_dir)}},
            allow_errors=False,
        )
        client.execute()

        # Persist the executed notebook so the user can download it.
        executed_notebook_path = run_dir / "executed_analysis_notebook.ipynb"
        with executed_notebook_path.open("w", encoding="utf-8") as notebook_handle:
            nbformat.write(notebook_node, notebook_handle)

        log_text, tables = _extract_notebook_outputs(notebook_node)
        output_zip = _save_output_bundle(run_dir)

        # Placeholders keep the two table widgets populated even when the
        # notebook produced fewer than two HTML tables.
        if tables:
            first_table = tables[0]
        else:
            first_table = pd.DataFrame({"Info": ["No table output detected."]})
        if len(tables) > 1:
            second_table = tables[1]
        else:
            second_table = pd.DataFrame({"Info": ["No second table detected."]})

        summary = json.dumps(
            {
                "status": "success",
                "removed_install_cells": removed_cells,
                "notebook_used": _display_name(notebook_file) or DEFAULT_NOTEBOOK.name,
                "dataset_1": _display_name(csv_one) or DEFAULT_CSVS[0].name,
                "dataset_2": _display_name(csv_two) or DEFAULT_CSVS[1].name,
                "run_directory": str(run_dir),
            },
            indent=2,
        )
        return (
            "✅ Execution finished successfully.",
            log_text[:15000],  # cap the log so the textbox stays responsive
            first_table,
            second_table,
            str(executed_notebook_path),
            str(output_zip),
            summary,
        )
    except CellExecutionError as error:
        # A notebook cell raised: show its traceback in the log panel.
        return (
            "❌ Notebook execution failed.",
            str(error),
            pd.DataFrame({"Error": ["Notebook cell execution failed. See log above."]}),
            pd.DataFrame({"Error": ["No second table because execution stopped early."]}),
            None,
            None,
            json.dumps({"status": "failed", "reason": "CellExecutionError"}, indent=2),
        )
    except Exception as error:
        # Anything else (missing files, unreadable notebook JSON, ...).
        return (
            "❌ App error.",
            str(error),
            pd.DataFrame({"Error": [f"App failed before completion: {error}"]}),
            pd.DataFrame({"Error": ["No second table available."]}),
            None,
            None,
            json.dumps({"status": "failed", "reason": str(error)}, indent=2),
        )
# Minimal styling: a taller run button and a capped page width.
CUSTOM_CSS = """
#run-btn {min-height: 52px; font-size: 18px;}
.gradio-container {max-width: 1200px !important;}
"""

# UI layout. NOTE: component creation order defines the on-screen layout,
# and the outputs lists in the click() handlers below are positional —
# they must stay in sync with the 7-tuple returned by run_analysis.
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# Interactive Notebook Runner
Upload a Jupyter notebook and two CSV files, preview the datasets, then run the notebook directly in the Space.
**Default behavior:** if you leave uploads empty, the app uses the bundled class notebook and bundled CSV files.
"""
    )
    # --- Inputs: notebook plus the two datasets ---------------------------
    with gr.Row():
        notebook_input = gr.File(label="Notebook (.ipynb)", file_types=[".ipynb"])
        csv_one_input = gr.File(label="Dataset 1 (.csv)", file_types=[".csv"])
        csv_two_input = gr.File(label="Dataset 2 (.csv)", file_types=[".csv"])
    with gr.Row():
        preview_button = gr.Button("Refresh previews")
        run_button = gr.Button("Run notebook", elem_id="run-btn")
    preview_status = gr.Textbox(label="Preview status", interactive=False)

    # --- Output tabs -------------------------------------------------------
    with gr.Tab("Dataset previews"):
        with gr.Row():
            preview_table_one = gr.Dataframe(label="Preview: dataset 1", interactive=False)
            preview_table_two = gr.Dataframe(label="Preview: dataset 2", interactive=False)
    with gr.Tab("Execution results"):
        status_box = gr.Textbox(label="Run status", interactive=False)
        execution_log = gr.Textbox(label="Execution log", lines=18, interactive=False)
        with gr.Row():
            output_table_one = gr.Dataframe(label="Detected output table 1", interactive=False)
            output_table_two = gr.Dataframe(label="Detected output table 2", interactive=False)
        run_metadata = gr.Code(label="Run metadata", language="json")
    with gr.Tab("Downloads"):
        executed_notebook_file = gr.File(label="Executed notebook")
        execution_zip_file = gr.File(label="ZIP of all outputs")

    # --- Event wiring -------------------------------------------------------
    preview_button.click(
        fn=preview_datasets,
        inputs=[csv_one_input, csv_two_input],
        outputs=[preview_table_one, preview_table_two, preview_status],
    )
    run_button.click(
        fn=run_analysis,
        inputs=[notebook_input, csv_one_input, csv_two_input],
        outputs=[
            status_box,
            execution_log,
            output_table_one,
            output_table_two,
            executed_notebook_file,
            execution_zip_file,
            run_metadata,
        ],
    )

demo.launch()