# Source: SE21AppTemplate / app.py — uploaded to a Hugging Face Space
# (commit a16ca13, "Upload 6 files", by RimAlMoatassime)
# AI-Assisted Code — Academic Integrity Notice
# Generated with The App Builder. ESCP coursework.
# Student must be able to explain all code when asked.
"""Interactive Hugging Face Space to execute a Jupyter notebook on uploaded CSV files."""
import io
import re
import json
import shutil
import zipfile
import tempfile
from pathlib import Path
import gradio as gr
import nbformat
import pandas as pd
from nbclient import NotebookClient
from nbclient.exceptions import CellExecutionError
# Bundled fallback assets, resolved relative to this file so the app works
# regardless of the working directory the Space starts in.
APP_DIR = Path(__file__).resolve().parent
DEFAULT_NOTEBOOK = APP_DIR / "analysis_notebook.ipynb"
# Datasets used when the user leaves the CSV upload slots empty.
DEFAULT_CSVS = [
    APP_DIR / "synthetic_book_reviews.csv",
    APP_DIR / "synthetic_sales_data.csv",
]
# Filenames the notebook reads; uploads are copied into the run directory
# under these exact names so the notebook code needs no changes.
EXPECTED_DATASET_NAMES = [
    "synthetic_book_reviews.csv",
    "synthetic_sales_data.csv",
]
def _display_name(file_obj):
"""Return a readable filename from a Gradio upload object."""
if file_obj is None:
return ""
if isinstance(file_obj, str):
return Path(file_obj).name
name = getattr(file_obj, "name", "")
return Path(name).name if name else ""
def _resolve_uploaded_path(file_obj):
"""Convert Gradio file input into a local Path."""
if file_obj is None:
return None
if isinstance(file_obj, str):
return Path(file_obj)
file_name = getattr(file_obj, "name", None)
return Path(file_name) if file_name else None
def _preview_csv(file_obj):
    """Build a small DataFrame preview (first 10 rows) for the UI.

    Falls back to a one-row info/error table when no file was uploaded
    or the CSV cannot be parsed, so the UI always has something to show.
    """
    path = _resolve_uploaded_path(file_obj)
    if path is None:
        return pd.DataFrame({"Info": ["No file uploaded yet."]})
    try:
        return pd.read_csv(path).head(10)
    except Exception as error:  # best-effort: surface the problem in the table
        return pd.DataFrame({"Error": [f"Could not preview {path.name}: {error}"]})
def preview_datasets(csv_one, csv_two):
    """Return the two preview tables plus a short status line for the UI."""
    status = (
        f"Preview ready. File 1: {_display_name(csv_one) or 'default / missing'} | "
        f"File 2: {_display_name(csv_two) or 'default / missing'}"
    )
    return _preview_csv(csv_one), _preview_csv(csv_two), status
def _remove_runtime_install_cells(notebook_node):
"""Remove shell install cells because Hugging Face installs from requirements.txt."""
cleaned_cells = []
removed_count = 0
install_pattern = re.compile(r"^\s*!pip\s+install|^\s*%pip\s+install", re.IGNORECASE)
for cell in notebook_node.cells:
if cell.get("cell_type") != "code":
cleaned_cells.append(cell)
continue
source = cell.get("source", "")
if install_pattern.search(source.strip()):
removed_count += 1
continue
cleaned_cells.append(cell)
notebook_node.cells = cleaned_cells
return removed_count
def _prepare_run_directory(notebook_file, csv_one, csv_two):
    """Stage a fresh temp directory holding the notebook and both datasets.

    Uploads are copied in under the filenames the notebook expects;
    missing uploads fall back to the bundled default files.

    Returns
    -------
    tuple
        ``(run_dir, notebook_target)`` — both ``Path`` objects.

    Raises
    ------
    FileNotFoundError
        If the notebook or either dataset cannot be located.
    """
    run_dir = Path(tempfile.mkdtemp(prefix="hf_notebook_run_"))

    notebook_source = _resolve_uploaded_path(notebook_file) or DEFAULT_NOTEBOOK
    if not notebook_source.exists():
        raise FileNotFoundError("No notebook found. Upload one or include analysis_notebook.ipynb.")
    notebook_target = run_dir / "analysis_notebook.ipynb"
    shutil.copy2(notebook_source, notebook_target)

    csv_sources = (
        _resolve_uploaded_path(csv_one) or DEFAULT_CSVS[0],
        _resolve_uploaded_path(csv_two) or DEFAULT_CSVS[1],
    )
    for csv_source, expected_name in zip(csv_sources, EXPECTED_DATASET_NAMES):
        csv_path = Path(csv_source)
        if not csv_path.exists():
            raise FileNotFoundError(
                f"Missing dataset: {expected_name}. Upload it or keep the bundled default file."
            )
        # Standardized name so the notebook's hard-coded reads succeed.
        shutil.copy2(csv_path, run_dir / expected_name)

    return run_dir, notebook_target
def _extract_notebook_outputs(executed_notebook):
"""Collect text and tables from executed notebook cells."""
text_chunks = []
tables = []
for cell in executed_notebook.cells:
if cell.get("cell_type") != "code":
continue
for output in cell.get("outputs", []):
if output.get("output_type") == "stream":
text_chunks.append(output.get("text", ""))
elif output.get("output_type") in {"execute_result", "display_data"}:
data = output.get("data", {})
if "text/plain" in data:
text_chunks.append(str(data["text/plain"]))
if "text/html" in data:
try:
tables.append(pd.read_html(io.StringIO(data["text/html"]))[0])
except Exception:
pass
elif output.get("output_type") == "error":
traceback_text = "\n".join(output.get("traceback", []))
text_chunks.append(traceback_text)
combined_text = "\n\n".join(chunk.strip() for chunk in text_chunks if str(chunk).strip())
if not combined_text:
combined_text = "Notebook executed, but no text output was captured."
return combined_text, tables
def _save_output_bundle(run_dir):
"""Zip everything created during execution for download."""
zip_path = run_dir / "execution_outputs.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
for item in run_dir.rglob("*"):
if item == zip_path:
continue
if item.is_file():
zip_file.write(item, item.relative_to(run_dir))
return zip_path
def run_analysis(notebook_file, csv_one, csv_two):
    """Execute the notebook against the chosen files and package the results.

    Returns a 7-tuple matching the UI outputs in order: status line,
    execution log, two detected tables, executed-notebook path, ZIP path,
    and run metadata as a JSON string. Failures are reported through the
    same tuple instead of raising, so the Gradio callback never crashes.
    """
    try:
        run_dir, notebook_path = _prepare_run_directory(notebook_file, csv_one, csv_two)

        with notebook_path.open("r", encoding="utf-8") as notebook_handle:
            notebook_node = nbformat.read(notebook_handle, as_version=4)
        removed_cells = _remove_runtime_install_cells(notebook_node)

        # Run every cell inside the staged directory; stop on first error.
        client = NotebookClient(
            notebook_node,
            timeout=900,
            kernel_name="python3",
            resources={"metadata": {"path": str(run_dir)}},
            allow_errors=False,
        )
        client.execute()

        # Persist the executed notebook so the user can download it.
        executed_notebook_path = run_dir / "executed_analysis_notebook.ipynb"
        with executed_notebook_path.open("w", encoding="utf-8") as notebook_handle:
            nbformat.write(notebook_node, notebook_handle)

        log_text, tables = _extract_notebook_outputs(notebook_node)
        output_zip = _save_output_bundle(run_dir)

        # Placeholders keep the two table widgets populated even when the
        # notebook produced fewer than two HTML tables.
        if tables:
            first_table = tables[0]
        else:
            first_table = pd.DataFrame({"Info": ["No table output detected."]})
        if len(tables) > 1:
            second_table = tables[1]
        else:
            second_table = pd.DataFrame({"Info": ["No second table detected."]})

        summary = json.dumps(
            {
                "status": "success",
                "removed_install_cells": removed_cells,
                "notebook_used": _display_name(notebook_file) or DEFAULT_NOTEBOOK.name,
                "dataset_1": _display_name(csv_one) or DEFAULT_CSVS[0].name,
                "dataset_2": _display_name(csv_two) or DEFAULT_CSVS[1].name,
                "run_directory": str(run_dir),
            },
            indent=2,
        )
        return (
            "✅ Execution finished successfully.",
            log_text[:15000],  # cap the log so the textbox stays responsive
            first_table,
            second_table,
            str(executed_notebook_path),
            str(output_zip),
            summary,
        )
    except CellExecutionError as error:
        # A notebook cell raised: show its traceback in the log panel.
        return (
            "❌ Notebook execution failed.",
            str(error),
            pd.DataFrame({"Error": ["Notebook cell execution failed. See log above."]}),
            pd.DataFrame({"Error": ["No second table because execution stopped early."]}),
            None,
            None,
            json.dumps({"status": "failed", "reason": "CellExecutionError"}, indent=2),
        )
    except Exception as error:
        # Anything else (missing files, unreadable notebook JSON, ...).
        return (
            "❌ App error.",
            str(error),
            pd.DataFrame({"Error": [f"App failed before completion: {error}"]}),
            pd.DataFrame({"Error": ["No second table available."]}),
            None,
            None,
            json.dumps({"status": "failed", "reason": str(error)}, indent=2),
        )
# Minimal styling: a taller run button and a capped page width.
CUSTOM_CSS = """
#run-btn {min-height: 52px; font-size: 18px;}
.gradio-container {max-width: 1200px !important;}
"""

# UI layout. NOTE: component creation order defines the on-screen layout,
# and the outputs lists in the click() handlers below are positional —
# they must stay in sync with the 7-tuple returned by run_analysis.
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# Interactive Notebook Runner
Upload a Jupyter notebook and two CSV files, preview the datasets, then run the notebook directly in the Space.
**Default behavior:** if you leave uploads empty, the app uses the bundled class notebook and bundled CSV files.
"""
    )
    # --- Inputs: notebook plus the two datasets ---------------------------
    with gr.Row():
        notebook_input = gr.File(label="Notebook (.ipynb)", file_types=[".ipynb"])
        csv_one_input = gr.File(label="Dataset 1 (.csv)", file_types=[".csv"])
        csv_two_input = gr.File(label="Dataset 2 (.csv)", file_types=[".csv"])
    with gr.Row():
        preview_button = gr.Button("Refresh previews")
        run_button = gr.Button("Run notebook", elem_id="run-btn")
    preview_status = gr.Textbox(label="Preview status", interactive=False)

    # --- Output tabs -------------------------------------------------------
    with gr.Tab("Dataset previews"):
        with gr.Row():
            preview_table_one = gr.Dataframe(label="Preview: dataset 1", interactive=False)
            preview_table_two = gr.Dataframe(label="Preview: dataset 2", interactive=False)
    with gr.Tab("Execution results"):
        status_box = gr.Textbox(label="Run status", interactive=False)
        execution_log = gr.Textbox(label="Execution log", lines=18, interactive=False)
        with gr.Row():
            output_table_one = gr.Dataframe(label="Detected output table 1", interactive=False)
            output_table_two = gr.Dataframe(label="Detected output table 2", interactive=False)
        run_metadata = gr.Code(label="Run metadata", language="json")
    with gr.Tab("Downloads"):
        executed_notebook_file = gr.File(label="Executed notebook")
        execution_zip_file = gr.File(label="ZIP of all outputs")

    # --- Event wiring -------------------------------------------------------
    preview_button.click(
        fn=preview_datasets,
        inputs=[csv_one_input, csv_two_input],
        outputs=[preview_table_one, preview_table_two, preview_status],
    )
    run_button.click(
        fn=run_analysis,
        inputs=[notebook_input, csv_one_input, csv_two_input],
        outputs=[
            status_box,
            execution_log,
            output_table_one,
            output_table_two,
            executed_notebook_file,
            execution_zip_file,
            run_metadata,
        ],
    )

demo.launch()