Spaces:
Running
Running
| # AI-Assisted Code — Academic Integrity Notice | |
| # Generated with The App Builder. ESCP coursework. | |
| # Student must be able to explain all code when asked. | |
| """Interactive Hugging Face Space to execute a Jupyter notebook on uploaded CSV files.""" | |
| import io | |
| import re | |
| import json | |
| import shutil | |
| import zipfile | |
| import tempfile | |
| from pathlib import Path | |
| import gradio as gr | |
| import nbformat | |
| import pandas as pd | |
| from nbclient import NotebookClient | |
| from nbclient.exceptions import CellExecutionError | |
# Directory that contains this file; bundled assets are looked up beside it.
APP_DIR = Path(__file__).resolve().parent

# Notebook that is executed when the user does not upload one.
DEFAULT_NOTEBOOK = APP_DIR / "analysis_notebook.ipynb"

# File names the two datasets receive inside the run directory, in
# upload-slot order.
EXPECTED_DATASET_NAMES = [
    "synthetic_book_reviews.csv",
    "synthetic_sales_data.csv",
]

# Bundled fallback datasets, one per upload slot.
DEFAULT_CSVS = [APP_DIR / dataset_name for dataset_name in EXPECTED_DATASET_NAMES]
def _display_name(file_obj):
    """Return the bare file name of a Gradio upload.

    ``file_obj`` may be ``None``, a path string, or an object exposing a
    ``name`` attribute that holds a path. An empty string is returned when
    no name can be determined.
    """
    if file_obj is None:
        return ""
    raw_name = file_obj if isinstance(file_obj, str) else getattr(file_obj, "name", "")
    if not raw_name:
        return ""
    return Path(raw_name).name
def _resolve_uploaded_path(file_obj):
    """Convert a Gradio file input into a local ``Path``.

    Args:
        file_obj: ``None``, a path string, a ``Path``, or an object whose
            ``name`` attribute holds the path of the uploaded file.

    Returns:
        The location as a ``Path``, or ``None`` when nothing usable was given.
    """
    if file_obj is None:
        return None
    # A Path has to be handled here: its ``name`` attribute is only the final
    # component, so the attribute fallback below would drop the directory.
    if isinstance(file_obj, (str, Path)):
        return Path(file_obj)
    file_name = getattr(file_obj, "name", None)
    return Path(file_name) if file_name else None
def _preview_csv(file_obj, max_rows=10):
    """Load the first rows of an uploaded CSV for display in the UI.

    Args:
        file_obj: Gradio file input (see ``_resolve_uploaded_path``).
        max_rows: Maximum number of data rows to include in the preview.

    Returns:
        A DataFrame with at most ``max_rows`` rows, or a one-cell DataFrame
        carrying an "Info" or "Error" message when there is no file or the
        file cannot be read.
    """
    file_path = _resolve_uploaded_path(file_obj)
    if file_path is None:
        return pd.DataFrame({"Info": ["No file uploaded yet."]})
    try:
        # ``nrows`` stops parsing after the preview rows, so a large upload is
        # not loaded in full just to show its first lines.
        return pd.read_csv(file_path, nrows=max_rows)
    except Exception as error:
        # UI boundary: any read/parse problem is reported in the table
        # instead of failing the click handler.
        return pd.DataFrame({"Error": [f"Could not preview {file_path.name}: {error}"]})
def preview_datasets(csv_one, csv_two):
    """Build both dataset preview tables and a one-line status message.

    Returns a tuple ``(first_preview, second_preview, status_text)``.
    """
    first_preview = _preview_csv(csv_one)
    second_preview = _preview_csv(csv_two)

    placeholder = "default / missing"
    first_label = _display_name(csv_one) or placeholder
    second_label = _display_name(csv_two) or placeholder
    status_text = f"Preview ready. File 1: {first_label} | File 2: {second_label}"

    return first_preview, second_preview, status_text
def _remove_runtime_install_cells(notebook_node):
    """Strip ``!pip install`` / ``%pip install`` commands from code cells.

    Hugging Face installs dependencies from requirements.txt, so runtime
    install commands are removed before the notebook is executed. Install
    commands are detected on any line of a code cell, not only the first.
    Only the install lines (including backslash-continued lines) are taken
    out; a cell is dropped entirely only when nothing but whitespace remains,
    so ordinary code that shares a cell with an install command is kept.

    Args:
        notebook_node: Notebook object exposing a ``cells`` list. It is
            modified in place.

    Returns:
        The number of code cells that contained at least one install command
        (whether the cell was dropped or merely cleaned).
    """
    install_line = re.compile(r"^\s*[!%]pip\s+install", re.IGNORECASE)
    cleaned_cells = []
    affected_count = 0

    for cell in notebook_node.cells:
        if cell.get("cell_type") != "code":
            cleaned_cells.append(cell)
            continue

        source = cell.get("source", "")
        if isinstance(source, (list, tuple)):
            # Cell source may be stored as a list of lines; work on one string.
            source = "".join(source)

        kept_lines = []
        found_install = False
        in_install_command = False
        for line in source.splitlines(keepends=True):
            if in_install_command or install_line.match(line):
                found_install = True
                # A trailing backslash continues the command on the next line.
                in_install_command = line.rstrip().endswith("\\")
                continue
            kept_lines.append(line)

        if not found_install:
            cleaned_cells.append(cell)
            continue

        affected_count += 1
        remaining_source = "".join(kept_lines)
        if remaining_source.strip():
            cell["source"] = remaining_source
            cleaned_cells.append(cell)

    notebook_node.cells = cleaned_cells
    return affected_count
def _prepare_run_directory(notebook_file, csv_one, csv_two):
    """Create a fresh temp folder holding the notebook and both datasets.

    Uploaded files take precedence; any missing upload falls back to the
    bundled default. Inside the folder the notebook is always named
    ``analysis_notebook.ipynb`` and the datasets receive the names listed in
    ``EXPECTED_DATASET_NAMES``.

    All sources are validated before the temp folder is created, and the
    folder is removed again if copying fails, so a failed call leaves no
    directory behind.

    Args:
        notebook_file: Gradio file input for the notebook, or ``None``.
        csv_one: Gradio file input for the first dataset, or ``None``.
        csv_two: Gradio file input for the second dataset, or ``None``.

    Returns:
        Tuple ``(run_dir, notebook_target)`` of ``Path`` objects.

    Raises:
        FileNotFoundError: If the notebook or one of the datasets does not
            exist.
    """
    notebook_source = _resolve_uploaded_path(notebook_file) or DEFAULT_NOTEBOOK
    if not notebook_source.exists():
        raise FileNotFoundError("No notebook found. Upload one or include analysis_notebook.ipynb.")

    csv_sources = [
        _resolve_uploaded_path(csv_one) or DEFAULT_CSVS[0],
        _resolve_uploaded_path(csv_two) or DEFAULT_CSVS[1],
    ]
    for source, expected_name in zip(csv_sources, EXPECTED_DATASET_NAMES):
        if not Path(source).exists():
            raise FileNotFoundError(
                f"Missing dataset: {expected_name}. Upload it or keep the bundled default file."
            )

    run_dir = Path(tempfile.mkdtemp(prefix="hf_notebook_run_"))
    try:
        notebook_target = run_dir / "analysis_notebook.ipynb"
        shutil.copy2(notebook_source, notebook_target)
        for source, expected_name in zip(csv_sources, EXPECTED_DATASET_NAMES):
            shutil.copy2(source, run_dir / expected_name)
    except Exception:
        # Do not leave a half-populated temp folder behind.
        shutil.rmtree(run_dir, ignore_errors=True)
        raise
    return run_dir, notebook_target
def _extract_notebook_outputs(executed_notebook):
    """Collect text and tables from the outputs of executed code cells.

    Stream text, ``text/plain`` results and error tracebacks are gathered as
    text; ``text/html`` results are parsed with ``pandas.read_html`` and the
    first table of each is kept. Text values may be plain strings or lists of
    string fragments; both are handled.

    Args:
        executed_notebook: Notebook object exposing a ``cells`` list whose
            items support ``.get``.

    Returns:
        Tuple ``(combined_text, tables)``: the non-empty text chunks joined
        by blank lines (or a placeholder sentence when there are none), and a
        list of DataFrames.
    """

    def _as_text(value):
        # Multi-line values may arrive as a list of fragments; join them
        # rather than showing the list's repr or failing on ``.strip()``.
        if isinstance(value, (list, tuple)):
            return "".join(str(part) for part in value)
        return str(value)

    text_chunks = []
    tables = []
    for cell in executed_notebook.cells:
        if cell.get("cell_type") != "code":
            continue
        for output in cell.get("outputs", []):
            output_type = output.get("output_type")
            if output_type == "stream":
                text_chunks.append(_as_text(output.get("text", "")))
            elif output_type in {"execute_result", "display_data"}:
                data = output.get("data", {})
                if "text/plain" in data:
                    text_chunks.append(_as_text(data["text/plain"]))
                if "text/html" in data:
                    try:
                        html_text = _as_text(data["text/html"])
                        tables.append(pd.read_html(io.StringIO(html_text))[0])
                    except Exception:
                        # Best effort: HTML without a parsable table (or a
                        # missing HTML parser) simply yields no table.
                        continue
            elif output_type == "error":
                text_chunks.append("\n".join(output.get("traceback", [])))

    stripped_chunks = (chunk.strip() for chunk in text_chunks)
    combined_text = "\n\n".join(chunk for chunk in stripped_chunks if chunk)
    if not combined_text:
        combined_text = "Notebook executed, but no text output was captured."
    return combined_text, tables
def _save_output_bundle(run_dir):
    """Zip every file below ``run_dir`` into ``execution_outputs.zip``.

    Archive members are stored under their path relative to ``run_dir``; the
    archive itself is left out. Returns the path of the created ZIP file.
    """
    zip_path = run_dir / "execution_outputs.zip"
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for entry in run_dir.rglob("*"):
            # Skip the archive being written, and anything that is not a file.
            if entry != zip_path and entry.is_file():
                archive.write(entry, entry.relative_to(run_dir))
    return zip_path
def run_analysis(notebook_file, csv_one, csv_two):
    """Run the notebook on the selected files and return UI-friendly outputs.

    Args:
        notebook_file: Gradio file input for the notebook, or ``None`` to use
            the bundled default.
        csv_one: Gradio file input for the first dataset, or ``None``.
        csv_two: Gradio file input for the second dataset, or ``None``.

    Returns:
        A 7-tuple ``(status, log_text, first_table, second_table,
        executed_notebook_path, zip_path, metadata_json)``. On failure both
        paths are ``None``, the tables carry an error message, and the
        temporary run directory is removed because nothing in it is handed
        back for download. Errors are reported through the return value, not
        raised.
    """
    max_log_chars = 15000
    run_dir = None

    def _failure(status, log_text, first_message, second_message, reason):
        # Build the failure result and discard the run directory (if one was
        # created) so failed runs do not pile up in the temp folder.
        if run_dir is not None:
            shutil.rmtree(run_dir, ignore_errors=True)
        return (
            status,
            log_text,
            pd.DataFrame({"Error": [first_message]}),
            pd.DataFrame({"Error": [second_message]}),
            None,
            None,
            json.dumps({"status": "failed", "reason": reason}, indent=2),
        )

    try:
        run_dir, notebook_path = _prepare_run_directory(notebook_file, csv_one, csv_two)
        with notebook_path.open("r", encoding="utf-8") as notebook_handle:
            notebook_node = nbformat.read(notebook_handle, as_version=4)
        removed_cells = _remove_runtime_install_cells(notebook_node)

        client = NotebookClient(
            notebook_node,
            timeout=900,
            kernel_name="python3",
            # Execute with the run directory as working directory.
            resources={"metadata": {"path": str(run_dir)}},
            allow_errors=False,
        )
        client.execute()

        executed_notebook_path = run_dir / "executed_analysis_notebook.ipynb"
        with executed_notebook_path.open("w", encoding="utf-8") as notebook_handle:
            nbformat.write(notebook_node, notebook_handle)

        log_text, tables = _extract_notebook_outputs(notebook_node)
        output_zip = _save_output_bundle(run_dir)

        first_table = tables[0] if tables else pd.DataFrame({"Info": ["No table output detected."]})
        second_table = tables[1] if len(tables) > 1 else pd.DataFrame({"Info": ["No second table detected."]})
        summary = {
            "status": "success",
            "removed_install_cells": removed_cells,
            "notebook_used": _display_name(notebook_file) or DEFAULT_NOTEBOOK.name,
            "dataset_1": _display_name(csv_one) or DEFAULT_CSVS[0].name,
            "dataset_2": _display_name(csv_two) or DEFAULT_CSVS[1].name,
            "run_directory": str(run_dir),
        }
        return (
            "✅ Execution finished successfully.",
            # Keep the log box responsive for very chatty notebooks.
            log_text[:max_log_chars],
            first_table,
            second_table,
            str(executed_notebook_path),
            str(output_zip),
            json.dumps(summary, indent=2),
        )
    except CellExecutionError as error:
        return _failure(
            "❌ Notebook execution failed.",
            str(error),
            "Notebook cell execution failed. See log above.",
            "No second table because execution stopped early.",
            "CellExecutionError",
        )
    except Exception as error:
        # Top-level UI boundary: report any other problem instead of raising.
        return _failure(
            "❌ App error.",
            str(error),
            f"App failed before completion: {error}",
            "No second table available.",
            str(error),
        )
# Extra CSS passed to gr.Blocks: enlarges the element with id "run-btn" and
# caps the width of the Gradio container.
CUSTOM_CSS = """
#run-btn {min-height: 52px; font-size: 18px;}
.gradio-container {max-width: 1200px !important;}
"""

# UI definition. Components are created in the order they are written here.
# NOTE(review): the nesting of the ``with`` blocks was reconstructed from
# source that had lost its indentation - confirm the layout against the
# running app.
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Interactive Notebook Runner
        Upload a Jupyter notebook and two CSV files, preview the datasets, then run the notebook directly in the Space.
        **Default behavior:** if you leave uploads empty, the app uses the bundled class notebook and bundled CSV files.
        """
    )
    # Upload slots: one notebook and two datasets.
    with gr.Row():
        notebook_input = gr.File(label="Notebook (.ipynb)", file_types=[".ipynb"])
        csv_one_input = gr.File(label="Dataset 1 (.csv)", file_types=[".csv"])
        csv_two_input = gr.File(label="Dataset 2 (.csv)", file_types=[".csv"])
    # Action buttons; the run button carries the id targeted by CUSTOM_CSS.
    with gr.Row():
        preview_button = gr.Button("Refresh previews")
        run_button = gr.Button("Run notebook", elem_id="run-btn")
    preview_status = gr.Textbox(label="Preview status", interactive=False)
    # Tab 1: tables filled by preview_datasets.
    with gr.Tab("Dataset previews"):
        with gr.Row():
            preview_table_one = gr.Dataframe(label="Preview: dataset 1", interactive=False)
            preview_table_two = gr.Dataframe(label="Preview: dataset 2", interactive=False)
    # Tab 2: status, log, detected tables and metadata filled by run_analysis.
    with gr.Tab("Execution results"):
        status_box = gr.Textbox(label="Run status", interactive=False)
        execution_log = gr.Textbox(label="Execution log", lines=18, interactive=False)
        with gr.Row():
            output_table_one = gr.Dataframe(label="Detected output table 1", interactive=False)
            output_table_two = gr.Dataframe(label="Detected output table 2", interactive=False)
        run_metadata = gr.Code(label="Run metadata", language="json")
    # Tab 3: files produced by a successful run.
    with gr.Tab("Downloads"):
        executed_notebook_file = gr.File(label="Executed notebook")
        execution_zip_file = gr.File(label="ZIP of all outputs")
    # preview_datasets returns (table 1, table 2, status) in this output order.
    preview_button.click(
        fn=preview_datasets,
        inputs=[csv_one_input, csv_two_input],
        outputs=[preview_table_one, preview_table_two, preview_status],
    )
    # The outputs list must stay in the order of run_analysis's 7-tuple.
    run_button.click(
        fn=run_analysis,
        inputs=[notebook_input, csv_one_input, csv_two_input],
        outputs=[
            status_box,
            execution_log,
            output_table_one,
            output_table_two,
            executed_notebook_file,
            execution_zip_file,
            run_metadata,
        ],
    )
# Start the app when this module is executed or imported.
demo.launch()