Spaces:
Running
Running
Upload 4 files
Browse files- README.md +36 -7
- app.py +380 -0
- fix_notebook.py +163 -0
- requirements.txt +1 -0
README.md
CHANGED
|
@@ -1,14 +1,43 @@
|
|
| 1 |
---
|
| 2 |
-
title: Notebook
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
-
short_description: Fix .ipynb
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: GitHub Notebook Fixer
|
| 3 |
+
emoji: 🔧
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: "4.44.0"
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
+
short_description: Fix .ipynb files that show "Invalid Notebook" on GitHub
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# 🔧 GitHub Notebook Fixer
|
| 15 |
+
|
| 16 |
+
Upload a `.ipynb` notebook and this Space will:
|
| 17 |
+
|
| 18 |
+
1. **Predict** if it will render on GitHub or fail
|
| 19 |
+
2. **Diagnose** every issue (missing widget state, oversized outputs, bad metadata)
|
| 20 |
+
3. **Return a fixed `.ipynb`** ready to push
|
| 21 |
+
|
| 22 |
+
## Why do notebooks break on GitHub?
|
| 23 |
+
|
| 24 |
+
The most common cause is **Google Colab saving widget metadata without the required `state` key**. GitHub's nbconvert renderer requires `metadata.widgets["application/vnd.jupyter.widget-state+json"]["state"]` to exist — if it doesn't, you get the dreaded:
|
| 25 |
+
|
| 26 |
+
> **Invalid Notebook** — the 'state' key is missing from 'metadata.widgets'
|
| 27 |
+
|
| 28 |
+
Other causes include oversized cell outputs (>500 KB), giant base64 images, and missing `kernelspec` metadata.
|
| 29 |
+
|
| 30 |
+
## How to use
|
| 31 |
+
|
| 32 |
+
1. Upload your `.ipynb` file
|
| 33 |
+
2. Click **Analyze & Fix**
|
| 34 |
+
3. Read the diagnosis report
|
| 35 |
+
4. Download the fixed notebook
|
| 36 |
+
5. Push to GitHub — it will render!
|
| 37 |
+
|
| 38 |
+
## Run locally
|
| 39 |
+
|
| 40 |
+
```bash
|
| 41 |
+
pip install gradio
|
| 42 |
+
python app.py
|
| 43 |
+
```
|
app.py
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub Notebook Fixer — Hugging Face Space
|
| 3 |
+
Fixes .ipynb files so they render correctly on GitHub.
|
| 4 |
+
|
| 5 |
+
Common issues fixed:
|
| 6 |
+
1. Missing 'state' key in metadata.widgets
|
| 7 |
+
2. Oversized cell outputs (GitHub has a ~1MB render limit)
|
| 8 |
+
3. Invalid/missing notebook metadata (kernelspec, language_info)
|
| 9 |
+
4. Large base64-encoded images in outputs
|
| 10 |
+
5. Widget metadata without required 'state' field
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import gradio as gr
|
| 14 |
+
import json
|
| 15 |
+
import copy
|
| 16 |
+
import base64
|
| 17 |
+
import sys
|
| 18 |
+
import os
|
| 19 |
+
import tempfile
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# ---------------------------------------------------------------------------
|
| 24 |
+
# Analysis helpers
|
| 25 |
+
# ---------------------------------------------------------------------------
|
| 26 |
+
|
| 27 |
+
# Size thresholds in bytes, shared by the analyzer and the fixer.
MAX_OUTPUT_SIZE = 500_000  # ~500 KB per cell output is risky for GitHub
MAX_TOTAL_SIZE = 10_000_000  # ~10 MB total notebook size warning
MAX_IMAGE_SIZE = 1_000_000  # ~1 MB per embedded image

# Catalogue of the problems the analyzer can report, keyed by an internal id.
# Each entry carries a severity ("critical", "warning" or "info"), a short
# title and a one-sentence description; the analyzer copies an entry and adds
# a "location" (and sometimes a "detail") before reporting it.
GITHUB_ISSUES = {
    "widget_state_missing": {
        "severity": "critical",
        "title": "Missing 'state' in metadata.widgets",
        "desc": (
            "GitHub requires a 'state' key inside metadata.widgets. "
            "Without it the notebook preview shows 'Invalid Notebook'."
        ),
    },
    "widgets_empty_state": {
        "severity": "warning",
        "title": "metadata.widgets exists but 'state' is empty",
        "desc": "The widget state dict is present but empty — harmless but adds noise.",
    },
    "no_kernelspec": {
        "severity": "warning",
        "title": "Missing kernelspec in metadata",
        "desc": "GitHub may not detect the notebook language correctly.",
    },
    "no_language_info": {
        "severity": "info",
        "title": "Missing language_info in metadata",
        "desc": "Minor — GitHub can usually infer the language from kernelspec.",
    },
    "oversized_output": {
        "severity": "warning",
        "title": "Cell output exceeds ~500 KB",
        "desc": "Very large outputs can cause GitHub to skip rendering the notebook.",
    },
    "oversized_notebook": {
        "severity": "critical",
        "title": "Notebook exceeds ~10 MB",
        "desc": "GitHub will refuse to render notebooks over ~10 MB.",
    },
    "large_embedded_image": {
        "severity": "warning",
        "title": "Large base64 image embedded in output",
        "desc": "Images over ~1 MB bloat the notebook and slow GitHub rendering.",
    },
    "invalid_nbformat": {
        "severity": "critical",
        "title": "Missing or invalid nbformat version",
        "desc": "GitHub needs nbformat >= 4 to render the notebook.",
    },
}
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _sizeof(obj) -> int:
    """Return the UTF-8 byte length of *obj* serialized as JSON.

    Non-ASCII characters are written unescaped (``ensure_ascii=False``), so
    each one counts as its UTF-8 byte length rather than as a ``\\uXXXX``
    escape. The serialization uses ``json.dumps`` defaults otherwise, so the
    figure is a rough estimate of the on-disk size, not an exact one.
    """
    return len(json.dumps(obj, ensure_ascii=False).encode("utf-8"))
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def analyze_notebook(nb: dict) -> list[dict]:
    """Return a list of issue dicts found in the notebook.

    Each issue is a copy of a ``GITHUB_ISSUES`` entry (``severity``,
    ``title``, ``desc``) extended with a ``location`` string and, for
    size-related findings, a ``detail`` string giving the size in MB.
    The notebook itself is not modified.

    Malformed parts (non-dict ``metadata``/``widgets``, non-dict cells or
    outputs) are skipped instead of raising, so that a broken upload still
    gets a report.
    """
    issues: list[dict] = []

    # ---- nbformat version ----
    # The version must be an integer >= 4; a missing value, a string such as
    # "4" or a float are all reported as invalid.
    nbf = nb.get("nbformat")
    if not isinstance(nbf, int) or nbf < 4:
        issues.append({**GITHUB_ISSUES["invalid_nbformat"], "location": "root"})

    # ---- metadata.widgets ----
    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = {}
    widgets = meta.get("widgets")
    if isinstance(widgets, dict):
        standard_key = "application/vnd.jupyter.widget-state+json"
        if standard_key in widgets:
            ws = widgets[standard_key]
            if isinstance(ws, dict):
                if "state" not in ws:
                    issues.append({**GITHUB_ISSUES["widget_state_missing"], "location": "metadata.widgets"})
                elif not ws["state"]:
                    issues.append({**GITHUB_ISSUES["widgets_empty_state"], "location": "metadata.widgets"})
        else:
            # widgets key exists but no standard widget-state key
            for key, val in widgets.items():
                if isinstance(val, dict) and "state" not in val:
                    issues.append({
                        **GITHUB_ISSUES["widget_state_missing"],
                        "location": f"metadata.widgets['{key}']",
                    })

    # ---- kernelspec / language_info ----
    if "kernelspec" not in meta:
        issues.append({**GITHUB_ISSUES["no_kernelspec"], "location": "metadata"})
    if "language_info" not in meta:
        issues.append({**GITHUB_ISSUES["no_language_info"], "location": "metadata"})

    # ---- per-cell checks ----
    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []
    for idx, cell in enumerate(cells):
        if not isinstance(cell, dict):
            continue
        for out in cell.get("outputs") or []:
            out_size = _sizeof(out)
            if out_size > MAX_OUTPUT_SIZE:
                issues.append({
                    **GITHUB_ISSUES["oversized_output"],
                    "location": f"cells[{idx}]",
                    "detail": f"{out_size / 1_000_000:.2f} MB",
                })

            # check base64 images
            data = out.get("data") if isinstance(out, dict) else None
            if not isinstance(data, dict):
                continue
            for mime, content in data.items():
                if not mime.startswith("image/"):
                    continue
                # A payload may be stored as a list of string chunks rather
                # than one string; join it so such images are measured too.
                if isinstance(content, list):
                    content = "".join(part for part in content if isinstance(part, str))
                if not isinstance(content, str):
                    continue
                try:
                    img_bytes = len(base64.b64decode(content, validate=False))
                except ValueError:
                    # Not decodable as base64 (binascii.Error is a ValueError);
                    # fall back to the raw text length.
                    img_bytes = len(content)
                if img_bytes > MAX_IMAGE_SIZE:
                    issues.append({
                        **GITHUB_ISSUES["large_embedded_image"],
                        "location": f"cells[{idx}] ({mime})",
                        "detail": f"{img_bytes / 1_000_000:.2f} MB",
                    })

    # ---- total size ----
    total = _sizeof(nb)
    if total > MAX_TOTAL_SIZE:
        issues.append({
            **GITHUB_ISSUES["oversized_notebook"],
            "location": "entire file",
            "detail": f"{total / 1_000_000:.2f} MB",
        })

    return issues
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def predict_github_render(issues: list[dict]) -> str:
    """Return a one-line, human-readable rendering prediction.

    Any ``critical`` issue predicts a failed render; otherwise any
    ``warning`` predicts a possibly degraded render; with neither (including
    an empty list or only ``info`` entries) the notebook is predicted to
    render fine. Every issue must carry a ``severity`` key.
    """
    severities = {issue["severity"] for issue in issues}
    if "critical" in severities:
        return "❌ Will NOT render on GitHub"
    if "warning" in severities:
        return "⚠️ Might render, but with issues"
    return "✅ Should render fine on GitHub"
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
# ---------------------------------------------------------------------------
|
| 166 |
+
# Fixer
|
| 167 |
+
# ---------------------------------------------------------------------------
|
| 168 |
+
|
| 169 |
+
def fix_notebook(nb: dict, strip_widgets: bool = False, strip_large_outputs: bool = True) -> dict:
    """Return a fixed deep copy of the notebook; the input is left untouched.

    Repairs applied:

    * ``nbformat`` is set to 4 when missing, not an integer, or below 4
      (only the version field is changed; cells are not converted), and
      ``nbformat_minor`` defaults to 5 when absent.
    * ``metadata.widgets`` is removed when ``strip_widgets`` is true;
      otherwise every dict entry inside it gets an empty ``state`` if it
      has none.
    * Default Python ``kernelspec`` and ``language_info`` are added when
      missing.
    * When ``strip_large_outputs`` is true, any output larger than
      ``MAX_OUTPUT_SIZE`` is replaced by a short stdout placeholder, and any
      embedded image larger than ``MAX_IMAGE_SIZE`` is dropped in favour of
      a ``text/plain`` note.
    * Cells lacking an ``id`` receive one, but only when the notebook
      declares ``nbformat_minor`` >= 5.
    """
    nb = copy.deepcopy(nb)

    # ---- Ensure nbformat ----
    # Compare only real integers: None or a string would raise on "< 4".
    fmt = nb.get("nbformat")
    if not isinstance(fmt, int) or fmt < 4:
        nb["nbformat"] = 4
    nb.setdefault("nbformat_minor", 5)

    # ---- metadata ----
    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = nb["metadata"] = {}

    # Fix widgets
    widgets = meta.get("widgets")
    if widgets is not None:
        if strip_widgets:
            del meta["widgets"]
        elif isinstance(widgets, dict):
            # Add the missing 'state' key to every widget-state entry; this
            # covers the standard widget-state MIME key as well.
            for entry in widgets.values():
                if isinstance(entry, dict):
                    entry.setdefault("state", {})

    # Ensure kernelspec
    meta.setdefault("kernelspec", {
        "display_name": "Python 3",
        "language": "python",
        "name": "python3",
    })

    # Ensure language_info
    meta.setdefault("language_info", {
        "name": "python",
        "version": "3.10.0",
    })

    # ---- Per-cell fixes ----
    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []

    # Cell ids belong to nbformat 4.5+; earlier minor versions do not define
    # the field, so adding it there would make the notebook fail validation.
    minor = nb["nbformat_minor"]
    add_ids = isinstance(minor, int) and minor >= 5
    used_ids = {
        c["id"] for c in cells
        if isinstance(c, dict) and isinstance(c.get("id"), str)
    }
    next_id = 0

    for cell in cells:
        if not isinstance(cell, dict):
            continue

        # Only touch 'outputs' on cells that already have it, so markdown and
        # raw cells do not gain a key they are not supposed to carry.
        if "outputs" in cell:
            new_outputs = []
            for out in cell["outputs"] or []:
                # Strip very large outputs if requested
                if strip_large_outputs and _sizeof(out) > MAX_OUTPUT_SIZE:
                    new_outputs.append({
                        "output_type": "stream",
                        "name": "stdout",
                        "text": ["[Output removed — too large for GitHub rendering]\n"],
                    })
                    continue

                # Drop oversized embedded images, leaving a text placeholder.
                # NOTE: while MAX_IMAGE_SIZE exceeds MAX_OUTPUT_SIZE, an image
                # this large already trips the whole-output check above; this
                # branch only matters if the thresholds are changed.
                data = out.get("data") if isinstance(out, dict) else None
                if strip_large_outputs and isinstance(data, dict):
                    for mime in list(data):
                        if not mime.startswith("image/"):
                            continue
                        payload = data[mime]
                        if isinstance(payload, list):
                            payload = "".join(p for p in payload if isinstance(p, str))
                        if not isinstance(payload, str):
                            continue
                        try:
                            img_bytes = len(base64.b64decode(payload, validate=False))
                        except ValueError:
                            img_bytes = len(payload)
                        if img_bytes > MAX_IMAGE_SIZE:
                            # Remove the entry instead of leaving an empty
                            # string, which would be a broken image payload.
                            del data[mime]
                            data.setdefault("text/plain", ["[Large image removed for GitHub compatibility]"])

                new_outputs.append(out)
            cell["outputs"] = new_outputs

        # Ensure every cell has an 'id' field (nbformat 4.5+). Ids are derived
        # from a counter, so repeated runs give the same result.
        if add_ids and "id" not in cell:
            while f"cell-{next_id}" in used_ids:
                next_id += 1
            cell["id"] = f"cell-{next_id}"
            used_ids.add(cell["id"])

    return nb
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
# ---------------------------------------------------------------------------
|
| 248 |
+
# Report builder
|
| 249 |
+
# ---------------------------------------------------------------------------
|
| 250 |
+
|
| 251 |
+
def build_report(issues: list[dict], prediction: str) -> str:
    """Render the analysis result as a Markdown report.

    The report starts with *prediction* as a level-2 heading. With no issues
    it carries a single "all good" line; otherwise it holds a numbered list
    with one entry per issue showing a severity icon, the title, the
    optional ``detail``, the ``location`` and the ``desc``.
    """
    lines = [f"## {prediction}\n"]

    if not issues:
        lines.append("No issues detected — this notebook looks good for GitHub!\n")
        return "\n".join(lines)

    severity_emoji = {"critical": "🔴", "warning": "🟡", "info": "🔵"}

    lines.append(f"**Found {len(issues)} issue(s):**\n")
    for i, issue in enumerate(issues, 1):
        emoji = severity_emoji.get(issue["severity"], "⚪")
        detail = f" — {issue['detail']}" if "detail" in issue else ""
        lines.append(f"{i}. {emoji} **{issue['title']}**{detail}")
        lines.append(f" *Location:* `{issue['location']}`")
        lines.append(f" {issue['desc']}\n")

    return "\n".join(lines)
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
# ---------------------------------------------------------------------------
|
| 272 |
+
# Gradio handler
|
| 273 |
+
# ---------------------------------------------------------------------------
|
| 274 |
+
|
| 275 |
+
def process_notebook(file, strip_widgets: bool, strip_large_outputs: bool):
    """Main handler: analyse → predict → fix → return.

    Args:
        file: The uploaded notebook, either a filesystem path (a ``str`` or
            ``os.PathLike``) or an object exposing that path as ``.name``.
            ``None`` means nothing was uploaded.
        strip_widgets: Remove ``metadata.widgets`` entirely instead of
            repairing it.
        strip_large_outputs: Replace oversized outputs with a placeholder.

    Returns:
        A ``(report, path)`` tuple: the Markdown diagnosis report and the
        path of the fixed notebook written to a temporary file. ``path`` is
        ``None`` when no usable notebook could be read.
    """
    if file is None:
        return "Upload a `.ipynb` file first.", None

    # Read the notebook
    try:
        # The File input is configured with type="filepath", so the handler
        # normally gets a plain path; file-like wrappers with .name still work.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        with open(path, "r", encoding="utf-8") as f:
            nb = json.load(f)
    except json.JSONDecodeError:
        return "❌ The uploaded file is not valid JSON. Are you sure it's a `.ipynb`?", None
    except Exception as e:
        # UI boundary: report any read failure to the user instead of crashing.
        return f"❌ Could not read file: {e}", None

    # A notebook is a JSON object; lists, strings etc. cannot be analysed.
    if not isinstance(nb, dict):
        return "❌ The uploaded file is valid JSON but not a notebook (expected a JSON object).", None

    # Analyse
    issues = analyze_notebook(nb)
    prediction = predict_github_render(issues)
    report = build_report(issues, prediction)

    # Fix
    fixed_nb = fix_notebook(nb, strip_widgets=strip_widgets, strip_large_outputs=strip_large_outputs)

    # Re-analyse fixed version
    fixed_issues = analyze_notebook(fixed_nb)
    fixed_prediction = predict_github_render(fixed_issues)
    report += "\n---\n"
    report += f"### After fix: {fixed_prediction}\n"
    if fixed_issues:
        remaining = [i for i in fixed_issues if i["severity"] in ("critical", "warning")]
        if remaining:
            report += f"⚠️ {len(remaining)} issue(s) remain (may need manual attention).\n"
        else:
            report += "Only informational notes remain — notebook should render on GitHub.\n"
    else:
        report += "All issues resolved! ✅\n"

    # Write fixed notebook to temp file. The file is written through the
    # handle that created it and closed on exit, so no descriptor is leaked;
    # delete=False keeps it on disk for the download component.
    with tempfile.NamedTemporaryFile(
        "w", encoding="utf-8", suffix=".ipynb", prefix="fixed_", delete=False
    ) as f:
        json.dump(fixed_nb, f, ensure_ascii=False, indent=1)
        out_path = f.name

    return report, out_path
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
# ---------------------------------------------------------------------------
|
| 322 |
+
# UI
|
| 323 |
+
# ---------------------------------------------------------------------------
|
| 324 |
+
|
| 325 |
+
# Markdown shown at the top of the UI.
DESCRIPTION = """
# 🔧 GitHub Notebook Fixer

**Upload a `.ipynb` file** and this tool will:

1. **Predict** whether it will render on GitHub
2. **Diagnose** all issues (missing widget state, oversized outputs, bad metadata…)
3. **Fix** the problems and return a clean `.ipynb` you can push to GitHub

### Common issues fixed
- `metadata.widgets` missing the `state` key → **"Invalid Notebook"** on GitHub
- Oversized cell outputs (>500 KB) that block rendering
- Missing `kernelspec` / `language_info` metadata
- Giant base64-encoded images bloating the file
"""
|
| 340 |
+
|
| 341 |
+
# Build the Gradio UI: upload + options on the left, report + download on the
# right, with one button wiring the inputs to process_notebook.
with gr.Blocks(
    title="GitHub Notebook Fixer",
    theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"),
) as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload .ipynb file",
                file_types=[".ipynb"],
                type="filepath",
            )
            strip_widgets = gr.Checkbox(
                label="Remove widget metadata entirely (instead of fixing it)",
                value=False,
            )
            strip_large = gr.Checkbox(
                label="Strip oversized outputs (>500 KB per cell)",
                value=True,
            )
            # NOTE(review): the button's leading emoji was lost to a text
            # mis-decoding; a magnifying glass is assumed here - confirm.
            btn = gr.Button("🔍 Analyze & Fix", variant="primary", size="lg")

        with gr.Column(scale=2):
            report_output = gr.Markdown(label="Diagnosis Report")
            file_output = gr.File(label="Download Fixed Notebook")

    btn.click(
        fn=process_notebook,
        inputs=[file_input, strip_widgets, strip_large],
        outputs=[report_output, file_output],
    )

    # Single-quoted first fragment so the inner double quotes need no escaping.
    gr.Markdown(
        '---\n*Built to solve the classic GitHub "Invalid Notebook" error. '
        "Works for Colab, Jupyter, and any nbformat-4 notebook.*"
    )

if __name__ == "__main__":
    demo.launch()
|
fix_notebook.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
fix_notebook.py — CLI tool to fix .ipynb files for GitHub rendering.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python fix_notebook.py input.ipynb # writes fixed_input.ipynb
|
| 7 |
+
python fix_notebook.py input.ipynb -o output.ipynb # custom output name
|
| 8 |
+
python fix_notebook.py input.ipynb --strip-widgets # remove widget metadata entirely
|
| 9 |
+
python fix_notebook.py input.ipynb --analyze-only # just print the diagnosis
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import json
|
| 14 |
+
import sys
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
# Reuse the core logic from app.py (copy the functions for standalone use)
|
| 18 |
+
|
| 19 |
+
# Size thresholds in bytes.
MAX_OUTPUT_SIZE = 500_000  # largest single cell output kept as-is
MAX_TOTAL_SIZE = 10_000_000  # whole-notebook size reported as critical
MAX_IMAGE_SIZE = 1_000_000  # per-image threshold
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _sizeof(obj) -> int:
    """Return the UTF-8 byte length of *obj* serialized as JSON.

    Non-ASCII characters are written unescaped, so each counts as its UTF-8
    byte length. The result is a rough estimate of on-disk size.
    """
    return len(json.dumps(obj, ensure_ascii=False).encode("utf-8"))
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def analyze_notebook(nb: dict) -> list[dict]:
    """Return the issues found in *nb* as a list of dicts.

    Every issue has ``severity`` ("critical", "warning" or "info"),
    ``title`` and ``location`` keys. The notebook is not modified, and
    malformed parts (non-dict metadata, widgets or cells) are skipped rather
    than raising.
    """
    issues = []

    # nbformat must be an integer >= 4; missing, string or float values are
    # all reported.
    nbf = nb.get("nbformat")
    if not isinstance(nbf, int) or nbf < 4:
        issues.append({"severity": "critical", "title": "Missing/invalid nbformat", "location": "root"})

    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = {}

    widgets = meta.get("widgets")
    if isinstance(widgets, dict):
        for key, val in widgets.items():
            if isinstance(val, dict) and "state" not in val:
                issues.append({
                    "severity": "critical",
                    "title": f"Missing 'state' in widgets['{key}']",
                    "location": "metadata.widgets",
                })

    if "kernelspec" not in meta:
        issues.append({"severity": "warning", "title": "Missing kernelspec", "location": "metadata"})
    # Reported as informational only; the fixer fills in a default as well.
    if "language_info" not in meta:
        issues.append({"severity": "info", "title": "Missing language_info", "location": "metadata"})

    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []
    for idx, cell in enumerate(cells):
        if not isinstance(cell, dict):
            continue
        for out in cell.get("outputs") or []:
            if _sizeof(out) > MAX_OUTPUT_SIZE:
                issues.append({
                    "severity": "warning",
                    "title": f"Oversized output in cell {idx}",
                    "location": f"cells[{idx}]",
                })

    total = _sizeof(nb)
    if total > MAX_TOTAL_SIZE:
        issues.append({
            "severity": "critical",
            "title": f"Notebook is {total / 1e6:.1f} MB (>10 MB limit)",
            "location": "entire file",
        })

    return issues
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def fix_notebook(nb: dict, strip_widgets=False, strip_large=True) -> dict:
    """Return a fixed deep copy of *nb*; the input is left untouched.

    * ``nbformat`` is set to 4 when missing, not an integer, or below 4
      (only the version field changes), and ``nbformat_minor`` defaults to 5.
    * ``metadata.widgets`` is removed when ``strip_widgets`` is true;
      otherwise each dict entry in it gets an empty ``state`` if it has none.
    * Default Python ``kernelspec`` / ``language_info`` are added if missing.
    * When ``strip_large`` is true, outputs larger than ``MAX_OUTPUT_SIZE``
      are replaced by a short stdout placeholder.
    * Cells lacking an ``id`` get one, only for ``nbformat_minor`` >= 5.
    """
    import copy
    nb = copy.deepcopy(nb)

    # Compare only real integers: an explicit null or a string would raise
    # on "< 4".
    fmt = nb.get("nbformat")
    if not isinstance(fmt, int) or fmt < 4:
        nb["nbformat"] = 4
    nb.setdefault("nbformat_minor", 5)

    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = nb["metadata"] = {}

    widgets = meta.get("widgets")
    if widgets is not None:
        if strip_widgets:
            del meta["widgets"]
        elif isinstance(widgets, dict):
            for val in widgets.values():
                if isinstance(val, dict):
                    val.setdefault("state", {})

    meta.setdefault("kernelspec", {
        "display_name": "Python 3", "language": "python", "name": "python3"
    })
    meta.setdefault("language_info", {"name": "python", "version": "3.10.0"})

    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []

    # Cell ids belong to nbformat 4.5+; earlier minor versions do not define
    # the field, so adding it there would make the notebook fail validation.
    minor = nb["nbformat_minor"]
    add_ids = isinstance(minor, int) and minor >= 5
    used_ids = {
        c["id"] for c in cells
        if isinstance(c, dict) and isinstance(c.get("id"), str)
    }
    next_id = 0

    for cell in cells:
        if not isinstance(cell, dict):
            continue

        # Only rewrite 'outputs' where it already exists, so markdown and raw
        # cells do not gain the key.
        if strip_large and "outputs" in cell:
            new_outputs = []
            for out in cell["outputs"] or []:
                if _sizeof(out) > MAX_OUTPUT_SIZE:
                    new_outputs.append({
                        "output_type": "stream",
                        "name": "stdout",
                        "text": ["[Output removed — too large for GitHub]\n"],
                    })
                else:
                    new_outputs.append(out)
            cell["outputs"] = new_outputs

        # Counter-based ids keep the output identical across runs.
        if add_ids and "id" not in cell:
            while f"cell-{next_id}" in used_ids:
                next_id += 1
            cell["id"] = f"cell-{next_id}"
            used_ids.add(cell["id"])

    return nb
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def main():
    """Command-line entry point: diagnose a notebook and write a fixed copy.

    Prints a rendering prediction and the list of issues. Unless
    ``--analyze-only`` is given, it then writes the fixed notebook to
    ``--output`` or, by default, to ``fixed_<input name>`` in the current
    directory. Exits with status 1 when the input is missing or cannot be
    read as a notebook.
    """
    parser = argparse.ArgumentParser(description="Fix .ipynb files for GitHub rendering")
    parser.add_argument("input", help="Path to the input .ipynb file")
    parser.add_argument("-o", "--output", help="Output file path (default: fixed_<input>)")
    parser.add_argument("--strip-widgets", action="store_true", help="Remove widget metadata entirely")
    parser.add_argument("--keep-large-outputs", action="store_true", help="Don't strip oversized outputs")
    parser.add_argument("--analyze-only", action="store_true", help="Only print diagnosis, don't fix")
    args = parser.parse_args()

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: {input_path} not found", file=sys.stderr)
        sys.exit(1)

    # Report unreadable or non-JSON input as a short error, not a traceback.
    # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
    try:
        with open(input_path, "r", encoding="utf-8") as f:
            nb = json.load(f)
    except (OSError, ValueError) as exc:
        print(f"Error: could not read {input_path} as a notebook: {exc}", file=sys.stderr)
        sys.exit(1)
    if not isinstance(nb, dict):
        print(f"Error: {input_path} is not a notebook (expected a JSON object)", file=sys.stderr)
        sys.exit(1)

    issues = analyze_notebook(nb)

    # Print diagnosis
    crits = [i for i in issues if i["severity"] == "critical"]
    warns = [i for i in issues if i["severity"] == "warning"]

    if crits:
        print("❌ PREDICTION: Will NOT render on GitHub")
    elif warns:
        print("⚠️ PREDICTION: Might render with issues")
    else:
        print("✅ PREDICTION: Should render fine")

    if issues:
        icons = {"critical": "🔴", "warning": "🟡"}
        print(f"\nFound {len(issues)} issue(s):")
        for i, iss in enumerate(issues, 1):
            icon = icons.get(iss["severity"], "🔵")
            print(f" {i}. {icon} {iss['title']} [{iss['location']}]")
    else:
        print("\nNo issues found!")

    if args.analyze_only:
        sys.exit(0)

    # Fix
    fixed = fix_notebook(nb, strip_widgets=args.strip_widgets, strip_large=not args.keep_large_outputs)

    out_path = args.output or f"fixed_{input_path.name}"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(fixed, f, ensure_ascii=False, indent=1)

    print(f"\n✅ Fixed notebook saved to: {out_path}")


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|