Notebook-github-fixer / fix_notebook.py
priyadip's picture
Upload 4 files
96ba38e verified
#!/usr/bin/env python3
"""
fix_notebook.py — CLI tool to fix .ipynb files for GitHub rendering.

Usage:
    python fix_notebook.py input.ipynb                        # writes fixed_input.ipynb
    python fix_notebook.py input.ipynb -o output.ipynb        # custom output name
    python fix_notebook.py input.ipynb --strip-widgets        # remove widget metadata entirely
    python fix_notebook.py input.ipynb --keep-large-outputs   # don't strip oversized outputs
    python fix_notebook.py input.ipynb --analyze-only         # just print the diagnosis
"""
import argparse
import json
import sys
from pathlib import Path
# Core logic copied from app.py so that this script can be used standalone.
# The thresholds below are compared against sizes computed by _sizeof(),
# i.e. the byte length of the UTF-8 encoded JSON serialization.
MAX_OUTPUT_SIZE = 500_000  # per-output threshold used by analyze_notebook / fix_notebook
MAX_TOTAL_SIZE = 10_000_000  # whole-notebook threshold, reported as the "10 MB limit"
MAX_IMAGE_SIZE = 1_000_000  # NOTE(review): not referenced in the visible part of this file
def _sizeof(obj) -> int:
return len(json.dumps(obj, ensure_ascii=False).encode())
def analyze_notebook(nb: dict) -> list[dict]:
    """Inspect a parsed notebook and list the problems that were found.

    The checks run in this order:

    1. ``nbformat`` is missing, not a number, or lower than 4 (critical).
    2. An entry of ``metadata.widgets`` is a dict without a ``state`` key
       (critical, one issue per entry).
    3. ``metadata.kernelspec`` is missing (warning).
    4. A single cell output serializes to more than ``MAX_OUTPUT_SIZE``
       bytes (warning, one issue per output).
    5. The whole notebook serializes to more than ``MAX_TOTAL_SIZE`` bytes
       (critical).

    Malformed containers (``metadata``, ``widgets``, ``cells``, a cell or its
    ``outputs`` that do not have the expected JSON type) are skipped instead
    of raising, so the analysis always completes on any decoded JSON object.

    Args:
        nb: The notebook JSON decoded into a dict. It is not modified.

    Returns:
        A list of issue dicts, each with ``"severity"`` (``"critical"`` or
        ``"warning"``), ``"title"`` and ``"location"`` keys.
    """
    issues = []

    # Anything that is not a number >= 4 (including a missing value or a
    # string such as "4") counts as a missing/invalid format version.
    nbf = nb.get("nbformat")
    if not isinstance(nbf, (int, float)) or nbf < 4:
        issues.append({"severity": "critical", "title": "Missing/invalid nbformat", "location": "root"})

    # "metadata": null or a non-object value is treated like empty metadata.
    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = {}

    # Only a dict can be inspected entry by entry; other shapes are skipped.
    widgets = meta.get("widgets")
    if isinstance(widgets, dict):
        for key, val in widgets.items():
            if isinstance(val, dict) and "state" not in val:
                issues.append({
                    "severity": "critical",
                    "title": f"Missing 'state' in widgets['{key}']",
                    "location": "metadata.widgets",
                })

    if "kernelspec" not in meta:
        issues.append({"severity": "warning", "title": "Missing kernelspec", "location": "metadata"})

    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []
    for idx, cell in enumerate(cells):
        if not isinstance(cell, dict):
            continue
        outputs = cell.get("outputs")
        if not isinstance(outputs, list):
            continue
        for out in outputs:
            if _sizeof(out) > MAX_OUTPUT_SIZE:
                issues.append({
                    "severity": "warning",
                    "title": f"Oversized output in cell {idx}",
                    "location": f"cells[{idx}]",
                })

    total = _sizeof(nb)
    if total > MAX_TOTAL_SIZE:
        issues.append({
            "severity": "critical",
            "title": f"Notebook is {total / 1e6:.1f} MB (>10 MB limit)",
            "location": "entire file",
        })
    return issues
def fix_notebook(nb: dict, strip_widgets=False, strip_large=True) -> dict:
    """Return a repaired deep copy of ``nb``; the input is never mutated.

    Repairs applied:

    * ``nbformat`` is set to 4 when it is missing, not a number, or below 4;
      ``nbformat_minor`` defaults to 5.
    * ``metadata`` is created (or replaced when it is not a dict).
    * ``metadata.widgets`` is removed when ``strip_widgets`` is true;
      otherwise every dict entry gets an empty ``state`` if it has none.
      A ``widgets`` value that is not a dict is left untouched.
    * Default ``kernelspec`` and ``language_info`` metadata are added when
      missing.
    * When ``strip_large`` is true, every output whose serialized size
      exceeds ``MAX_OUTPUT_SIZE`` is replaced by a short placeholder stream
      output. Only code cells (and cells that already carry ``outputs``) are
      touched, so markdown/raw cells do not gain an ``outputs`` key.
    * Cells without an ``id`` receive a deterministic one (``cell-<index>``,
      with a numeric suffix if that id is already taken). Because cell ids
      only exist from notebook format 4.5 on, ``nbformat_minor`` is raised to
      5 when an id had to be added to a format-4 notebook.

    Args:
        nb: The notebook JSON decoded into a dict.
        strip_widgets: Remove ``metadata.widgets`` entirely instead of
            patching it.
        strip_large: Replace oversized outputs with a placeholder.

    Returns:
        The repaired notebook as a new dict.
    """
    import copy  # local import: the module header does not import copy

    nb = copy.deepcopy(nb)

    # A missing, null or non-numeric version is handled like "too old"
    # instead of raising TypeError on the comparison.
    nbformat = nb.get("nbformat")
    if not isinstance(nbformat, (int, float)) or nbformat < 4:
        nb["nbformat"] = 4
    nb.setdefault("nbformat_minor", 5)

    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = nb["metadata"] = {}

    widgets = meta.get("widgets")
    if widgets is not None and strip_widgets:
        del meta["widgets"]
    elif isinstance(widgets, dict):
        for val in widgets.values():
            if isinstance(val, dict):
                val.setdefault("state", {})

    meta.setdefault("kernelspec", {
        "display_name": "Python 3", "language": "python", "name": "python3"
    })
    meta.setdefault("language_info", {"name": "python", "version": "3.10.0"})

    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []

    # Ids already present in the notebook; generated ids must not collide.
    used_ids = {
        c["id"] for c in cells
        if isinstance(c, dict) and isinstance(c.get("id"), str)
    }
    added_id = False

    for index, cell in enumerate(cells):
        if not isinstance(cell, dict):
            continue

        # Markdown/raw cells have no "outputs" field; only rewrite outputs
        # where the key belongs (code cells) or already exists.
        if strip_large and ("outputs" in cell or cell.get("cell_type") == "code"):
            outputs = cell.get("outputs")
            if not isinstance(outputs, list):
                outputs = []
            new_outputs = []
            for out in outputs:
                if _sizeof(out) > MAX_OUTPUT_SIZE:
                    new_outputs.append({
                        "output_type": "stream",
                        "name": "stdout",
                        "text": ["[Output removed — too large for GitHub]\n"],
                    })
                else:
                    new_outputs.append(out)
            cell["outputs"] = new_outputs

        if "id" not in cell:
            # Position-based ids keep the result reproducible across runs
            # (memory addresses are not).
            candidate = f"cell-{index}"
            suffix = 0
            while candidate in used_ids:
                suffix += 1
                candidate = f"cell-{index}-{suffix}"
            cell["id"] = candidate
            used_ids.add(candidate)
            added_id = True

    # Cell ids are a 4.5 feature; advertise at least that minor version when
    # this function introduced ids into a format-4 notebook.
    if added_id and nb.get("nbformat") == 4:
        minor = nb.get("nbformat_minor")
        if not isinstance(minor, (int, float)) or minor < 5:
            nb["nbformat_minor"] = 5

    return nb
def main():
    """Command-line entry point: diagnose a notebook and optionally fix it.

    Parses the command line, loads the input notebook, prints the diagnosis
    produced by ``analyze_notebook`` and, unless ``--analyze-only`` was given,
    writes the result of ``fix_notebook`` to the output path (``-o`` or
    ``fixed_<input file name>``).

    Exits with status 1 and a message on stderr when the input file does not
    exist, cannot be read or decoded as JSON, or does not contain a JSON
    object at the top level. Exits with status 0 after printing the diagnosis
    in ``--analyze-only`` mode.
    """
    parser = argparse.ArgumentParser(description="Fix .ipynb files for GitHub rendering")
    parser.add_argument("input", help="Path to the input .ipynb file")
    parser.add_argument("-o", "--output", help="Output file path (default: fixed_<input>)")
    parser.add_argument("--strip-widgets", action="store_true", help="Remove widget metadata entirely")
    parser.add_argument("--keep-large-outputs", action="store_true", help="Don't strip oversized outputs")
    parser.add_argument("--analyze-only", action="store_true", help="Only print diagnosis, don't fix")
    args = parser.parse_args()

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: {input_path} not found", file=sys.stderr)
        sys.exit(1)

    # Report unreadable or malformed input the same way as a missing file
    # instead of letting a traceback escape. json.JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses.
    try:
        with open(input_path, "r", encoding="utf-8") as f:
            nb = json.load(f)
    except (OSError, ValueError) as exc:
        print(f"Error: could not read {input_path} as a notebook: {exc}", file=sys.stderr)
        sys.exit(1)
    if not isinstance(nb, dict):
        print(f"Error: {input_path} is not a notebook (top-level JSON value is not an object)", file=sys.stderr)
        sys.exit(1)

    issues = analyze_notebook(nb)

    # Print diagnosis
    crits = [i for i in issues if i["severity"] == "critical"]
    warns = [i for i in issues if i["severity"] == "warning"]
    if crits:
        print("❌ PREDICTION: Will NOT render on GitHub")
    elif warns:
        print("⚠️ PREDICTION: Might render with issues")
    else:
        print("✅ PREDICTION: Should render fine")

    if issues:
        print(f"\nFound {len(issues)} issue(s):")
        for i, iss in enumerate(issues, 1):
            icon = {"critical": "🔴", "warning": "🟡"}.get(iss["severity"], "🔵")
            print(f" {i}. {icon} {iss['title']} [{iss['location']}]")
    else:
        print("\nNo issues found!")

    if args.analyze_only:
        sys.exit(0)

    # Fix
    fixed = fix_notebook(nb, strip_widgets=args.strip_widgets, strip_large=not args.keep_large_outputs)
    out_path = args.output or f"fixed_{input_path.name}"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(fixed, f, ensure_ascii=False, indent=1)
    print(f"\n✅ Fixed notebook saved to: {out_path}")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()