Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| fix_notebook.py — CLI tool to fix .ipynb files for GitHub rendering. | |
| Usage: | |
| python fix_notebook.py input.ipynb # writes fixed_input.ipynb | |
| python fix_notebook.py input.ipynb -o output.ipynb # custom output name | |
| python fix_notebook.py input.ipynb --strip-widgets # remove widget metadata entirely | |
| python fix_notebook.py input.ipynb --analyze-only # just print the diagnosis | |
| python fix_notebook.py input.ipynb --keep-large-outputs # don't replace oversized outputs | |
| """ | |
| import argparse | |
| import json | |
| import sys | |
| from pathlib import Path | |
| # Reuse the core logic from app.py (copy the functions for standalone use) | |
# Size thresholds. They are compared against the byte length of the
# JSON-serialized object as computed by _sizeof().

# Cap for a single cell output; larger outputs are reported and, unless
# disabled, replaced with a short placeholder.
MAX_OUTPUT_SIZE = 500_000

# Cap for the whole notebook (reported to the user as the ">10 MB limit").
MAX_TOTAL_SIZE = 10_000_000

# NOTE(review): not referenced in the code visible here; presumably a
# per-image cap carried over from app.py -- confirm before relying on it.
MAX_IMAGE_SIZE = 1_000_000
| def _sizeof(obj) -> int: | |
| return len(json.dumps(obj, ensure_ascii=False).encode()) | |
def analyze_notebook(nb: dict) -> list[dict]:
    """Inspect a parsed notebook and list problems that may break rendering.

    The notebook is only read, never modified. Malformed sections (for
    example ``"metadata": null`` or a non-list ``cells`` value) are treated
    as absent instead of raising.

    Args:
        nb: The notebook as loaded from JSON (top-level object).

    Returns:
        A list of issue dicts, each with the keys ``"severity"``
        (``"critical"`` or ``"warning"``), ``"title"`` and ``"location"``.
        The list is empty when nothing was found.
    """
    issues = []

    # nbformat must be a number >= 4. bool is excluded explicitly because it
    # is an int subclass; strings such as "4" are invalid as well.
    nbf = nb.get("nbformat")
    nbformat_ok = (
        isinstance(nbf, (int, float))
        and not isinstance(nbf, bool)
        and nbf >= 4
    )
    if not nbformat_ok:
        issues.append({"severity": "critical", "title": "Missing/invalid nbformat", "location": "root"})

    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = {}

    # Each entry under metadata.widgets is expected to carry a "state" key.
    widgets = meta.get("widgets")
    if isinstance(widgets, dict):
        for key, val in widgets.items():
            if isinstance(val, dict) and "state" not in val:
                issues.append({
                    "severity": "critical",
                    "title": f"Missing 'state' in widgets['{key}']",
                    "location": "metadata.widgets",
                })

    if "kernelspec" not in meta:
        issues.append({"severity": "warning", "title": "Missing kernelspec", "location": "metadata"})

    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []
    for idx, cell in enumerate(cells):
        if not isinstance(cell, dict):
            continue
        outputs = cell.get("outputs")
        if not isinstance(outputs, list):
            continue
        # One issue per oversized output, so a cell can be listed repeatedly.
        for out in outputs:
            if _sizeof(out) > MAX_OUTPUT_SIZE:
                issues.append({
                    "severity": "warning",
                    "title": f"Oversized output in cell {idx}",
                    "location": f"cells[{idx}]",
                })

    total = _sizeof(nb)
    if total > MAX_TOTAL_SIZE:
        issues.append({
            "severity": "critical",
            "title": f"Notebook is {total / 1e6:.1f} MB (>10 MB limit)",
            "location": "entire file",
        })

    return issues
def fix_notebook(nb: dict, strip_widgets=False, strip_large=True) -> dict:
    """Return a repaired deep copy of *nb*; the input is left untouched.

    Repairs applied:
      * ``nbformat`` is set to 4 when missing, non-numeric or below 4, and
        ``nbformat_minor`` defaults to 5 when absent.
      * ``metadata.widgets`` is removed (``strip_widgets=True``) or each of
        its dict entries gets an empty ``"state"`` if it has none.
      * Default ``kernelspec`` / ``language_info`` metadata is added if absent.
      * With ``strip_large``, outputs larger than ``MAX_OUTPUT_SIZE`` are
        replaced by a short stdout placeholder.
      * Cells without an ``"id"`` get a deterministic ``cell-<index>`` id,
        but only when the notebook declares ``nbformat_minor`` >= 5.

    Args:
        nb: The notebook as loaded from JSON.
        strip_widgets: Drop ``metadata.widgets`` entirely instead of patching it.
        strip_large: Replace oversized cell outputs with a placeholder.

    Returns:
        The fixed notebook as a new dict.
    """
    import copy

    nb = copy.deepcopy(nb)

    # A null or string nbformat must not reach the "< 4" comparison (it would
    # raise TypeError); anything that is not a real number >= 4 is reset.
    nbf = nb.get("nbformat")
    nbformat_ok = (
        isinstance(nbf, (int, float))
        and not isinstance(nbf, bool)
        and nbf >= 4
    )
    if not nbformat_ok:
        nb["nbformat"] = 4
    nb.setdefault("nbformat_minor", 5)

    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        # Covers both a missing key and junk such as "metadata": null.
        meta = nb["metadata"] = {}

    widgets = meta.get("widgets")
    if widgets is not None:
        if strip_widgets:
            del meta["widgets"]
        elif isinstance(widgets, dict):
            for val in widgets.values():
                if isinstance(val, dict):
                    val.setdefault("state", {})

    meta.setdefault("kernelspec", {
        "display_name": "Python 3", "language": "python", "name": "python3"
    })
    meta.setdefault("language_info", {"name": "python", "version": "3.10.0"})

    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []

    # The nbformat schema only defines the cell "id" field from 4.5 onward;
    # adding it to an older-minor notebook would make the file fail schema
    # validation, so ids are only filled in for 4.5+ notebooks.
    minor = nb["nbformat_minor"]
    wants_ids = isinstance(minor, int) and not isinstance(minor, bool) and minor >= 5
    used_ids = {
        c["id"] for c in cells
        if isinstance(c, dict) and isinstance(c.get("id"), str)
    }

    for idx, cell in enumerate(cells):
        if not isinstance(cell, dict):
            continue

        # Only touch "outputs" where it belongs: cells that already have the
        # key, or code cells (which require it). Markdown/raw cells must not
        # gain an "outputs" key.
        if strip_large and ("outputs" in cell or cell.get("cell_type") == "code"):
            outputs = cell.get("outputs")
            if not isinstance(outputs, list):
                outputs = []
            new_outputs = []
            for out in outputs:
                if _sizeof(out) > MAX_OUTPUT_SIZE:
                    new_outputs.append({
                        "output_type": "stream",
                        "name": "stdout",
                        "text": ["[Output removed — too large for GitHub]\n"],
                    })
                else:
                    new_outputs.append(out)
            cell["outputs"] = new_outputs

        if wants_ids and "id" not in cell:
            # Index-based ids keep the result reproducible between runs; a
            # numeric suffix resolves clashes with ids already in the file.
            candidate = f"cell-{idx}"
            suffix = 0
            while candidate in used_ids:
                suffix += 1
                candidate = f"cell-{idx}-{suffix}"
            cell["id"] = candidate
            used_ids.add(candidate)

    return nb
def main():
    """Command-line entry point: diagnose a notebook and optionally fix it.

    Parses the CLI arguments, prints the diagnosis produced by
    ``analyze_notebook`` and, unless ``--analyze-only`` was given, writes the
    result of ``fix_notebook`` to ``--output`` (default: ``fixed_<input name>``
    in the current directory).

    Exits with status 1 when the input is missing, is not a regular file,
    cannot be parsed as JSON, or is not a JSON object; exits with status 0
    after an ``--analyze-only`` run.
    """
    parser = argparse.ArgumentParser(description="Fix .ipynb files for GitHub rendering")
    parser.add_argument("input", help="Path to the input .ipynb file")
    parser.add_argument("-o", "--output", help="Output file path (default: fixed_<input>)")
    parser.add_argument("--strip-widgets", action="store_true", help="Remove widget metadata entirely")
    parser.add_argument("--keep-large-outputs", action="store_true", help="Don't strip oversized outputs")
    parser.add_argument("--analyze-only", action="store_true", help="Only print diagnosis, don't fix")
    args = parser.parse_args()

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: {input_path} not found", file=sys.stderr)
        sys.exit(1)
    if not input_path.is_file():
        # A directory passes exists() but cannot be opened as a notebook.
        print(f"Error: {input_path} is not a file", file=sys.stderr)
        sys.exit(1)

    try:
        with open(input_path, "r", encoding="utf-8") as f:
            nb = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error: could not read {input_path} as a notebook: {exc}", file=sys.stderr)
        sys.exit(1)
    if not isinstance(nb, dict):
        print(f"Error: {input_path} does not contain a JSON object", file=sys.stderr)
        sys.exit(1)

    issues = analyze_notebook(nb)

    # Print diagnosis
    has_critical = any(i["severity"] == "critical" for i in issues)
    has_warning = any(i["severity"] == "warning" for i in issues)
    if has_critical:
        print("❌ PREDICTION: Will NOT render on GitHub")
    elif has_warning:
        print("⚠️ PREDICTION: Might render with issues")
    else:
        print("✅ PREDICTION: Should render fine")

    if issues:
        print(f"\nFound {len(issues)} issue(s):")
        icons = {"critical": "🔴", "warning": "🟡"}
        for i, iss in enumerate(issues, 1):
            icon = icons.get(iss["severity"], "🔵")
            print(f" {i}. {icon} {iss['title']} [{iss['location']}]")
    else:
        print("\nNo issues found!")

    if args.analyze_only:
        sys.exit(0)

    # Fix
    fixed = fix_notebook(nb, strip_widgets=args.strip_widgets, strip_large=not args.keep_large_outputs)
    out_path = args.output or f"fixed_{input_path.name}"

    # Serialize before opening the file so a failure cannot leave a
    # half-written notebook behind. Lone surrogates (which json.load accepts)
    # are not encodable as UTF-8; in that case fall back to ASCII escapes,
    # which represent the same JSON document.
    text = json.dumps(fixed, ensure_ascii=False, indent=1)
    try:
        text.encode("utf-8")
    except UnicodeEncodeError:
        text = json.dumps(fixed, ensure_ascii=True, indent=1)

    with open(out_path, "w", encoding="utf-8") as f:
        f.write(text)
    print(f"\n✅ Fixed notebook saved to: {out_path}")
# Run the CLI only when this file is executed as a script; importing the
# module (e.g. to reuse analyze_notebook / fix_notebook) has no side effects.
if __name__ == "__main__":
    main()