File size: 3,354 Bytes
7165154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3
"""Extract all images from a Jupyter notebook."""

import json
import base64
import os
from pathlib import Path
import sys

def extract_images_from_notebook(notebook_path, output_dir):
    """Extract all images from a Jupyter notebook.

    Every cell output that carries a ``data`` bundle is inspected. At most
    one image is saved per output, choosing the first available format in
    the order PNG, JPEG, SVG. Files are named
    ``cell_<cell#>_output_<output#>_fig_<n>.<ext>`` where the cell and
    output numbers are 1-based and ``n`` is a running counter over all
    saved images.

    Args:
        notebook_path: Path to the .ipynb file
        output_dir: Directory to save extracted images (created, together
            with any missing parents, if it does not exist)

    Returns:
        The number of images written to ``output_dir``.

    Raises:
        KeyError: If the notebook JSON has no top-level ``cells`` entry.
    """
    # Create output directory
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load notebook. Read it explicitly as UTF-8 instead of relying on the
    # platform's locale encoding, which may not be UTF-8.
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    # (MIME type, file extension, payload is base64) in order of preference.
    image_formats = (
        ('image/png', 'png', True),
        ('image/jpeg', 'jpg', True),
        ('image/svg+xml', 'svg', False),
    )

    image_count = 0

    for cell_idx, cell in enumerate(notebook['cells'], start=1):
        # Cells without an 'outputs' entry have nothing to extract.
        for output_idx, output in enumerate(cell.get('outputs', ()), start=1):
            data = output.get('data')
            if not data:
                continue

            # Pick the first supported representation present in the bundle.
            for mime_type, extension, is_base64 in image_formats:
                if mime_type in data:
                    break
            else:
                continue

            payload = data[mime_type]
            # A payload may be stored as a list of string fragments; join it
            # for every format, not only SVG, before decoding or writing.
            if isinstance(payload, list):
                payload = ''.join(payload)

            image_count += 1
            filename = (
                f"cell_{cell_idx}_output_{output_idx}"
                f"_fig_{image_count}.{extension}"
            )
            filepath = output_dir / filename

            if is_base64:
                filepath.write_bytes(base64.b64decode(payload))
            else:
                # SVG is plain XML text; write it as UTF-8 so non-ASCII
                # characters survive regardless of the locale.
                filepath.write_text(payload, encoding='utf-8')
            print(f"Saved: {filename}")

    print(f"\nTotal images extracted: {image_count}")
    return image_count

if __name__ == "__main__":
    # The script takes exactly two arguments: the notebook to read and the
    # directory to write images into. Unpacking fails with ValueError when
    # the argument count is anything else.
    try:
        _, notebook_path, output_dir = sys.argv
    except ValueError:
        print("Usage: python extract_notebook_images.py <notebook.ipynb> <output_dir>")
        sys.exit(1)

    extract_images_from_notebook(notebook_path, output_dir)