File size: 3,542 Bytes
b9c5c0f
b58ebe5
24ddf3c
 
b58ebe5
5ceeeaa
b58ebe5
0f9d1ba
b58ebe5
 
0f9d1ba
b58ebe5
 
 
0f9d1ba
b58ebe5
0f9d1ba
b58ebe5
 
 
 
 
 
 
 
 
 
 
 
 
0f9d1ba
 
 
 
b58ebe5
 
 
 
 
 
 
0f9d1ba
b58ebe5
 
 
 
 
 
 
 
 
0f9d1ba
b58ebe5
0f9d1ba
b58ebe5
 
0f9d1ba
b58ebe5
 
 
 
 
cb54174
b58ebe5
fd6e006
b58ebe5
b7eeea9
423e0a5
b58ebe5
 
 
 
 
 
514f14e
 
 
0f9d1ba
514f14e
 
 
 
b58ebe5
 
 
 
 
 
 
514f14e
b58ebe5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import streamlit as st
import fitz  # PyMuPDF
import io
import zipfile
from PIL import Image

def convert_pdf_to_images(pdf_data: bytes, dpi_value: int) -> list:
    """
    Convert each page of a PDF into a list of PIL.Image objects using PyMuPDF.

    Args:
        pdf_data (bytes): PDF content in bytes
        dpi_value (int): DPI (resolution) for rasterization; must be positive

    Returns:
        list: A list of PIL.Image objects, one per page, in page order

    Raises:
        ValueError: If dpi_value is zero or negative
    """
    # Reject nonsensical resolutions up front instead of handing a zero or
    # negative scale to the renderer.
    if dpi_value <= 0:
        raise ValueError(f"dpi_value must be positive, got {dpi_value!r}")

    # PyMuPDF uses 72 dpi as its baseline, so the zoom factor is dpi / 72.
    # The matrix is the same for every page, so build it once outside the loop.
    zoom = dpi_value / 72.0
    matrix = fitz.Matrix(zoom, zoom)

    images_list = []
    # Open the PDF from memory
    with fitz.open(stream=pdf_data, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=matrix)
            # Pick the PIL mode that matches the pixmap's channel layout
            mode = "RGBA" if pix.alpha else "RGB"
            pil_image = Image.frombytes(mode, (pix.width, pix.height), pix.samples)
            images_list.append(pil_image)
    return images_list

def create_zip_from_images(images_list):
    """
    Pack a list of PIL.Image objects into an in-memory ZIP archive.

    Each image is encoded as PNG and stored under the name ``page_<n>.png``,
    where ``<n>`` is the 1-based position of the image in the list.

    Args:
        images_list (list): A list of PIL.Image objects

    Returns:
        BytesIO: The ZIP archive, rewound to position 0
    """
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, mode="w", compression=zipfile.ZIP_DEFLATED) as bundle:
        for page_number, picture in enumerate(images_list, start=1):
            # Encode the page as PNG into a scratch buffer, then store its bytes
            encoded = io.BytesIO()
            picture.save(encoded, format="PNG")
            bundle.writestr(f"page_{page_number}.png", encoded.getvalue())
    # Rewind so callers can read the archive from the beginning
    archive.seek(0)
    return archive

def main():
    """
    Streamlit application entry point.

    Lets the user upload a PDF, choose a resolution, and convert every page
    to PNG with PyMuPDF (no Poppler needed). The result is offered as a ZIP
    download and previewed page by page. Any exception raised during the
    conversion, download setup, or preview is reported with st.error.
    """
    st.title("PDFを画像に変換 (PyMuPDF 版)")
    st.write("""
        Poppler を使わずに PyMuPDF を用いて PDF を各ページごとに PNG へ変換し、  
        ZIP にまとめてダウンロードできるサンプルアプリです。
    """)

    uploaded_file = st.file_uploader("PDFファイルをアップロードしてください", type=["pdf"])

    # Nothing to do until a file has been uploaded
    if uploaded_file is None:
        return

    dpi = st.slider("解像度(dpi)を選択してください", min_value=10, max_value=6000, value=300, step=10)

    # NOTE(review): st.button is presumably only True on the run triggered by
    # the click, so a later rerun (e.g. after pressing the download button)
    # may hide the results — consider st.session_state if that matters.
    if not st.button("変換開始"):
        return

    try:
        with st.spinner("変換中...しばらくお待ちください"):
            # getvalue() returns the whole upload regardless of the current
            # stream position; read() would return b"" if the buffer had
            # already been consumed.
            pdf_bytes = uploaded_file.getvalue()
            images_converted = convert_pdf_to_images(pdf_bytes, dpi)
            zip_file_bytes = create_zip_from_images(images_converted)

        st.success("変換が完了しました!")
        st.download_button(
            label="ZIPファイルをダウンロード",
            data=zip_file_bytes,
            file_name="pdf_images.zip",
            mime="application/zip"
        )

        # Preview images (optional)
        st.write("変換された画像のプレビュー:")
        for idx, image_item in enumerate(images_converted):
            st.image(image_item, caption=f"ページ {idx+1}", use_column_width=True)

    except Exception as e:
        # Top-level UI boundary: show the failure to the user instead of crashing
        st.error(f"変換中にエラーが発生しました: {e}")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()