import streamlit as st
import fitz  # PyMuPDF
import io
import zipfile
from PIL import Image


def convert_pdf_to_images(pdf_data: bytes, dpi_value: int) -> list:
    """
    Convert each page of a PDF into a list of PIL.Image objects using PyMuPDF.

    Args:
        pdf_data (bytes): PDF content in bytes
        dpi_value (int): DPI (resolution) for rasterization; must be positive

    Returns:
        list: A list of PIL.Image objects, one per page, in page order

    Raises:
        ValueError: If dpi_value is not a positive number
    """
    if dpi_value <= 0:
        raise ValueError(f"dpi_value must be positive, got {dpi_value!r}")

    # PyMuPDF renders at 72 dpi by default, so the zoom factor passed to
    # fitz.Matrix(scale_x, scale_y) is the requested DPI divided by 72.
    # The matrix is the same for every page, so build it once.
    scale_val = dpi_value / 72.0
    matrix = fitz.Matrix(scale_val, scale_val)

    images_list = []
    # Open the PDF directly from memory; the context manager closes it.
    with fitz.open(stream=pdf_data, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=matrix)
            # The pixmap carries an alpha channel only when pix.alpha is set.
            mode = "RGBA" if pix.alpha else "RGB"
            pil_image = Image.frombytes(
                mode, (pix.width, pix.height), pix.samples
            )
            images_list.append(pil_image)
    return images_list


def create_zip_from_images(images_list):
    """
    Create a ZIP file in memory from a list of PIL.Image objects.

    Each image is encoded as PNG and stored as ``page_<n>.png`` where ``n``
    is the 1-based position of the image in the list.

    Args:
        images_list (list): A list of PIL.Image objects

    Returns:
        BytesIO: A BytesIO object holding the ZIP file, rewound to offset 0
    """
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
        for idx, img in enumerate(images_list, start=1):
            img_bytes = io.BytesIO()
            img.save(img_bytes, format="PNG")
            # getvalue() returns the whole buffer regardless of position.
            zf.writestr(f"page_{idx}.png", img_bytes.getvalue())
    # Rewind so the caller can read the archive from the beginning.
    zip_buffer.seek(0)
    return zip_buffer


def main():
    """
    Streamlit application entry point.

    Provides a PDF-to-image converter that outputs a ZIP file
    without using Poppler.
    """
    st.title("PDFを画像に変換 (PyMuPDF 版)")
    st.write("""
    Poppler を使わずに PyMuPDF を用いて PDF を各ページごとに PNG へ変換し、
    ZIP にまとめてダウンロードできるサンプルアプリです。
    """)

    uploaded_file = st.file_uploader("PDFファイルをアップロードしてください", type=["pdf"])
    if uploaded_file is None:
        return

    # NOTE(review): pixel count grows with the square of the DPI, so values
    # near the slider maximum can need very large amounts of memory per page.
    dpi = st.slider(
        "解像度(dpi)を選択してください",
        min_value=10,
        max_value=6000,
        value=300,
        step=10,
    )

    if not st.button("変換開始"):
        return

    # Top-level UI boundary: report any conversion failure to the user
    # instead of letting the app show a raw traceback.
    try:
        with st.spinner("変換中...しばらくお待ちください"):
            # getvalue() returns the full upload regardless of the current
            # stream position; read() can return nothing if the buffer was
            # already consumed by an earlier conversion in this session.
            images_converted = convert_pdf_to_images(
                uploaded_file.getvalue(), dpi
            )
            zip_file_bytes = create_zip_from_images(images_converted)

        st.success("変換が完了しました!")
        st.download_button(
            label="ZIPファイルをダウンロード",
            data=zip_file_bytes,
            file_name="pdf_images.zip",
            mime="application/zip",
        )

        # Preview images (optional)
        # NOTE(review): use_column_width may be deprecated in newer
        # Streamlit releases -- confirm against the deployed version.
        st.write("変換された画像のプレビュー:")
        for idx, image_item in enumerate(images_converted):
            st.image(image_item, caption=f"ページ {idx+1}", use_column_width=True)
    except Exception as e:
        st.error(f"変換中にエラーが発生しました: {e}")


if __name__ == "__main__":
    main()