naohiro701's picture
Update app.py
b7eeea9 verified
import streamlit as st
import fitz # PyMuPDF
import io
import zipfile
from PIL import Image
def convert_pdf_to_images(pdf_data, dpi_value):
    """
    Convert each page of a PDF into a list of PIL.Image objects using PyMuPDF.

    Args:
        pdf_data (bytes): PDF content in bytes
        dpi_value (int): DPI (resolution) for rasterization; must be positive

    Returns:
        list: A list of PIL.Image objects, one per page, in page order

    Raises:
        ValueError: If dpi_value is zero or negative
    """
    # Reject a meaningless resolution up front instead of letting it surface
    # as an opaque rendering error further down.
    if dpi_value <= 0:
        raise ValueError(f"dpi_value must be positive, got {dpi_value!r}")

    # PyMuPDF rasterizes relative to a 72 dpi baseline, so scaling both axes by
    # dpi / 72 yields the requested resolution. The matrix is identical for
    # every page, so it is built once outside the loop.
    scale_val = dpi_value / 72.0
    matrix = fitz.Matrix(scale_val, scale_val)

    images_list = []
    # Open the PDF directly from memory; the context manager closes it again.
    with fitz.open(stream=pdf_data, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=matrix)
            # Pick the PIL mode that matches whether the pixmap has alpha.
            mode = "RGBA" if pix.alpha else "RGB"
            images_list.append(
                Image.frombytes(mode, (pix.width, pix.height), pix.samples)
            )
    return images_list
def create_zip_from_images(images_list):
    """
    Pack a list of PIL.Image objects into an in-memory ZIP archive.

    Each image is encoded as PNG and stored under the name
    ``page_<n>.png``, where ``n`` starts at 1 and follows list order.

    Args:
        images_list (list): A list of PIL.Image objects

    Returns:
        BytesIO: The deflate-compressed ZIP archive, rewound to position 0
    """
    archive_buffer = io.BytesIO()
    archive = zipfile.ZipFile(archive_buffer, 'w', zipfile.ZIP_DEFLATED)
    with archive:
        for page_number, image in enumerate(images_list, start=1):
            # Encode into a scratch buffer so nothing touches the filesystem.
            png_buffer = io.BytesIO()
            image.save(png_buffer, format="PNG")
            archive.writestr(f"page_{page_number}.png", png_buffer.getvalue())
    # Rewind so the caller can read the archive from the beginning.
    archive_buffer.seek(0)
    return archive_buffer
def main():
    """
    Streamlit application entry point.

    Renders an upload widget for a PDF, lets the user choose a rasterization
    DPI, converts every page to PNG with PyMuPDF (no Poppler required), and
    offers the result as a ZIP download followed by an inline preview.
    Any exception raised during conversion is reported in the UI.
    """
    st.title("PDFを画像に変換 (PyMuPDF 版)")
    st.write(
        "\n"
        "Poppler を使わずに PyMuPDF を用いて PDF を各ページごとに PNG へ変換し、\n"
        "ZIP にまとめてダウンロードできるサンプルアプリです。\n"
    )

    uploaded_file = st.file_uploader("PDFファイルをアップロードしてください", type=["pdf"])
    if uploaded_file is None:
        # Nothing to convert until the user supplies a file.
        return

    dpi = st.slider("解像度(dpi)を選択してください", min_value=10, max_value=6000, value=300, step=10)
    if not st.button("変換開始"):
        return

    # NOTE(review): everything below is rendered only during the run triggered
    # by the button click; confirm whether the results should be kept in
    # st.session_state so they survive the rerun caused by the download button.
    try:
        with st.spinner("変換中...しばらくお待ちください"):
            # getvalue() returns the whole upload regardless of the stream
            # position, whereas read() yields only what is left after any
            # earlier read and can therefore hand back empty bytes.
            pdf_bytes = uploaded_file.getvalue()
            images_converted = convert_pdf_to_images(pdf_bytes, dpi)
            zip_file_bytes = create_zip_from_images(images_converted)
        st.success("変換が完了しました!")
        st.download_button(
            label="ZIPファイルをダウンロード",
            data=zip_file_bytes,
            file_name="pdf_images.zip",
            mime="application/zip"
        )
        # Preview images (optional)
        st.write("変換された画像のプレビュー:")
        for idx, image_item in enumerate(images_converted):
            # NOTE(review): newer Streamlit releases may deprecate
            # use_column_width in favour of use_container_width; verify
            # against the pinned Streamlit version before switching.
            st.image(image_item, caption=f"ページ {idx+1}", use_column_width=True)
    except Exception as e:
        # Top-level UI boundary: show the failure to the user instead of
        # letting the script run crash.
        st.error(f"変換中にエラーが発生しました: {e}")
# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()