File size: 3,542 Bytes
b9c5c0f b58ebe5 24ddf3c b58ebe5 5ceeeaa b58ebe5 0f9d1ba b58ebe5 0f9d1ba b58ebe5 0f9d1ba b58ebe5 0f9d1ba b58ebe5 0f9d1ba b58ebe5 0f9d1ba b58ebe5 0f9d1ba b58ebe5 0f9d1ba b58ebe5 0f9d1ba b58ebe5 cb54174 b58ebe5 fd6e006 b58ebe5 b7eeea9 423e0a5 b58ebe5 514f14e 0f9d1ba 514f14e b58ebe5 514f14e b58ebe5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import streamlit as st
import fitz # PyMuPDF
import io
import zipfile
from PIL import Image
def convert_pdf_to_images(pdf_data, dpi_value):
    """Rasterize every page of a PDF into PIL images using PyMuPDF.

    Args:
        pdf_data (bytes): Raw PDF content.
        dpi_value (int | float): Target resolution in DPI; must be positive.

    Returns:
        list: One PIL.Image per page, in page order. The image mode is
            "RGBA" when the rendered pixmap has an alpha channel and "RGB"
            otherwise.

    Raises:
        ValueError: If ``dpi_value`` is not greater than zero.
    """
    # Reject a degenerate resolution up front instead of handing the renderer
    # a zero or negative scaling matrix.
    if dpi_value <= 0:
        raise ValueError(f"dpi_value must be positive, got {dpi_value!r}")

    # PyMuPDF treats 72 dpi as scale 1.0, so the zoom factor is dpi / 72.
    # The factor is identical for every page, so the matrix is built once.
    zoom = dpi_value / 72.0
    matrix = fitz.Matrix(zoom, zoom)

    images_list = []
    # Open the PDF directly from memory; the context manager closes it.
    with fitz.open(stream=pdf_data, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=matrix)
            mode = "RGBA" if pix.alpha else "RGB"
            images_list.append(
                Image.frombytes(mode, (pix.width, pix.height), pix.samples)
            )
    return images_list
def create_zip_from_images(images_list):
    """Pack a sequence of PIL images into an in-memory ZIP of PNG files.

    Args:
        images_list (list): PIL.Image objects, in the order they should be
            numbered.

    Returns:
        BytesIO: Buffer containing the finished archive, rewound to offset 0.
            Entries are named ``page_1.png``, ``page_2.png``, ... following
            the order of ``images_list``.
    """
    archive_buffer = io.BytesIO()
    archive = zipfile.ZipFile(archive_buffer, 'w', zipfile.ZIP_DEFLATED)
    with archive:
        for page_number, image in enumerate(images_list, start=1):
            # Encode the page as PNG in memory, then store the encoded bytes.
            png_buffer = io.BytesIO()
            image.save(png_buffer, format="PNG")
            archive.writestr(f"page_{page_number}.png", png_buffer.getvalue())
    # Rewind so the caller can read the archive from the beginning.
    archive_buffer.seek(0)
    return archive_buffer
def main():
    """Streamlit application entry point.

    Renders a PDF-to-image converter: the user uploads a PDF, picks a
    resolution, and on request every page is rasterized with PyMuPDF (no
    Poppler needed), bundled into a ZIP offered for download, and previewed
    on the page. Any exception raised during conversion or rendering of the
    results is reported through ``st.error`` instead of propagating.
    """
    st.title("PDFを画像に変換 (PyMuPDF 版)")
    st.write("""
Poppler を使わずに PyMuPDF を用いて PDF を各ページごとに PNG へ変換し、
ZIP にまとめてダウンロードできるサンプルアプリです。
""")

    uploaded_file = st.file_uploader("PDFファイルをアップロードしてください", type=["pdf"])
    if uploaded_file is None:
        return

    # NOTE(review): the rendered bitmap grows with the square of the DPI, so
    # the 6000 dpi upper bound can demand very large amounts of memory per
    # page; consider lowering it.
    dpi = st.slider("解像度(dpi)を選択してください", min_value=10, max_value=6000, value=300, step=10)
    if not st.button("変換開始"):
        return

    try:
        with st.spinner("変換中...しばらくお待ちください"):
            # getvalue() returns the complete upload regardless of the
            # stream's current read position, unlike read().
            pdf_bytes = uploaded_file.getvalue()
            images_converted = convert_pdf_to_images(pdf_bytes, dpi)
            zip_file_bytes = create_zip_from_images(images_converted)
        st.success("変換が完了しました!")
        st.download_button(
            label="ZIPファイルをダウンロード",
            data=zip_file_bytes,
            file_name="pdf_images.zip",
            mime="application/zip"
        )
        # Preview images (optional)
        st.write("変換された画像のプレビュー:")
        for idx, image_item in enumerate(images_converted):
            st.image(image_item, caption=f"ページ {idx+1}", use_column_width=True)
    except Exception as e:
        # Top-level UI boundary: show the failure to the user instead of
        # crashing the app.
        st.error(f"変換中にエラーが発生しました: {e}")
# Launch the app only when this file is executed as a script (not on import).
if __name__ == "__main__":
    main()
|