Spaces:

naohiro701
/

High_Resolution_PNG_Generater

Sleeping

App Files Files Community

naohiro701 committed on Dec 25, 2024

Commit

b58ebe5

verified ·

1 Parent(s): e1521b4

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -65

app.py CHANGED Viewed

@@ -1,81 +1,77 @@
 import streamlit as st
-from pdf2image import convert_from_bytes
-from PIL import Image
 import io
 import zipfile
-import sys
-import os
-def convert_pdf_to_images(pdf_byte_data, dpi_value, poppler_path_value):
     """
-    Convert a PDF file in bytes to a list of PIL images using pdf2image.
     Args:
-        pdf_byte_data (bytes): PDF file in memory
-        dpi_value (int): DPI setting for the conversion
-        poppler_path_value (str): poppler path (Windows user only)
     Returns:
-        list: A list of PIL Image objects
     """
-    images_list = convert_from_bytes(
-        pdf_byte_data,
-        dpi=dpi_value,
-        poppler_path=poppler_path_value if poppler_path_value else None
-    )
     return images_list
 def create_zip_from_images(images_list):
     """
-    Create an in-memory ZIP file from a list of PIL images.
-    Each image is saved as a PNG in the ZIP file.
     """
-    zip_buf = io.BytesIO()
-    with zipfile.ZipFile(zip_buf, "w", zipfile.ZIP_DEFLATED) as zf:
-        for idx, single_image in enumerate(images_list):
-            img_buf = io.BytesIO()
-            single_image.save(img_buf, format='PNG')
-            img_buf.seek(0)
-            zf.writestr(f"page_{idx+1}.png", img_buf.read())
-    zip_buf.seek(0)
-    return zip_buf
-# Streamlit App
-st.title("PDFを高画質PNG画像に変換")
-st.write(
     """
-    このアプリケーションでは、アップロードしたPDFファイルを高解像度（例：1000 dpi）のPNG画像に変換します。
-    変換後、各ページの画像をダウンロード可能なZIPファイルとして提供します。
     """
-)
-uploaded_file = st.file_uploader("PDFファイルをアップロードしてください", type=["pdf"])
-if uploaded_file is not None:
-    # DPIの設定
-    dpi = st.slider("解像度（dpi）を選択してください", min_value=100, max_value=3000, value=1000, step=100)
-    # Windows向けのPopplerパス入力 (Mac/LinuxなどPOPPLERがPATHに通っている環境なら不要)
-    poppler_path_input = None
-    if sys.platform.startswith("win"):
-        poppler_path_input = st.text_input(
-            "Popplerのパスを入力してください（Windowsユーザーのみ）",
-            value=""  # 例: "C:\\poppler-xx\\bin"
-        )
-        if poppler_path_input and not os.path.exists(poppler_path_input):
-            st.warning("指定されたPopplerのパスが存在しません。正しいパスを入力してください。")
-    if st.button("変換開始"):
-        try:
-            with st.spinner("PDFを画像に変換中..."):
-                # PDF→画像へ変換
-                images_converted = convert_pdf_to_images(
-                    uploaded_file.read(),
-                    dpi_value=dpi,
-                    poppler_path_value=poppler_path_input
-                )
-                # ZIPファイル作成
-                zip_file_bytes = create_zip_from_images(images_converted)
                 st.success("変換が完了しました！")
                 st.download_button(
                     label="ZIPファイルをダウンロード",
@@ -84,10 +80,13 @@ if uploaded_file is not None:
                     mime="application/zip"
                 )
-                # 各ページの画像を表示（オプション）
-                st.write("変換された画像:")
-                for i, image in enumerate(images_converted):
-                    st.image(image, caption=f"ページ {i+1}", use_column_width=True)
-        except Exception as e:
-            st.error(f"変換中にエラーが発生しました: {e}")

 import streamlit as st
+import fitz  # PyMuPDF
 import io
 import zipfile
+from PIL import Image
def convert_pdf_to_images(pdf_data: bytes, dpi_value: int) -> list:
    """
    Rasterize every page of an in-memory PDF into PIL images using PyMuPDF.

    Args:
        pdf_data (bytes): PDF content in bytes.
        dpi_value (int): Target resolution in dots per inch; must be positive.

    Returns:
        list: One PIL.Image object per page, in page order. The image mode is
            "RGBA" when the rendered pixmap has an alpha channel, else "RGB".

    Raises:
        ValueError: If dpi_value is zero or negative.
    """
    # Reject a meaningless resolution before any rendering work starts.
    if dpi_value <= 0:
        raise ValueError(f"dpi_value must be positive, got {dpi_value!r}")

    # PyMuPDF renders at a 72 dpi baseline, so dpi / 72 is the zoom factor.
    # The matrix is identical for every page, so it is built once up front.
    zoom = dpi_value / 72.0
    matrix = fitz.Matrix(zoom, zoom)

    images_list = []
    # Open the PDF straight from memory; the context manager closes it.
    with fitz.open(stream=pdf_data, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=matrix)
            mode = "RGBA" if pix.alpha else "RGB"
            images_list.append(
                Image.frombytes(mode, (pix.width, pix.height), pix.samples)
            )
    return images_list
 def create_zip_from_images(images_list):
     """
+    Create a ZIP file in memory from a list of PIL.Image objects.
+    Args:
+        images_list (list): A list of PIL.Image objects
+    Returns:
+        BytesIO: A BytesIO object representing the ZIP file
     """
+    zip_buffer = io.BytesIO()
+    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
+        for idx, img in enumerate(images_list):
+            img_bytes = io.BytesIO()
+            img.save(img_bytes, format="PNG")
+            img_bytes.seek(0)
+            zf.writestr(f"page_{idx+1}.png", img_bytes.read())
+    zip_buffer.seek(0)
+    return zip_buffer
+def main():
     """
+    Streamlit application entry point.
+    Provides a PDF-to-image converter that outputs a ZIP file without using Poppler.
     """
+    st.title("PDFを画像に変換 (PyMuPDF 版)")
+    st.write("""
+        Poppler を使わずに PyMuPDF を用いて PDF を各ページごとに PNG へ変換し、
+        ZIP にまとめてダウンロードできるサンプルアプリです。
+    """)
+    uploaded_file = st.file_uploader("PDFファイルをアップロードしてください", type=["pdf"])
+    if uploaded_file is not None:
+        dpi = st.slider("解像度（dpi）を選択してください", min_value=72, max_value=600, value=144, step=72)
+        if st.button("変換開始"):
+            try:
+                with st.spinner("変換中...しばらくお待ちください"):
+                    images_converted = convert_pdf_to_images(uploaded_file.read(), dpi)
+                    zip_file_bytes = create_zip_from_images(images_converted)
                 st.success("変換が完了しました！")
                 st.download_button(
                     label="ZIPファイルをダウンロード",
                     mime="application/zip"
                 )
+                # Preview images (optional)
+                st.write("変換された画像のプレビュー:")
+                for idx, image_item in enumerate(images_converted):
+                    st.image(image_item, caption=f"ページ {idx+1}", use_column_width=True)
+            except Exception as e:
+                st.error(f"変換中にエラーが発生しました: {str(e)}")
+if __name__ == "__main__":
+    main()