Spaces:

Bayhaqy
/

PDF-Manipulation-App

Runtime error

App Files Files Community

Bayhaqy committed on Oct 3, 2023

Commit

de86199

1 Parent(s): a77a76e

Create app.py

Browse files

Files changed (1) hide show

app.py +215 -0

app.py ADDED Viewed

	@@ -0,0 +1,215 @@

+## Import Library
+import tempfile
+from datetime import datetime
+from io import BytesIO
+from pathlib import Path
+from zipfile import ZipFile
+import streamlit as st
+import streamlit_pydantic as sp
+from typing import Optional, List
+from streamlit_pydantic.types import FileContent
+from pydantic import BaseModel, Field
+from PyPDF2 import PdfFileWriter, PdfFileReader
+from pdf2image import convert_from_path
+from PIL import Image
+import os
+# Set page configuration
+st.set_page_config(
+    page_title="PDF Manipulation App",
+    page_icon=":page_with_curl:",
+    layout="wide",
+    initial_sidebar_state="auto",
+)
+# Add a title
+st.title("PDF Manipulation App")
+st.caption("Created by Bayhaqy")
+st.markdown("This is tools for join and split file PDF")
+# Make folder for storing user uploads
+destination_folder = Path("downloads")
+destination_folder.mkdir(exist_ok=True, parents=True)
+# Defines what options are in the form
+class PDFMergeRequest(BaseModel):
+    """
+    This code snippet defines a class called PDFMergeRequest that inherits from BaseModel.
+    It has a property called pdf_uploads which is an optional list of FileContent objects.
+    The Field function is used to specify additional information about this property, such as its default value,
+    alias, and description. In this case, the alias is set to "PDF File to Split" and the description is set
+    to "PDF that needs to be split".
+    """
+    pdf_uploads: Optional[List[FileContent]] = Field(
+        None,
+        alias="PDF File to Split",
+        description="PDF that needs to be split",
+    )
+class PDFSplitRequest(BaseModel):
+    """
+    This code snippet defines a PDFSplitRequest class that inherits from BaseModel.
+    It has two attributes: pages_per_pdf and pdf_upload. pages_per_pdf is an integer field with a default value of 1.
+    pdf_upload is an optional field that can accept a FileContent object or None as its value.
+    Both attributes have aliases and descriptions that provide additional information about their purpose.
+    """
+    pages_per_pdf: int = Field(
+        1,
+        alias="Pages per Split",
+        description="How many pages will be in each output pdf. Should evenly divide the total number of pages.",
+    )
+    pdf_upload: Optional[FileContent] = Field(
+        None,
+        alias="PDF File to Split",
+        description="PDF that needs to be split",
+    )
+def stack_images(images):
+        """
+    Generate a stacked image by vertically stacking a list of input images.
+    Parameters:
+    - images (List[Image]): A list of input images to stack vertically.
+    Returns:
+    - output_image (Image): The output stacked image, with each input image vertically stacked on top of each other.
+    """
+    first_image = images[0]
+    output_image = Image.new("RGB", (first_image.width, sum((image.height for image in images))))
+    output_image.paste(first_image, (0, 0))
+    starting_y_value = first_image.height
+    for image in images[1:]:
+        output_image.paste(image, (0, starting_y_value))
+        starting_y_value += image.height
+    return output_image
+# Radio buttons for selecting the file type
+pdf_output = ".pdf"
+jpg_output = ".jpg"
+png_output = ".png"
+#output_suffix = st.radio("Output File Type", [pdf_output, jpg_output, png_output], key="output_format")
+output_suffix = (pdf_output)
+# Add a heading
+st.markdown("### PDF Manipulation Options")
+# Radio buttons for selecting the function
+view_choice = st.radio("Select a PDF Function", ("Merge Multiple PDFs into One", "Split One PDF into Multiple"))
+# Display relevant instructions
+if view_choice == "Merge Multiple PDFs into One":
+    st.markdown("**Upload multiple PDFs**")
+    # Get the data from the form, stop running if user hasn"t submitted pdfs yet
+    data = sp.pydantic_form(key="pdf_merge_form", model=PDFMergeRequest)
+    if data is None or data.pdf_uploads is None or len(data.pdf_uploads) < 2:
+        st.warning("Upload at least 2 PDFs and press Submit")
+        st.stop()
+    # Save Uploaded PDFs
+    uploaded_paths = []
+    for pdf_data in data.pdf_uploads:
+        input_pdf_path = destination_folder / f"input_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")}.pdf"
+        input_pdf_path.write_bytes(pdf_data.as_bytes())
+        uploaded_paths.append(input_pdf_path)
+    pdf_writer = PdfFileWriter()
+    for path in uploaded_paths:
+        pdf_reader = PdfFileReader(str(path))
+        for page in range(pdf_reader.getNumPages()):
+            # Add each page to the writer object
+            pdf_writer.addPage(pdf_reader.getPage(page))
+    # Write out the merged PDF
+    output_pdf_path = destination_folder / f"output_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")}.pdf"
+    with open(str(output_pdf_path), "wb") as out:
+        pdf_writer.write(out)
+    output_path = output_pdf_path
+    # Convert to stacked / merged image
+    if output_suffix in (png_output, jpg_output):
+        images = convert_from_path(output_pdf_path)
+        stacked_image = stack_images(images)
+        output_path = destination_folder / f"output_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")}{output_suffix}"
+        stacked_image.save(output_path)  # format inferred
+    # Allow download
+    if output_suffix == pdf_output:
+        output_mime = "application/pdf"
+    elif output_suffix == jpg_output:
+        output_mime = "image/jpeg"
+    elif output_suffix == png_output:
+        output_mime = "image/png"
+    # Create a download button with a custom label
+    st.download_button("Download Merged Document", output_path.read_bytes(), f"output_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")}{output_suffix}", mime=output_mime)
+    # Delete temporary files
+    for path in uploaded_paths:
+        os.remove(path)
+    if output_suffix in (jpg_output, png_output):
+        os.remove(output_pdf_path)
+    # Delete the output file after download
+    os.remove(output_path)
+elif view_choice == "Split One PDF into Multiple":
+    st.markdown("**Upload a single PDF to split**")
+    # Get the data from the form, stop running if user hasn"t submitted pdf yet
+    data = sp.pydantic_form(key="pdf_split_form", model=PDFSplitRequest)
+    if data is None or data.pdf_upload is None:
+        st.warning("Upload a PDF and press Submit")
+        st.stop()
+    # Save Uploaded PDF
+    input_pdf_path = destination_folder / f"input_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")}.pdf"
+    input_pdf_path.write_bytes(data.pdf_upload.as_bytes())
+    # Get PDF Reader
+    pdf = PdfFileReader(BytesIO(input_pdf_path.read_bytes()))
+    if pdf.numPages % data.pages_per_pdf != 0:
+        st.warning(f"Cannot divide pdf with {pdf.numPages} pages into pdfs with {data.pages_per_pdf} pages per")
+        st.stop()
+    # Split pdf every pages per pdf. Save each split pdf to file
+    downloads = []
+    for letter_start in range(0, pdf.numPages, data.pages_per_pdf):
+        output = PdfFileWriter()
+        output_path = input_pdf_path.with_name(f"output_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")}.pdf")
+        for letter_page in range(data.pages_per_pdf):
+            output.addPage(pdf.getPage(letter_start + letter_page))
+        with open(output_path, "wb") as f:
+            output.write(f)
+        # Convert to stacked / merged image
+        if output_suffix in (png_output, jpg_output):
+            images = convert_from_path(output_path)
+            stacked_image = stack_images(images)
+            output_path = destination_folder / f"output_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")}{output_suffix}"
+            stacked_image.save(output_path)  # format inferred
+        downloads.append(output_path)
+        st.success(f"Saved file {str(output_path)} (original start page {letter_start + 1 } / {pdf.numPages})")
+    # Make zip file of all split pdfs
+    zip_path = destination_folder / f"output_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")}.zip"
+    output_zip = ZipFile(str(zip_path), "w")
+    for download_path in downloads:
+        output_zip.write(str(download_path), arcname=download_path.name)
+    output_zip.close()
+    # Provide download button of the zip of split pdfs
+    st.download_button(f"Download {str(zip_path)}", zip_path.read_bytes(), str(zip_path), mime="application/zip", key=str(zip_path))
+    # Delete temporary files
+    for download_path in downloads:
+        os.remove(download_path)
+    os.remove(zip_path)
+    os.remove(input_pdf_path)
+    # Delete the output file after download
+    os.remove(output_path)