Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PyPDF2 import PdfReader, PdfWriter, PageObject | |
| from PIL import Image | |
| import tempfile | |
| import os | |
| import atexit | |
| import zipfile | |
| from pdf2docx import Converter | |
def merge_pdfs(pdf_files, order, start_on_odd=False):
    """Merge the selected PDFs into a single file and return its path.

    Args:
        pdf_files: Uploaded PDFs. Each item is either an object exposing its
            path as ``.name`` or a plain path string.
        order: 1-based positions into ``pdf_files`` giving the output
            sequence. Entries equal to ``0`` are ignored, which is how a
            file is skipped.
        start_on_odd: When true, a blank A4 page is inserted before every
            PDF after the first whenever the running page count is odd, so
            that each source document begins on an odd-numbered page.

    Returns:
        Path of the merged PDF, named ``combine.pdf`` inside a fresh
        temporary directory.

    Raises:
        IndexError: If ``order`` holds a value outside
            ``0..len(pdf_files)``.
    """
    files = list(pdf_files or [])
    positions = [position for position in order if position != 0]

    # Validate before touching any PDF: a negative position would otherwise
    # wrap around and silently pick a file from the end of the list.
    for position in positions:
        if not 1 <= position <= len(files):
            raise IndexError(
                f"Order value {position} is outside the range 0..{len(files)}."
            )
    selected = [files[position - 1] for position in positions]

    # Default blank-page size (A4), in PDF points.
    default_width = 595.276  # 8.27 inches
    default_height = 841.890  # 11.69 inches

    pdf_writer = PdfWriter()
    for index, pdf in enumerate(selected):
        pdf_reader = PdfReader(getattr(pdf, "name", pdf))
        # Pad with a blank page so this document starts on an odd page.
        if start_on_odd and index > 0 and len(pdf_writer.pages) % 2 != 0:
            blank_page = PageObject.create_blank_page(
                width=default_width, height=default_height
            )
            pdf_writer.add_page(blank_page)
        for page in pdf_reader.pages:
            pdf_writer.add_page(page)

    # A per-call directory keeps the download name stable while preventing
    # simultaneous requests from overwriting each other's output.
    output_dir = tempfile.mkdtemp(prefix="merge_pdfs_")
    temp_file_path = os.path.join(output_dir, "combine.pdf")
    with open(temp_file_path, "wb") as temp_file:
        pdf_writer.write(temp_file)
    return temp_file_path
def pdf_to_images(pdf_file, image_format="JPEG"):
    """Render each page of a PDF to an image file and return the paths.

    Args:
        pdf_file: The uploaded PDF, either an object exposing its path as
            ``.name`` or a plain path string. ``None`` (no file selected)
            yields an empty result instead of an error.
        image_format: Format name passed both to the PDF renderer and to
            the image ``save`` call. ``"JPEG"`` produces ``.jpg`` files;
            any other value produces ``.png`` file names.

    Returns:
        List of image paths named ``page_<n>.<ext>`` (1-based) inside a
        fresh temporary directory, in page order.
    """
    # This function is wired to a change event, which can deliver None.
    if pdf_file is None:
        return []

    from pdf2image import convert_from_bytes

    with open(getattr(pdf_file, "name", pdf_file), "rb") as f:
        pdf_bytes = f.read()
    images = convert_from_bytes(pdf_bytes, fmt=image_format)

    # The extension depends only on the requested format, so compute it once.
    ext = "jpg" if image_format == "JPEG" else "png"
    temp_dir = tempfile.mkdtemp()
    image_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(temp_dir, f"page_{page_number}.{ext}")
        image.save(image_path, image_format)
        image_paths.append(image_path)
    return image_paths
def images_to_pdf(image_files):
    """Combine images into one multi-page PDF and return its path.

    Args:
        image_files: Non-empty sequence of images, each either an object
            exposing its path as ``.name`` or a plain path string. The
            sequence order is the page order.

    Returns:
        Path of the generated PDF, named ``images_to_pdf.pdf`` inside a
        fresh temporary directory.

    Raises:
        ValueError: If ``image_files`` is empty or ``None``.
    """
    if not image_files:
        raise ValueError("No images were provided to convert to PDF.")

    pages = []
    try:
        for image in image_files:
            # Close each source file as soon as its RGB copy exists so file
            # handles are not left open until garbage collection.
            with Image.open(getattr(image, "name", image)) as source:
                pages.append(source.convert("RGB"))

        # A per-call directory avoids clobbering another request's output.
        output_dir = tempfile.mkdtemp(prefix="images_to_pdf_")
        temp_file_path = os.path.join(output_dir, "images_to_pdf.pdf")
        pages[0].save(temp_file_path, save_all=True, append_images=pages[1:])
    finally:
        for page in pages:
            page.close()
    return temp_file_path
def images_to_zip(image_paths):
    """Bundle the given image files into a ZIP archive and return its path.

    Args:
        image_paths: Paths of the files to archive. ``None`` is treated as
            an empty sequence and produces an empty archive.

    Returns:
        Path of the archive, named ``images.zip`` inside a fresh temporary
        directory. Each file is stored under its base name only.
    """
    # A per-call directory keeps the download name stable while preventing
    # simultaneous requests from overwriting each other's archive.
    output_dir = tempfile.mkdtemp(prefix="images_zip_")
    zip_file_path = os.path.join(output_dir, "images.zip")
    with zipfile.ZipFile(zip_file_path, "w") as zipf:
        for image_path in image_paths or ():
            zipf.write(image_path, os.path.basename(image_path))
    return zip_file_path
def pdf_to_docx(pdf_file):
    """Convert a PDF to a DOCX document and return the DOCX path.

    Args:
        pdf_file: The uploaded PDF, either an object exposing its path as
            ``.name`` or a plain path string.

    Returns:
        Path of the converted document, named ``converted.docx`` inside a
        fresh temporary directory.

    Raises:
        ValueError: If ``pdf_file`` is ``None`` (no file was selected).
    """
    if pdf_file is None:
        raise ValueError("No PDF file was provided to convert to DOCX.")

    # A per-call directory avoids clobbering another request's output.
    output_dir = tempfile.mkdtemp(prefix="pdf_to_docx_")
    temp_file_path = os.path.join(output_dir, "converted.docx")

    converter = Converter(getattr(pdf_file, "name", pdf_file))
    try:
        converter.convert(temp_file_path)
    finally:
        # Release the converter even when the conversion fails.
        converter.close()
    return temp_file_path
# Create Gradio interface
# NOTE(review): the original indentation of this section was lost, so the
# nesting below is reconstructed. In particular it assumes only the two merge
# buttons sit inside the Row and every handler lives inside its own tab --
# confirm against the deployed layout.
with gr.Blocks(theme="gstaff/xkcd") as demo:
    gr.Markdown("# PDF Merger and Converter")
    with gr.Tabs():
        with gr.TabItem("PDF Merger"):
            pdf_input = gr.File(label="Upload PDF Files to Merge", file_types=[".pdf"], file_count="multiple")
            order_input = gr.Textbox(label="Enter the order of PDFs as comma-separated numbers, skip the number if you want to skip the file", placeholder="1,2,3,... or 3,1,2")
            with gr.Row():
                merge_button = gr.Button("Merge PDFs (Normal)")
                merge_odd_button = gr.Button("Merge PDFs (Each PDF starts on odd page)")
            merged_result = gr.File(label="Download Merged PDF")

            def merge_and_preview(pdf_files, order, start_on_odd=False):
                """Validate the requested order, merge, and return the PDF path.

                ``order`` is the raw textbox content; when blank the files
                are merged in upload order. Problems are reported by
                raising ``gr.Error`` so the message reaches the user rather
                than being handed to the file output as a value.
                """
                if not pdf_files:
                    raise gr.Error("Please upload at least one PDF file to merge.")
                n = len(pdf_files)
                if not order or not order.strip():
                    # Default to natural order if order is empty
                    order = list(range(1, n + 1))
                else:
                    try:
                        # Convert the input string to a list of integers
                        order = [int(x.strip()) for x in order.split(',')]
                    except ValueError:
                        raise gr.Error("Invalid order format. Ensure it is comma-separated numbers.") from None
                # Ensure the order does not reference non-existing files
                if any(i < 0 or i > n for i in order):
                    raise gr.Error(f"Order values must be between 0 and {n} (0 means to skip the file).")
                # Merge PDFs with the specified start_on_odd option
                return merge_pdfs(pdf_files, order, start_on_odd)

            merge_button.click(
                lambda pdf_files, order: merge_and_preview(pdf_files, order, False),
                inputs=[pdf_input, order_input],
                outputs=[merged_result]
            )
            merge_odd_button.click(
                lambda pdf_files, order: merge_and_preview(pdf_files, order, True),
                inputs=[pdf_input, order_input],
                outputs=[merged_result]
            )

        with gr.TabItem("PDF to Image Converter"):
            single_pdf_input = gr.File(label="Upload PDF File to Convert", file_types=[".pdf"], file_count="single")
            image_format_option = gr.Radio(label="Select Image Format", choices=["JPEG", "PNG"], value="JPEG")
            image_output = gr.Gallery(label="Converted Images", show_label=True)
            download_zip_button = gr.Button("Download All Images as ZIP")
            zip_result = gr.File(label="Download ZIP")

            def convert_pdf_to_images_with_format(pdf_file, image_format):
                """Return page images for the gallery; empty when no file is set."""
                # The change event can deliver None (e.g. the upload was
                # cleared); show an empty gallery instead of failing.
                if pdf_file is None:
                    return []
                return pdf_to_images(pdf_file, image_format)

            def download_images_as_zip_with_format(pdf_file, image_format):
                """Render the PDF pages and return a ZIP archive of them."""
                if pdf_file is None:
                    raise gr.Error("Please upload a PDF file first.")
                image_paths = pdf_to_images(pdf_file, image_format)
                return images_to_zip(image_paths)

            single_pdf_input.change(
                convert_pdf_to_images_with_format,
                inputs=[single_pdf_input, image_format_option],
                outputs=[image_output]
            )
            download_zip_button.click(
                download_images_as_zip_with_format,
                inputs=[single_pdf_input, image_format_option],
                outputs=[zip_result]
            )

        with gr.TabItem("Image to PDF Converter"):
            image_input = gr.File(label="Upload Images to Convert to PDF", file_types=[".jpg", ".png"], file_count="multiple")
            order_option = gr.Radio(label="Select Order Type", choices=["Ordered", "Reverse", "Custom"], value="Ordered")
            custom_order_input = gr.Textbox(label="Enter custom order (comma-separated indices)", visible=False)
            image_gallery = gr.Gallery(label="Images Preview (Arrange Order)", show_label=True)
            pdf_result = gr.File(label="Download PDF")

            def update_custom_order_visibility(order_type):
                """Show the custom-order textbox only for the "Custom" choice."""
                return gr.update(visible=(order_type == "Custom"))

            def sort_images(order_type, custom_order, images):
                """Return ``images`` arranged according to the chosen order type.

                "Reverse" flips the upload order, "Custom" applies the
                1-based comma-separated indices in ``custom_order``, and
                anything else keeps the upload order. Invalid input raises
                ``gr.Error``.
                """
                if not images:
                    raise gr.Error("Please upload at least one image.")
                if order_type == "Reverse":
                    return images[::-1]
                if order_type == "Custom":
                    try:
                        indices = [int(i.strip()) - 1 for i in (custom_order or "").split(',')]
                    except ValueError:
                        raise gr.Error("Invalid custom order. Ensure all indices are valid and within range.") from None
                    # Reject 0 and negatives explicitly: after the 1-based
                    # shift they would wrap around to the end of the list.
                    if any(i < 0 or i >= len(images) for i in indices):
                        raise gr.Error("Invalid custom order. Ensure all indices are valid and within range.")
                    return [images[i] for i in indices]
                return images

            def generate_pdf(order_type, custom_order, images):
                """Arrange the images as requested and build the PDF."""
                return images_to_pdf(sort_images(order_type, custom_order, images))

            order_option.change(
                update_custom_order_visibility,
                inputs=[order_option],
                outputs=[custom_order_input]
            )
            gr.Button("Preview Sorted Images").click(
                sort_images,
                inputs=[order_option, custom_order_input, image_input],
                outputs=[image_gallery]
            )
            gr.Button("Generate PDF").click(
                generate_pdf,
                inputs=[order_option, custom_order_input, image_input],
                outputs=[pdf_result]
            )

        with gr.TabItem("PDF to DOCX Converter"):
            gr.Markdown("Some PDF files may not be converted properly due to the complexity of the PDF file")
            pdf_to_docx_input = gr.File(label="Upload PDF File to Convert to DOCX", file_types=[".pdf"], file_count="single")
            convert_button = gr.Button("Convert to DOCX")
            docx_result = gr.File(label="Download DOCX")

            def convert_pdf_to_docx_with_button(pdf_file):
                """Convert the uploaded PDF and return the DOCX path."""
                if pdf_file is None:
                    raise gr.Error("Please upload a PDF file first.")
                return pdf_to_docx(pdf_file)

            convert_button.click(
                convert_pdf_to_docx_with_button,
                inputs=[pdf_to_docx_input],
                outputs=[docx_result]
            )

# Launch the Gradio app
demo.launch()
# Clean up temporary files
def cleanup_temp_files():
    """Remove this app's own output files from the system temp directory.

    Only the fixed file names this application writes directly into the
    temp directory are targeted, so files belonging to other programs that
    merely share an extension are left alone. Removal is best effort: a
    missing entry, a non-file entry, or a failed delete is skipped so the
    remaining names are still processed.
    """
    temp_dir = tempfile.gettempdir()
    app_output_names = (
        "combine.pdf",
        "images_to_pdf.pdf",
        "converted.docx",
        "images.zip",
    )
    for filename in app_output_names:
        path = os.path.join(temp_dir, filename)
        if not os.path.isfile(path):
            continue
        try:
            os.remove(path)
        except OSError:
            # Best effort at interpreter exit: keep going with the rest.
            continue


# NOTE(review): this registration sits after the demo.launch() call, so it
# only takes effect once that call has returned -- confirm this is intended.
atexit.register(cleanup_temp_files)