docs_merger / app.py
george021219's picture
Update app.py
9507aff verified
import gradio as gr
import pandas as pd
from PIL import Image
import fitz # PyMuPDF
from docx2pdf import convert
def convert_to_pdf(file):
    """Convert one uploaded file into an in-memory PyMuPDF document.

    Supported inputs (matched case-insensitively by extension): ``.docx``,
    ``.xlsx``, ``.png``, ``.jpg``, ``.jpeg`` and ``.pdf``.

    Args:
        file: Either a path string or an object exposing the path as
            ``.name`` (the shape of a Gradio upload).

    Returns:
        A new ``fitz`` document holding the converted pages. The caller owns
        it and should close it. For an unsupported extension the document is
        returned empty (zero pages), as before.
    """
    import os

    path = getattr(file, "name", file)
    root, ext = os.path.splitext(path)
    ext = ext.lower()

    # Conservative number of text lines that fit between 72pt margins on the
    # default page at the default font size; keeps long sheets from running
    # off the bottom of a single page.
    lines_per_page = 45

    pdf_document = fitz.open()
    try:
        if ext == ".docx":
            # Use docx2pdf to convert the .docx file to a PDF next to it.
            # NOTE(review): docx2pdf drives Microsoft Word, so this branch is
            # expected to work only where Word is installed -- confirm for
            # the deployment target.
            pdf_path = root + ".pdf"  # swap only the suffix, not every match
            convert(path, pdf_path)
            with fitz.open(pdf_path) as converted:
                pdf_document.insert_pdf(converted)
        elif ext == ".xlsx":
            # Only the first worksheet is read (pandas' default selection).
            df = pd.read_excel(path, engine="openpyxl")
            lines = df.to_string(index=False).splitlines() or [""]
            for start in range(0, len(lines), lines_per_page):
                chunk = lines[start:start + lines_per_page]
                pdf_page = pdf_document.new_page()
                pdf_page.insert_text((72, 72), "\n".join(chunk))
        elif ext in (".png", ".jpg", ".jpeg"):
            # PIL is used only to read the pixel dimensions; the page is
            # sized 1 pixel = 1 point, as in the original implementation.
            with Image.open(path) as img:
                width, height = img.size
            pdf_page = pdf_document.new_page(width=width, height=height)
            # Hand PyMuPDF the encoded image file itself. Raw pixel bytes
            # from Image.tobytes() are not a decodable image stream.
            pdf_page.insert_image(fitz.Rect(0, 0, width, height), filename=path)
        elif ext == ".pdf":
            with fitz.open(path) as existing_pdf:
                # One call copies every page and avoids the resource
                # duplication caused by inserting page by page.
                pdf_document.insert_pdf(existing_pdf)
    except Exception:
        # Do not leak the half-built document when a conversion fails.
        pdf_document.close()
        raise
    return pdf_document
def merge_pdfs(files):
    """Convert every uploaded file to PDF and concatenate them in order.

    Args:
        files: Iterable of uploads accepted by ``convert_to_pdf``. May be
            ``None`` or empty when the user submitted nothing.

    Returns:
        The path of the written PDF (``"merged_output.pdf"`` in the current
        working directory).

    Raises:
        ValueError: If no files were given, or none of them produced a page.
    """
    # Fail early with a clear message instead of a TypeError on ``None`` or
    # a library-internal error when saving an empty document.
    if not files:
        raise ValueError("No files were provided to merge.")

    output_path = "merged_output.pdf"
    merged_pdf = fitz.open()
    try:
        for file in files:
            pdf_document = convert_to_pdf(file)
            try:
                # Unsupported inputs come back as zero-page documents;
                # there is nothing to insert for those.
                if pdf_document.page_count:
                    merged_pdf.insert_pdf(pdf_document)
            finally:
                pdf_document.close()

        if merged_pdf.page_count == 0:
            raise ValueError("None of the provided files produced any pages.")
        merged_pdf.save(output_path)
    finally:
        merged_pdf.close()
    return output_path
def main(files):
    """Gradio callback: merge the uploaded files and return the PDF's path."""
    return merge_pdfs(files)
# Web UI: a multi-file upload wired to `main`, with a single file as output.
iface = gr.Interface(
    title="File Merger to PDF",
    description="Upload Word, Excel, Image or PDF files to merge them into a single PDF.",
    fn=main,
    inputs=gr.File(file_count="multiple"),
    outputs=gr.File(),
)

# Launch the Gradio app as soon as this module is executed.
iface.launch()