|
|
import streamlit as st |
|
|
from pypdf import PdfReader, PdfWriter |
|
|
import io |
|
|
|
|
|
|
|
|
# Create an empty Streamlit element, then render the usage notes into it.
# The notes explain why a large PDF has to be split before it is handed to
# Google Translate and which slider value gives plain 10-page chunks.
description = st.empty()

description.markdown(
    """
If the PDF is more than 100 pages, it cannot be uploaded to Google Translate as is, so split the PDF into an appropriate number of pages.
The guideline is to keep it within 100 pages and 10MB, and if it exceeds this limit, Google Translate will not translate it.

To simply split into 10 pages each:
Group size (pages per group) == 10""",
    unsafe_allow_html=True,
)
|
|
|
|
|
|
|
|
def split_pdf_by_pages(reader):
    """Write every page of ``reader`` into its own in-memory PDF.

    Args:
        reader: Object exposing a ``pages`` sequence that supports ``len()``
            and integer indexing (``main`` passes a ``PdfReader``).

    Returns:
        dict: Maps each zero-based page index to an ``io.BytesIO`` holding a
        one-page PDF, rewound to position 0.

    Raises:
        Exception: Any error raised while handling a page is reported with
            ``st.error`` (using the one-based page number) and re-raised.
    """
    page_count = len(reader.pages)
    single_page_pdfs = {}

    for index in range(page_count):
        try:
            page_writer = PdfWriter()
            page_writer.add_page(reader.pages[index])

            buffer = io.BytesIO()
            page_writer.write(buffer)
            # Rewind so the next consumer reads the PDF from its first byte.
            buffer.seek(0)

            single_page_pdfs[index] = buffer
        except Exception as exc:
            # Show which page failed, then let the caller decide what to do.
            st.error(f"Error processing page {index + 1}: {exc}")
            raise

    return single_page_pdfs
|
|
|
|
|
|
|
|
def merge_pdfs_in_groups(split_files, group_size=50):
    """Merge per-page PDFs into consecutive groups of ``group_size`` entries.

    The keys of ``split_files`` are sorted and then cut into runs of at most
    ``group_size`` keys; every run becomes one merged PDF.

    Args:
        split_files: Mapping of sortable keys (page indices in this app) to
            binary streams that ``PdfReader`` can open.
        group_size: Maximum number of entries per merged PDF. Must be >= 1.

    Returns:
        dict: Maps the zero-based group index to an ``io.BytesIO`` holding
        the merged PDF, rewound to position 0. Empty when ``split_files`` is
        empty.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.
        Exception: Any error raised while merging a group is reported with
            ``st.error`` (using the one-based group number) and re-raised.
    """
    # A negative step would make range() yield nothing (silently dropping
    # every page) and a zero step fails with an unrelated-looking message,
    # so reject both up front with a clear error.
    if group_size < 1:
        raise ValueError(f"group_size must be at least 1, got {group_size}")

    pdf_files = sorted(split_files)
    merged_files = {}

    group_starts = range(0, len(pdf_files), group_size)
    for group_index, start in enumerate(group_starts):
        group = pdf_files[start:start + group_size]
        try:
            writer = PdfWriter()
            for page_num in group:
                reader = PdfReader(split_files[page_num])
                for page in reader.pages:
                    writer.add_page(page)

            output_pdf = io.BytesIO()
            writer.write(output_pdf)
            # Rewind so the caller reads the merged PDF from its first byte.
            output_pdf.seek(0)
            merged_files[group_index] = output_pdf
        except Exception as e:
            # Show which group failed, then let the caller decide what to do.
            st.error(f"Error merging group {group_index + 1}: {e}")
            raise

    return merged_files
|
|
|
|
|
|
|
|
def create_download_link(file_data, display_name):
    """Render a Streamlit download button for one PDF.

    Args:
        file_data: Content offered for download; forwarded unchanged as the
            ``data`` argument of ``st.download_button``.
        display_name: Used both as the downloaded file's name and, prefixed
            with "Download ", as the button label.

    Returns:
        None. The value returned by ``st.download_button`` is discarded.
    """
    button_label = f"Download {display_name}"
    st.download_button(
        label=button_label,
        data=file_data,
        file_name=display_name,
        mime="application/pdf",
    )
|
|
|
|
|
|
|
|
def main():
    """Run the Streamlit page: upload a PDF, split it, offer grouped downloads.

    Renders the title, the file uploader and the group-size slider. Once a
    file is uploaded, it is split into single pages with
    ``split_pdf_by_pages``, regrouped with ``merge_pdfs_in_groups`` and one
    download button per group is rendered through ``create_download_link``.

    Each download is named ``group_<n>_<first>-<last>.pdf`` where ``<n>`` is
    the one-based group number and ``<first>``/``<last>`` are the zero-based
    indices of the first and last page actually contained in that group.

    Any exception raised while processing is shown with ``st.error`` instead
    of propagating.
    """
    st.title("PDF Splitter and Merger with Streamlit")

    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

    group_size = st.slider(
        "Group size (pages per group)",
        min_value=1,
        max_value=99,
        value=10,
        step=1,
    )

    # Nothing to process until the user has picked a file.
    if uploaded_file is None:
        return

    try:
        # getvalue() returns the complete upload regardless of the current
        # stream position, whereas read() only returns what is left after
        # any earlier read of the same object.
        file_bytes = uploaded_file.getvalue()
        reader = PdfReader(io.BytesIO(file_bytes))
        split_files = split_pdf_by_pages(reader)
        merged_files = merge_pdfs_in_groups(split_files, group_size)
        total_pages = len(split_files)

        st.subheader("Merged Groups")
        for i, file_obj in merged_files.items():
            first_page = i * group_size
            # Clamp to the real page count: the final group may be partial,
            # and its name must not advertise pages that do not exist.
            last_page = min(first_page + group_size, total_pages) - 1
            create_download_link(
                file_obj.getvalue(),
                f"group_{i + 1}_{first_page}-{last_page}.pdf",
            )
    except Exception as e:
        # Top-level UI boundary: report the failure on the page.
        st.error(f"Error processing PDF: {e}")
|
|
|
|
|
# Build the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()