import streamlit as st from pypdf import PdfReader, PdfWriter import io description = st.empty() description.markdown(""" If the PDF is more than 100 pages, it cannot be uploaded to Google Translate as is, so split the PDF into an appropriate number of pages. The guideline is to keep it within 100 pages and 10MB, and if it exceeds this limit, Google Translate will not translate it. To simply split into 10 pages each: Group size (pages per group) == 10""", unsafe_allow_html=True) # ページごとに分割する関数 def split_pdf_by_pages(reader): total_pages = len(reader.pages) split_files = {} for page_num in range(total_pages): try: writer = PdfWriter() writer.add_page(reader.pages[page_num]) output_pdf = io.BytesIO() writer.write(output_pdf) output_pdf.seek(0) split_files[page_num] = output_pdf #st.write(f"Page {page_num + 1} processed.") # Streamlit での表示 except Exception as e: st.error(f"Error processing page {page_num + 1}: {e}") # Streamlit でのエラー表示 raise return split_files # グループごとにマージする関数 def merge_pdfs_in_groups(split_files, group_size=50): pdf_files = sorted(split_files.keys()) merged_files = {} for i in range(0, len(pdf_files), group_size): group = pdf_files[i:i + group_size] try: writer = PdfWriter() for page_num in group: pdf_file = split_files[page_num] reader = PdfReader(pdf_file) for page in reader.pages: writer.add_page(page) output_pdf = io.BytesIO() writer.write(output_pdf) output_pdf.seek(0) merged_files[i // group_size] = output_pdf #st.write(f"Merged group {i // group_size + 1} processed.") # Streamlit での表示 except Exception as e: st.error(f"Error merging group {i // group_size + 1}: {e}") # Streamlit でのエラー表示 raise return merged_files # ファイルをダウンロード用リンクとして表示する関数 (Streamlit 版) def create_download_link(file_data, display_name): st.download_button( label=f"Download {display_name}", data=file_data, file_name=display_name, mime="application/pdf" ) # メイン処理 def main(): st.title("PDF Splitter and Merger with Streamlit") uploaded_file = st.file_uploader("Upload a PDF file", type="pdf") #group_size = st.number_input("Group size (pages per group)", min_value=1, value=10) group_size = st.slider( "Group size (pages per group)", min_value=1, max_value=99, # 最大値を設定 (必要に応じて調整) value=10, # 初期値 step=1 # ステップ数 ) if uploaded_file is not None: try: file_bytes = uploaded_file.read() reader = PdfReader(io.BytesIO(file_bytes)) split_files = split_pdf_by_pages(reader) merged_files = merge_pdfs_in_groups(split_files, group_size) # 分割されたページのダウンロードリンク #st.subheader("Split Pages") # サブヘッダーを追加 #for i, file_obj in split_files.items(): # create_download_link(file_obj.getvalue(), f"page_{i+1}.pdf") # グループ化されたページのダウンロードリンク st.subheader("Merged Groups") # サブヘッダーを追加 for i, file_obj in merged_files.items(): create_download_link(file_obj.getvalue(), f"group_{i+1}_{i*group_size}-{(i+1)*group_size-1}.pdf") except Exception as e: st.error(f"Error processing PDF: {e}") if __name__ == "__main__": main()