Spaces:
Paused
Paused
| import base64 | |
| import io | |
| import zipfile | |
| import logging | |
| from threading import Thread | |
| import json | |
| import time | |
| import dash | |
| import dash_bootstrap_components as dbc | |
| from dash import dcc, html, Input, Output, State, callback, MATCH, ALL | |
| from dash.exceptions import PreventUpdate | |
| from PyPDF2 import PdfReader, PdfWriter | |
# Set up logging: timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The Dash application, styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Global variables: module-level state shared by the callbacks and the
# background worker thread.
generated_file = None  # bytes of the most recently built ZIP, or None
is_processing = False  # True while process_pdf is running
progress = 0  # percentage (0-100); process_pdf sets it to -1 on failure
# Layout
# Inline CSS for the dashed drag-and-drop upload area.
_UPLOAD_STYLE = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}

# Single-file upload control.
_upload_area = dcc.Upload(
    id='upload-pdf',
    children=html.Div(['Drag and Drop or ', html.A('Select PDF')]),
    style=_UPLOAD_STYLE,
    multiple=False,
)

# Card holding the dynamic list of page-range rows and the "Add Range" button.
_ranges_card = dbc.Card(
    [
        dbc.CardBody([
            html.Div(id='ranges-container', children=[]),
            dbc.Button("Add Range", id='add-range', color="secondary", className="mt-2"),
        ]),
    ],
    className="my-3",
)

# Spinner plus status text; hidden (display: none) in the initial layout.
_processing_panel = html.Div(
    [
        dbc.Spinner(html.Div(), id="processing-spinner", color="primary", type="border"),
        html.Div(id='processing-status'),
    ],
    id='processing-container',
    style={'display': 'none'},
)

app.layout = dbc.Container(
    [
        html.H1("PDF Splitter", className="my-4"),
        _upload_area,
        dbc.Spinner(html.Div(id='pdf-name'), color="primary", type="grow"),
        _ranges_card,
        dbc.Button("Split PDF", id='split-button', color="primary", className="mt-3", disabled=True),
        dbc.Progress(id='progress-bar', className="my-3"),
        _processing_panel,
        dbc.Button("Download ZIP", id='download-button', color="success", className="mt-3", disabled=True),
        dcc.Download(id="download-zip"),
        html.Div(id='log-output', style={'whiteSpace': 'pre-line'}),
        dcc.Interval(id='interval-component', interval=1000, n_intervals=0),  # 1 second interval
    ],
    fluid=True,
)
def update_output(contents, filename):
    """Build the UI state that follows a PDF upload (or the lack of one).

    Args:
        contents: Uploaded file payload, or ``None`` when nothing is uploaded.
        filename: Name of the uploaded file; used for display and logging.

    Returns:
        A 3-tuple. Without an upload: ``("", True, [])``. With an upload: an
        ``html.Div`` announcing the filename, ``False``, and a list holding a
        single range row created with index 0.
    """
    # NOTE(review): no callback registration is visible in this view; the
    # tuple presumably feeds the filename display, the split button's
    # ``disabled`` flag and the ranges container -- confirm.
    if contents is None:
        return "", True, []

    logger.info(f"PDF uploaded: {filename}")
    upload_notice = html.Div(f"Uploaded: {filename}")
    first_row = create_range_input(0)
    return upload_notice, False, [first_row]
def create_range_input(index):
    """Return one page-range row: a text input next to its "Remove" button.

    Both components receive dict ids that carry ``index`` (types
    ``'range-input'`` and ``'remove-range'`` respectively).

    Args:
        index: Value stored under the ``'index'`` key of both component ids.

    Returns:
        A ``dbc.Row`` with a 10-wide input column and a 2-wide button column.
    """
    range_field = dbc.Input(
        id={'type': 'range-input', 'index': index},
        type='text',
        placeholder='Enter page range (e.g., 1-3)',
    )
    remove_button = dbc.Button(
        "Remove",
        id={'type': 'remove-range', 'index': index},
        color="danger",
        size="sm",
    )
    columns = [
        dbc.Col(range_field, width=10),
        dbc.Col(remove_button, width=2),
    ]
    return dbc.Row(columns, className="mb-2")
def _range_row_index(row, fallback):
    """Return the id ``index`` stored in a serialized range row.

    ``row`` is expected to have the shape produced by ``create_range_input``
    after a round trip through the browser (nested ``props``/``children``
    dicts). ``fallback`` is returned when the row does not have that shape.
    """
    try:
        return row['props']['children'][0]['props']['children']['props']['id']['index']
    except (KeyError, IndexError, TypeError):
        return fallback


def manage_ranges(add_clicks, remove_clicks, existing_ranges):
    """Add a range row or remove the row whose "Remove" button was clicked.

    Args:
        add_clicks: Click count of the "Add Range" button (not read directly;
            the trigger is taken from ``dash.callback_context``).
        remove_clicks: Click counts of the "Remove" buttons (not read
            directly, for the same reason).
        existing_ranges: Current list of range rows, or ``None``.

    Returns:
        A new list of range rows; the input list is not mutated.
    """
    # NOTE(review): no callback registration is visible in this view;
    # existing_ranges is presumably the serialized children of the ranges
    # container -- confirm.
    rows = list(existing_ranges or [])

    ctx = dash.callback_context
    if not ctx.triggered:
        return rows

    trigger = ctx.triggered[0]
    # A trigger without a click count (e.g. a button that was just added to
    # the layout) is not a user action and must not change the rows.
    if not trigger.get('value'):
        return rows

    # The property name follows the last dot of prop_id.
    triggered_id = trigger['prop_id'].rsplit('.', 1)[0]

    if triggered_id == 'add-range':
        # Derive the next index from the ids actually in use. Using
        # len(rows) would hand out an index that is still taken once any
        # earlier row has been removed.
        used = [_range_row_index(row, pos) for pos, row in enumerate(rows)]
        rows.append(create_range_input(max(used, default=-1) + 1))
    elif 'remove-range' in triggered_id:
        remove_index = json.loads(triggered_id)['index']
        # Match on the row's own id index rather than its list position,
        # which drifts away from the id after the first removal.
        rows = [
            row
            for pos, row in enumerate(rows)
            if _range_row_index(row, pos) != remove_index
        ]
    return rows
def split_pdf(n_clicks, contents, filename, ranges):
    """Validate the request and start splitting the PDF on a worker thread.

    Args:
        n_clicks: Click count of the split button (not inspected).
        contents: Uploaded PDF payload passed on to ``process_pdf``.
        filename: Name of the uploaded file, passed on to ``process_pdf``.
        ranges: Raw page-range strings; ``None``, empty and whitespace-only
            entries are ignored.

    Returns:
        ``("Processing started...", True, True, {'display': 'block'})``.

    Raises:
        PreventUpdate: If there is no uploaded content or no usable range.
    """
    global progress, is_processing, generated_file

    if not contents or not ranges:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info("Split PDF button clicked")

    # Drop empty and whitespace-only entries, then re-check: without this a
    # form with only blank rows would start a worker that builds an empty ZIP.
    ranges = [r.strip() for r in ranges if isinstance(r, str) and r.strip()]
    if not ranges:
        logger.warning("Split PDF clicked but no content or ranges provided")
        raise PreventUpdate

    logger.info(f"Processing {len(ranges)} ranges")

    progress = 0  # Reset progress
    # Discard the previous archive so a stale ZIP is not served while (or
    # after) the new job runs.
    generated_file = None
    is_processing = True

    thread = Thread(target=process_pdf, args=(contents, filename, ranges))
    thread.start()

    return "Processing started...", True, True, {'display': 'block'}
def _parse_page_range(page_range, total_pages):
    """Convert ``"5"`` or ``"2-4"`` (1-based, inclusive) to 0-based page indices.

    The end of the range is clamped to ``total_pages``.

    Raises:
        ValueError: If the text is not one or two integers, the start is
            below 1 or beyond the document, or the end precedes the start.
    """
    first, dash_found, last = page_range.strip().partition('-')
    start = int(first)
    end = int(last) if dash_found else start
    if start < 1 or end < start or start > total_pages:
        raise ValueError(
            f"Invalid page range {page_range!r} for a {total_pages}-page document"
        )
    return range(start - 1, min(end, total_pages))


def process_pdf(contents, filename, ranges):
    """Split the uploaded PDF into one file per range and zip the results.

    Intended to run on a worker thread. The outcome is reported through
    module globals instead of a return value: ``progress`` advances to 100
    (or is set to -1 on any error), ``generated_file`` receives the ZIP
    bytes on success, and ``is_processing`` is cleared when the function
    ends.

    Args:
        contents: Upload payload; everything after the first comma is
            decoded as base64.
        filename: Name of the uploaded file (currently unused).
        ranges: Page-range strings such as ``"1-3"`` or ``"5"``.
    """
    global progress, generated_file, is_processing
    try:
        # Decode PDF content; the header before the comma is not needed.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)

        # Read the PDF
        pdf = PdfReader(io.BytesIO(decoded))
        total_pages = len(pdf.pages)

        # Create a ZIP file in memory
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zf:
            for position, page_range in enumerate(ranges, start=1):
                writer = PdfWriter()
                for page_num in _parse_page_range(page_range, total_pages):
                    writer.add_page(pdf.pages[page_num])

                # Save the split PDF to the ZIP file
                output = io.BytesIO()
                writer.write(output)
                zf.writestr(f'split_{position}.pdf', output.getvalue())

                progress = position / len(ranges) * 100
                time.sleep(0.1)  # Simulate some processing time

        # Read the buffer only after the archive is closed, so the central
        # directory has been written.
        generated_file = zip_buffer.getvalue()
        progress = 100
    except Exception as e:
        # Boundary of the worker thread: record the failure (with traceback)
        # and signal it to the UI through the -1 sentinel.
        logger.exception("Error processing PDF: %s", e)
        progress = -1
    finally:
        is_processing = False
def update_progress(n):
    """Translate the shared processing state into a progress/status update.

    Reads the module globals ``is_processing``, ``progress`` and
    ``generated_file``.

    Args:
        n: Tick counter supplied by the caller (not used).

    Returns:
        A 4-tuple ``(bar_value, status_text, flag, style)``:
        while processing, the current progress with ``True`` and a visible
        style; after success, 100 with ``False`` and a hidden style; after
        an error (``progress == -1``), 0 with ``True`` and a hidden style.

    Raises:
        PreventUpdate: When none of the three states applies.
    """
    # NOTE(review): the boolean presumably drives the download button's
    # ``disabled`` flag and the style the processing container -- confirm
    # against the callback registration, which is not visible here.
    hidden = {'display': 'none'}

    if is_processing:
        status = f"Processing... {progress:.0f}% complete"
        return progress, status, True, {'display': 'block'}

    finished = progress == 100 and generated_file is not None
    if finished:
        return 100, "PDF splitting completed. Click 'Download ZIP' to get your files.", False, hidden

    if progress == -1:
        return 0, "Error occurred during PDF splitting. Please try again.", True, hidden

    raise PreventUpdate
def download_zip(n_clicks):
    """Send the generated ZIP archive to the browser.

    Args:
        n_clicks: Click count of the download button; ``None`` or 0 means
            the button has not been clicked.

    Returns:
        The ``dcc.send_bytes`` payload for ``split_pdfs.zip``.

    Raises:
        PreventUpdate: If the button was not clicked or no archive exists.
    """
    # Without a click there is nothing to do. This also stops an archive
    # left in the module global from being pushed to a client when the
    # callback fires without user action (e.g. on page load, unless the
    # registration already prevents the initial call -- not visible here).
    if not n_clicks:
        raise PreventUpdate

    if generated_file is None:
        logger.warning("Download attempted but no file generated")
        raise PreventUpdate

    logger.info("Initiating ZIP file download")
    return dcc.send_bytes(generated_file, "split_pdfs.zip")
if __name__ == '__main__':
    import os

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows code execution by anyone who can reach
    # the port. Set DASH_DEBUG=1 (or true/yes/on) to opt in during local
    # development.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}

    logger.info("Starting the Dash application...")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    logger.info("Dash application has finished running.")