Spaces:
Paused
Paused
| import base64 | |
| import io | |
| import os | |
| import threading | |
| import time | |
| import zipfile | |
| from dash import Dash, dcc, html, Input, Output, State, ctx | |
| import dash_bootstrap_components as dbc | |
| from pdf2docx import Converter | |
| import tempfile | |
# Dash application instance, styled with the stock Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level state shared between the Dash callbacks and the background
# conversion thread.
# NOTE(review): this state is process-wide, so it looks like it is shared by
# every browser session served by this process -- confirm that is intended.
uploaded_files = {}  # upload filename -> io.BytesIO holding the PDF bytes
converted_files = {}  # output .docx filename -> io.BytesIO holding the DOCX bytes
current_file = ""  # name of the file currently being converted; "" when idle
conversion_complete = False  # set to True by convert_files() when a batch ends
def convert_pdf_to_docx(pdf_path, docx_path):
    """Convert the PDF at ``pdf_path`` into a DOCX file written to ``docx_path``.

    Args:
        pdf_path: Path of the source PDF file.
        docx_path: Path the converted DOCX file is written to.

    Any exception raised by the converter propagates to the caller.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Always release the converter, even when the conversion fails, so the
        # source PDF is not left open.
        cv.close()
def process_contents(contents, filename):
    """Decode the base64 payload of an upload string into an in-memory buffer.

    ``contents`` is expected to look like ``"<header>,<base64 data>"``. The
    payload is taken to be everything after the *last* comma: base64 text never
    contains a comma, so a comma inside the header cannot break the parsing.

    Args:
        contents: Upload string consisting of a header, a comma and the
            base64-encoded file data.
        filename: Name of the uploaded file. Unused here; kept so existing
            callers keep working.

    Returns:
        An ``io.BytesIO`` positioned at the start of the decoded bytes.

    Raises:
        ValueError: If ``contents`` contains no comma separator.
    """
    _header, separator, payload = contents.rpartition(',')
    if not separator:
        raise ValueError("upload contents are missing the ',' header separator")
    return io.BytesIO(base64.b64decode(payload))
def convert_files(filenames):
    """Convert each named upload from PDF to DOCX, recording progress globally.

    For every name in ``filenames`` the PDF bytes stored in ``uploaded_files``
    are written to a scratch directory, converted with
    ``convert_pdf_to_docx`` and the resulting DOCX bytes are stored in
    ``converted_files`` under the same base name with a ``.docx`` extension.

    ``current_file`` holds the name being processed while the loop runs. When
    the function exits -- normally or through an exception -- ``current_file``
    is reset to ``""`` and ``conversion_complete`` is set to ``True`` so that
    code polling these globals is never left waiting on a dead worker.

    Args:
        filenames: Iterable of keys into ``uploaded_files``.

    Raises:
        KeyError: If a name is not present in ``uploaded_files``.
        Exception: Anything raised by the conversion itself propagates after
            the scratch files are removed and the status globals are reset.
    """
    global current_file, conversion_complete
    try:
        for filename in filenames:
            current_file = filename
            pdf_file = uploaded_files[filename]
            docx_filename = os.path.splitext(filename)[0] + '.docx'
            # The scratch directory and everything in it is removed when the
            # block exits, including when the conversion raises.
            with tempfile.TemporaryDirectory() as work_dir:
                temp_pdf_path = os.path.join(work_dir, 'input.pdf')
                temp_docx_path = os.path.join(work_dir, 'output.docx')
                with open(temp_pdf_path, 'wb') as temp_pdf:
                    temp_pdf.write(pdf_file.getvalue())
                convert_pdf_to_docx(temp_pdf_path, temp_docx_path)
                with open(temp_docx_path, 'rb') as docx_file:
                    converted_files[docx_filename] = io.BytesIO(docx_file.read())
    finally:
        # Reset the status flags unconditionally; otherwise one failing file
        # would leave the status stuck on "Converting: <name>" forever.
        current_file = ""
        conversion_complete = True
# Page layout: a single card holding the upload box, the upload summary, the
# convert button, the status area, the download target and the polling timer.
app.layout = dbc.Container(
    [
        dbc.Card(
            dbc.CardBody(
                [
                    html.H1("PDF to DOCX Converter", className="text-center mb-4"),
                    # Drag-and-drop / file-picker area; accepts several files.
                    dcc.Upload(
                        id='upload-data',
                        children=html.Div(
                            [
                                'Drag and Drop or ',
                                html.A('Select PDF Files'),
                            ]
                        ),
                        style={
                            'width': '100%',
                            'height': '60px',
                            'lineHeight': '60px',
                            'borderWidth': '1px',
                            'borderStyle': 'dashed',
                            'borderRadius': '5px',
                            'textAlign': 'center',
                            'margin': '10px',
                        },
                        multiple=True,
                    ),
                    # Placeholder for the per-file upload summary.
                    html.Div(id='upload-output'),
                    # Starts out disabled.
                    dbc.Button(
                        "Convert and Download",
                        id="convert-button",
                        color="primary",
                        className="mt-3 mb-3",
                        disabled=True,
                    ),
                    # Placeholder for conversion progress messages.
                    html.Div(id='conversion-output'),
                    dcc.Download(id="download-zip"),
                    # 500 ms timer, disabled at start.
                    dcc.Interval(
                        id='interval-component',
                        interval=500,
                        n_intervals=0,
                        disabled=True,
                    ),
                ]
            ),
            className="mt-3",
        ),
    ],
    fluid=True,
)
def update_output(list_of_contents, list_of_names):
    """Store uploaded PDFs and describe the outcome of the upload.

    Replaces the contents of ``uploaded_files`` with the decoded bytes of every
    uploaded file whose name ends in ``.pdf`` (case-insensitive); other files
    are reported as skipped.

    NOTE(review): no ``@app.callback`` decorator is visible on this function in
    the source; presumably it was lost -- confirm how it is wired to the layout.

    Args:
        list_of_contents: Upload strings, one per file, or ``None`` when
            nothing has been uploaded.
        list_of_names: File names matching ``list_of_contents`` by position.

    Returns:
        A ``(children, disabled)`` tuple: a list of ``html.Div`` status lines
        and a flag that is ``True`` unless at least one PDF was accepted.
    """
    if list_of_contents is None:
        return [], True

    uploaded_files.clear()
    children = []
    for content, name in zip(list_of_contents, list_of_names):
        if name.lower().endswith('.pdf'):
            uploaded_files[name] = process_contents(content, name)
            children.append(html.Div(f"Uploaded: {name}"))
        else:
            children.append(html.Div(f"Skipped: {name} (Not a PDF file)", style={'color': 'red'}))
    # Stay disabled when every file was skipped (or the list was empty):
    # there would be nothing to convert.
    return children, not uploaded_files
def start_conversion(n_clicks):
    """Start converting the uploaded files on a background thread.

    Does nothing when the button has not actually been clicked (``n_clicks`` is
    ``None`` or ``0``) or when ``uploaded_files`` is empty. Otherwise the
    previous results are cleared, ``conversion_complete`` is reset and
    ``convert_files`` is started in a new thread with a snapshot of the
    current upload names.

    NOTE(review): no ``@app.callback`` decorator is visible on this function in
    the source; presumably it was lost -- confirm how it is wired to the layout.

    Args:
        n_clicks: Click count of the convert button; may be ``None``.

    Returns:
        ``True`` when no conversion was started, ``False`` once the worker
        thread has been launched.
    """
    global conversion_complete
    # `not n_clicks` also covers 0, which an un-clicked button can report.
    if not n_clicks or not uploaded_files:
        return True
    converted_files.clear()
    conversion_complete = False
    # Pass a copy of the names so later uploads cannot change the batch.
    worker = threading.Thread(target=convert_files, args=(list(uploaded_files),))
    worker.start()
    return False
def update_status(n):
    """Build the status line for the current conversion state.

    NOTE(review): no ``@app.callback`` decorator is visible on this function in
    the source; presumably it was lost -- confirm how it is wired to the layout.

    Args:
        n: Unused by the body.

    Returns:
        A one-element list holding an ``html.Div``: a spinner plus the name in
        ``current_file`` while a file is being converted, a completion message
        once ``conversion_complete`` is set, and a start-up message otherwise.
    """
    if current_file:
        spinner = dbc.Spinner(size="sm", color="primary", type="grow")
        label = html.Span(f" Converting: {current_file}", className="ml-2")
        return [html.Div([spinner, label], className="d-flex align-items-center")]

    if conversion_complete:
        message = "Conversion complete! Preparing download..."
    else:
        message = "Starting conversion..."
    return [html.Div(message)]
def check_conversion_complete(n):
    """Package the converted files as a ZIP download once conversion is done.

    NOTE(review): no ``@app.callback`` decorator is visible on this function in
    the source; presumably it was lost -- confirm how it is wired to the layout.

    Args:
        n: Unused by the body.

    Returns:
        ``(None, False)`` while ``conversion_complete`` is false. Otherwise a
        tuple of the ``dcc.send_bytes`` payload for ``converted_files.zip``
        (one entry per item in ``converted_files``) and ``True``.
    """
    if not conversion_complete:
        return None, False

    with io.BytesIO() as zip_buffer:
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
            for docx_name, docx_stream in converted_files.items():
                archive.writestr(docx_name, docx_stream.getvalue())
        # The archive is closed at this point, so the buffer holds a complete
        # ZIP file including its central directory.
        zip_bytes = zip_buffer.getvalue()
    return dcc.send_bytes(zip_bytes, "converted_files.zip"), True
# Script entry point: serve the app on all interfaces, port 7860, with debug
# mode off. The final message prints only after app.run() returns.
if __name__ == '__main__':
    print("Starting the Dash application...")
    app.run(host='0.0.0.0', port=7860, debug=False)
    print("Dash application has finished running.")