# Spaces: Paused
import base64
import io
import os
import tempfile
import threading
import time
import zipfile

import dash_bootstrap_components as dbc
import pypandoc
from dash import Dash, dcc, html, Input, Output, State, callback_context, no_update
# Make sure pandoc is usable before the app starts: the conversion code in
# this module goes through pypandoc, which needs the pandoc binary.
try:
    pypandoc.get_pandoc_version()
except OSError:
    # If not found, attempt to download and install
    print("Pandoc not found. Attempting to download...")
    pypandoc.download_pandoc()
    # Verify installation
    try:
        pypandoc.get_pandoc_version()
    except OSError:
        print("Failed to install Pandoc. Please install it manually.")
        # Raise SystemExit directly instead of calling exit(): the `exit`
        # helper is injected by the `site` module for interactive sessions
        # and is not guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(1) from None
    else:
        print("Pandoc successfully installed.")
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Inline CSS for the dashed drag-and-drop upload box.
_upload_box_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}

# Upload control: accepts several files at once, limited to .docx and .pdf.
_upload_box = dcc.Upload(
    id='upload-data',
    children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
    style=_upload_box_style,
    multiple=True,
    accept='.docx,.pdf',
)

# Both status areas start hidden.
_upload_status = html.Div(id="upload-status", style={"display": "none"})
_conversion_status = html.Div(id="conversion-status", style={"display": "none"})

# The convert button starts disabled.
_convert_button = dbc.Button(
    "Convert and Download",
    id="convert-button",
    color="primary",
    className="mt-3",
    disabled=True,
)

# Page layout, top to bottom: title, upload box, upload summary, the two
# status areas, the convert button, and the download component.
app.layout = dbc.Container([
    html.H1("Auto-Wiki", className="my-4"),
    _upload_box,
    html.Div(id='upload-output'),
    _upload_status,
    _conversion_status,
    _convert_button,
    dcc.Download(id="download-zip"),
])
def process_file(contents, filename):
    """Convert one uploaded document to Markdown text.

    Args:
        contents: String of the form ``"<header>,<base64 payload>"``
            (presumably the data URL supplied by ``dcc.Upload`` -- confirm
            against the caller).
        filename: Name of the uploaded file. Only its extension is used, so
            that pypandoc can infer the input format from the file suffix.

    Returns:
        The Markdown produced by ``pypandoc.convert_file``.

    Raises:
        ValueError: If ``contents`` has no ``,`` separator or the payload is
            not valid base64.

    Any error raised by pypandoc during conversion propagates to the caller.
    """
    _header, separator, payload = contents.partition(',')
    if not separator:
        raise ValueError("contents must look like '<header>,<base64 data>'")
    decoded = base64.b64decode(payload)

    # Keep only the extension of the client-supplied name. Writing to
    # `filename` itself would let a crafted name escape the working
    # directory and would let concurrent uploads overwrite each other.
    suffix = os.path.splitext(os.path.basename(filename))[1]

    # The temporary directory is removed even when conversion raises, so a
    # failed upload leaves nothing behind.
    with tempfile.TemporaryDirectory() as workdir:
        temp_path = os.path.join(workdir, 'upload' + suffix)
        with open(temp_path, 'wb') as f:
            f.write(decoded)
        # NOTE(review): pandoc lists PDF as an output format only, so '.pdf'
        # uploads are expected to fail here -- confirm and handle upstream.
        return pypandoc.convert_file(temp_path, 'md')
def process_files(contents, filenames):
    """Convert the supported uploads to Markdown and bundle them into a zip.

    Args:
        contents: Iterable of encoded upload strings, paired by position with
            ``filenames``. ``None`` is treated as empty.
        filenames: Iterable of the uploads' file names. ``None`` is treated
            as empty.

    Returns:
        The bytes of a deflate-compressed zip archive with one ``.md`` entry
        per file whose name ends in ``.docx`` or ``.pdf`` (case-insensitive).
        Other files are skipped; with nothing to convert the archive is
        empty.

    Errors raised by ``process_file`` propagate to the caller.
    """
    converted = []
    used_names = set()
    for payload, original_name in zip(contents or [], filenames or []):
        if not original_name.lower().endswith(('.docx', '.pdf')):
            continue
        stem = original_name.rsplit('.', 1)[0]
        # 'a.docx' and 'a.pdf' would both become 'a.md'; add a numeric
        # suffix so the archive never contains duplicate entry names.
        archive_name = stem + '.md'
        counter = 1
        while archive_name in used_names:
            archive_name = f"{stem}_{counter}.md"
            counter += 1
        used_names.add(archive_name)
        converted.append((archive_name, process_file(payload, original_name)))

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for name, content in converted:
            zip_file.writestr(name, content)
    return zip_buffer.getvalue()
def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames):
    """Handle a new upload or a click on the convert button.

    Returns either ``no_update`` (nothing to do) or a 7-tuple. Judging by
    the layout ids, the tuple positions presumably map to: upload summary
    children, convert-button ``disabled``, upload-status text and style,
    conversion-status text and style, and the download payload -- confirm
    against the callback registration.

    NOTE(review): no ``@app.callback`` registration for this function is
    visible in this file view; Dash only invokes it once it is registered.

    Args:
        list_of_contents: Uploaded file contents, or ``None`` before any
            upload.
        list_of_names: Names of the uploaded files.
        n_clicks: Click count of the convert button, or ``None``.
        contents: Upload contents passed to ``process_files`` on conversion.
        filenames: Upload names passed to ``process_files`` on conversion.
    """
    ctx = callback_context
    if not ctx.triggered:
        return no_update

    # Look at every triggering property, not just the first entry, so the
    # outcome does not depend on the order Dash reports them in.
    triggered_props = {event['prop_id'] for event in ctx.triggered}

    if 'upload-data.contents' in triggered_props:
        if list_of_contents is None:
            return no_update
        children = [
            html.Div([
                html.H5(f"File uploaded: {name}"),
                html.Hr(),
            ])
            for name in list_of_names or []
        ]
        return (
            children,
            False,
            "Files uploaded successfully",
            {"display": "block"},
            "",
            {"display": "none"},
            None,
        )

    if 'convert-button.n_clicks' in triggered_props:
        if n_clicks is None or not contents:
            return no_update
        try:
            zip_data = process_files(contents, filenames)
        except (RuntimeError, OSError, ValueError) as err:
            # Show the failure in the status area and re-enable the button,
            # rather than letting the callback crash with no feedback.
            return (
                no_update,
                False,
                "",
                {"display": "none"},
                f"Conversion failed: {err}",
                {"display": "block"},
                no_update,
            )
        # The status text is returned together with the finished download,
        # so it only becomes visible once conversion has completed.
        return (
            no_update,
            True,
            "",
            {"display": "none"},
            "Converting files... This may take a moment.",
            {"display": "block"},
            dcc.send_bytes(zip_data, "converted_files.zip"),
        )

    return no_update
def main():
    """Run the Dash server on all interfaces, port 7860, logging start and stop."""
    print("Starting the Dash application...")
    app.run(debug=False, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")


if __name__ == '__main__':
    main()