Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,9 @@ import PyPDF2
|
|
| 12 |
|
| 13 |
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
app.layout = dbc.Container([
|
| 16 |
html.H1("Auto-Wiki", className="my-4"),
|
| 17 |
dcc.Upload(
|
|
@@ -37,7 +40,8 @@ app.layout = dbc.Container([
|
|
| 37 |
dbc.Progress(id="upload-progress", label="Upload Progress", style={"visibility": "hidden"}),
|
| 38 |
dbc.Progress(id="conversion-progress", label="Conversion Progress", style={"visibility": "hidden"}),
|
| 39 |
dbc.Button("Convert and Download", id="convert-button", color="primary", className="mt-3", disabled=True),
|
| 40 |
-
dcc.Download(id="download-zip")
|
|
|
|
| 41 |
])
|
| 42 |
|
| 43 |
def process_docx(contents, filename):
|
|
@@ -59,6 +63,28 @@ def process_pdf(contents, filename):
|
|
| 59 |
full_text.append(page.extract_text())
|
| 60 |
return '\n\n'.join(full_text)
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
@app.callback(
|
| 63 |
[Output('upload-output', 'children'),
|
| 64 |
Output('convert-button', 'disabled'),
|
|
@@ -69,11 +95,13 @@ def process_pdf(contents, filename):
|
|
| 69 |
Output('download-zip', 'data')],
|
| 70 |
[Input('upload-data', 'contents'),
|
| 71 |
Input('upload-data', 'filename'),
|
| 72 |
-
Input('convert-button', 'n_clicks')
|
|
|
|
| 73 |
[State('upload-data', 'contents'),
|
| 74 |
State('upload-data', 'filename')]
|
| 75 |
)
|
| 76 |
-
def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames):
|
|
|
|
| 77 |
ctx = callback_context
|
| 78 |
if not ctx.triggered:
|
| 79 |
return no_update
|
|
@@ -96,31 +124,17 @@ def update_output(list_of_contents, list_of_names, n_clicks, contents, filenames
|
|
| 96 |
if not contents:
|
| 97 |
return no_update
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
for i, (c, n) in enumerate(zip(contents, filenames)):
|
| 102 |
-
if n.lower().endswith('.docx'):
|
| 103 |
-
text = process_docx(c, n)
|
| 104 |
-
elif n.lower().endswith('.pdf'):
|
| 105 |
-
text = process_pdf(c, n)
|
| 106 |
-
else:
|
| 107 |
-
continue # Skip unsupported file types
|
| 108 |
-
md = markdown.markdown(text)
|
| 109 |
-
processed_files.append((n.replace('.docx', '.md').replace('.pdf', '.md'), md))
|
| 110 |
-
time.sleep(0.1) # Simulate processing time
|
| 111 |
-
app.callback_context.response.set_data(f'{{"progress": {(i+1)/len(contents)*100}}}')
|
| 112 |
-
|
| 113 |
-
zip_buffer = io.BytesIO()
|
| 114 |
-
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
| 115 |
-
for name, content in processed_files:
|
| 116 |
-
zip_file.writestr(name, content)
|
| 117 |
-
|
| 118 |
-
return zip_buffer.getvalue()
|
| 119 |
-
|
| 120 |
-
thread = threading.Thread(target=process_files)
|
| 121 |
thread.start()
|
| 122 |
|
| 123 |
-
return no_update, True, 100, {"visibility": "visible"}, 0, {"visibility": "visible"},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
return no_update
|
| 126 |
|
|
|
|
| 12 |
|
| 13 |
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
|
| 14 |
|
| 15 |
+
# Global variable to track conversion progress
|
| 16 |
+
conversion_progress = 0
|
| 17 |
+
|
| 18 |
app.layout = dbc.Container([
|
| 19 |
html.H1("Auto-Wiki", className="my-4"),
|
| 20 |
dcc.Upload(
|
|
|
|
| 40 |
dbc.Progress(id="upload-progress", label="Upload Progress", style={"visibility": "hidden"}),
|
| 41 |
dbc.Progress(id="conversion-progress", label="Conversion Progress", style={"visibility": "hidden"}),
|
| 42 |
dbc.Button("Convert and Download", id="convert-button", color="primary", className="mt-3", disabled=True),
|
| 43 |
+
dcc.Download(id="download-zip"),
|
| 44 |
+
dcc.Interval(id='interval-component', interval=500, n_intervals=0)
|
| 45 |
])
|
| 46 |
|
| 47 |
def process_docx(contents, filename):
|
|
|
|
| 63 |
full_text.append(page.extract_text())
|
| 64 |
return '\n\n'.join(full_text)
|
| 65 |
|
| 66 |
+
def process_files(contents, filenames):
    """Convert uploaded .docx/.pdf files and return them as a zip archive.

    Each supported file is passed to ``process_docx`` or ``process_pdf`` to
    extract its text, the text is run through ``markdown.markdown``, and the
    result is stored in the archive under the original name with its
    extension replaced by ``.md``. Files with any other extension are skipped.

    While running, the module-level ``conversion_progress`` is updated to the
    percentage of input files handled so far (skipped files included), and is
    always set to 100 once the archive has been built, so code polling that
    variable can reliably detect completion.

    Args:
        contents: Sequence of uploaded file payloads, parallel to
            ``filenames``. ``None`` is treated as empty.
        filenames: Sequence of the uploaded files' names. ``None`` is treated
            as empty.

    Returns:
        bytes: The raw bytes of a DEFLATE-compressed zip archive containing
        one ``.md`` entry per converted file (an empty archive if nothing
        was convertible).
    """
    global conversion_progress

    # Pair the inputs up front so the progress denominator matches the number
    # of loop iterations even if the two sequences differ in length.
    pairs = list(zip(contents or [], filenames or []))
    total = len(pairs)

    processed_files = []
    for i, (c, n) in enumerate(pairs):
        lowered = n.lower()
        if lowered.endswith('.docx'):
            text = process_docx(c, n)
        elif lowered.endswith('.pdf'):
            text = process_pdf(c, n)
        else:
            # Unsupported file type: skip it, but still count it as handled
            # so the progress value keeps advancing.
            conversion_progress = (i + 1) / total * 100
            continue
        # NOTE(review): markdown.markdown() renders Markdown *to HTML*, so the
        # ".md" entries hold HTML markup - confirm this is intended.
        md = markdown.markdown(text)
        # The extension check above is case-insensitive, so strip the final
        # suffix positionally instead of a case-sensitive str.replace(), which
        # missed names like "REPORT.DOCX" and rewrote ".pdf" mid-name.
        processed_files.append((n.rsplit('.', 1)[0] + '.md', md))
        conversion_progress = (i + 1) / total * 100
        time.sleep(0.1)  # Simulate processing time

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for name, content in processed_files:
            zip_file.writestr(name, content)

    # Guarantee the completion signal even for empty input.
    conversion_progress = 100
    return zip_buffer.getvalue()
|
| 87 |
+
|
| 88 |
@app.callback(
|
| 89 |
[Output('upload-output', 'children'),
|
| 90 |
Output('convert-button', 'disabled'),
|
|
|
|
| 95 |
Output('download-zip', 'data')],
|
| 96 |
[Input('upload-data', 'contents'),
|
| 97 |
Input('upload-data', 'filename'),
|
| 98 |
+
Input('convert-button', 'n_clicks'),
|
| 99 |
+
Input('interval-component', 'n_intervals')],
|
| 100 |
[State('upload-data', 'contents'),
|
| 101 |
State('upload-data', 'filename')]
|
| 102 |
)
|
| 103 |
+
def update_output(list_of_contents, list_of_names, n_clicks, n_intervals, contents, filenames):
|
| 104 |
+
global conversion_progress
|
| 105 |
ctx = callback_context
|
| 106 |
if not ctx.triggered:
|
| 107 |
return no_update
|
|
|
|
| 124 |
if not contents:
|
| 125 |
return no_update
|
| 126 |
|
| 127 |
+
conversion_progress = 0
|
| 128 |
+
thread = threading.Thread(target=process_files, args=(contents, filenames))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
thread.start()
|
| 130 |
|
| 131 |
+
return no_update, True, 100, {"visibility": "visible"}, 0, {"visibility": "visible"}, None
|
| 132 |
+
|
| 133 |
+
if ctx.triggered[0]['prop_id'] == 'interval-component.n_intervals':
|
| 134 |
+
if conversion_progress == 100:
|
| 135 |
+
return no_update, False, 100, {"visibility": "visible"}, 100, {"visibility": "visible"}, dcc.send_bytes(process_files(contents, filenames), "converted_files.zip")
|
| 136 |
+
else:
|
| 137 |
+
return no_update, True, 100, {"visibility": "visible"}, conversion_progress, {"visibility": "visible"}, None
|
| 138 |
|
| 139 |
return no_update
|
| 140 |
|