Update app.py
Browse files
app.py
CHANGED
|
@@ -354,13 +354,32 @@ def process_pdf_url(pdf_url):
|
|
| 354 |
return error_msg, "", ""
|
| 355 |
|
| 356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
# Create Gradio interface
|
| 358 |
with gr.Blocks(
|
| 359 |
title="PDF to HTML Converter",
|
| 360 |
-
theme=gr.themes.Soft(
|
| 361 |
-
primary_hue="indigo",
|
| 362 |
-
secondary_hue="purple",
|
| 363 |
-
),
|
| 364 |
css="""
|
| 365 |
.gradio-container {
|
| 366 |
max-width: 1200px !important;
|
|
@@ -397,14 +416,12 @@ with gr.Blocks(
|
|
| 397 |
pdf_url_input = gr.Textbox(
|
| 398 |
label="PDF URL",
|
| 399 |
placeholder="https://example.com/document.pdf",
|
| 400 |
-
lines=1
|
| 401 |
-
max_lines=1
|
| 402 |
)
|
| 403 |
with gr.Column(scale=1):
|
| 404 |
process_btn = gr.Button(
|
| 405 |
"🚀 Process PDF",
|
| 406 |
-
variant="primary"
|
| 407 |
-
size="lg"
|
| 408 |
)
|
| 409 |
|
| 410 |
summary_output = gr.Markdown(label="Summary")
|
|
@@ -442,28 +459,6 @@ with gr.Blocks(
|
|
| 442 |
outputs=[summary_output, html_preview, html_source]
|
| 443 |
)
|
| 444 |
|
| 445 |
-
def create_download_file(html_content):
|
| 446 |
-
if not html_content:
|
| 447 |
-
return None
|
| 448 |
-
|
| 449 |
-
# Create full HTML document
|
| 450 |
-
full_html = f"""<!DOCTYPE html>
|
| 451 |
-
<html lang="en">
|
| 452 |
-
<head>
|
| 453 |
-
<meta charset="UTF-8">
|
| 454 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 455 |
-
<title>Extracted PDF Content</title>
|
| 456 |
-
</head>
|
| 457 |
-
<body>
|
| 458 |
-
{html_content}
|
| 459 |
-
</body>
|
| 460 |
-
</html>"""
|
| 461 |
-
|
| 462 |
-
temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.html', encoding='utf-8')
|
| 463 |
-
temp_file.write(full_html)
|
| 464 |
-
temp_file.close()
|
| 465 |
-
return temp_file.name
|
| 466 |
-
|
| 467 |
download_btn.click(
|
| 468 |
fn=create_download_file,
|
| 469 |
inputs=[html_source],
|
|
|
|
| 354 |
return error_msg, "", ""
|
| 355 |
|
| 356 |
|
| 357 |
+
def create_download_file(html_content):
|
| 358 |
+
if not html_content:
|
| 359 |
+
return None
|
| 360 |
+
|
| 361 |
+
# Create full HTML document
|
| 362 |
+
full_html = f"""<!DOCTYPE html>
|
| 363 |
+
<html lang="en">
|
| 364 |
+
<head>
|
| 365 |
+
<meta charset="UTF-8">
|
| 366 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 367 |
+
<title>Extracted PDF Content</title>
|
| 368 |
+
</head>
|
| 369 |
+
<body>
|
| 370 |
+
{html_content}
|
| 371 |
+
</body>
|
| 372 |
+
</html>"""
|
| 373 |
+
|
| 374 |
+
temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.html', encoding='utf-8')
|
| 375 |
+
temp_file.write(full_html)
|
| 376 |
+
temp_file.close()
|
| 377 |
+
return temp_file.name
|
| 378 |
+
|
| 379 |
+
|
| 380 |
# Create Gradio interface
|
| 381 |
with gr.Blocks(
|
| 382 |
title="PDF to HTML Converter",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
css="""
|
| 384 |
.gradio-container {
|
| 385 |
max-width: 1200px !important;
|
|
|
|
| 416 |
pdf_url_input = gr.Textbox(
|
| 417 |
label="PDF URL",
|
| 418 |
placeholder="https://example.com/document.pdf",
|
| 419 |
+
lines=1
|
|
|
|
| 420 |
)
|
| 421 |
with gr.Column(scale=1):
|
| 422 |
process_btn = gr.Button(
|
| 423 |
"🚀 Process PDF",
|
| 424 |
+
variant="primary"
|
|
|
|
| 425 |
)
|
| 426 |
|
| 427 |
summary_output = gr.Markdown(label="Summary")
|
|
|
|
| 459 |
outputs=[summary_output, html_preview, html_source]
|
| 460 |
)
|
| 461 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
download_btn.click(
|
| 463 |
fn=create_download_file,
|
| 464 |
inputs=[html_source],
|