Spaces:

sblumenf
/

pdf-convert

Sleeping

App Files Community

sblumenf committed on Dec 12, 2024

Commit

7e49296

verified ·

1 Parent(s): ef0bab0

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -4

app.py CHANGED Viewed

@@ -73,7 +73,7 @@ def parse_pdf(pdf_file, output_format, progress=gr.Progress()):
                         "tables": [table.to_dict(orient='records') for table in tables if not table.columns.duplicated().any()],
                         "images": images
                     }
-                    json.dump(json_data, tmp, indent=4)
                 elif output_format == "Markdown":
                     markdown_text = f"# Extracted Text\n\n{text}\n\n# Tables\n"
                     for i, table in enumerate(tables):
@@ -84,7 +84,7 @@ def parse_pdf(pdf_file, output_format, progress=gr.Progress()):
                     for image in images:
                         image_path = os.path.join(os.getcwd(), image["filename"])
                         markdown_text += f'![Image]({image_path})\n'
-                    tmp.write(markdown_text.encode('utf-8'))
                 elif output_format == "HTML":
                     html_text = f"<p>{text}</p>\n\n<h2>Tables</h2>\n"
                     for i, table in enumerate(tables):
@@ -95,7 +95,7 @@ def parse_pdf(pdf_file, output_format, progress=gr.Progress()):
                     for image in images:
                         image_path = os.path.join(os.getcwd(), image["filename"])
                         html_text += f'<img src="{image_path}" alt="Image"><br>\n'
-                    tmp.write(html_text.encode('utf-8'))
                 download_path = tmp.name
             return text, download_path
@@ -117,4 +117,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()  # Temporarily disable sharing

                         "tables": [table.to_dict(orient='records') for table in tables if not table.columns.duplicated().any()],
                         "images": images
                     }
+                    json.dump(json_data, tmp, indent=4)
                 elif output_format == "Markdown":
                     markdown_text = f"# Extracted Text\n\n{text}\n\n# Tables\n"
                     for i, table in enumerate(tables):
                     for image in images:
                         image_path = os.path.join(os.getcwd(), image["filename"])
                         markdown_text += f'![Image]({image_path})\n'
+                    tmp.write(markdown_text.encode('utf-8'))
                 elif output_format == "HTML":
                     html_text = f"<p>{text}</p>\n\n<h2>Tables</h2>\n"
                     for i, table in enumerate(tables):
                     for image in images:
                         image_path = os.path.join(os.getcwd(), image["filename"])
                         html_text += f'<img src="{image_path}" alt="Image"><br>\n'
+                    tmp.write(html_text.encode('utf-8'))
                 download_path = tmp.name
             return text, download_path
 )
 if __name__ == "__main__":
+    iface.launch()  # Temporarily disable sharing for debugging