Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| import polars as pl | |
| import gradio as gr | |
| from gradio_pdf import PDF | |
| from common import Interface | |
| from parser import PDFTableParser | |
class WebUI:
    """Gradio front end that extracts tables from an uploaded PDF."""

    def __init__(self):
        """Create the UI wrapper; it keeps no instance state."""
        pass

    @staticmethod
    def process_pdf(pdf_file, output_path, edge_tol, row_tol, pages):
        """Extract tables from ``pdf_file`` and save them as a CSV file.

        Declared as a static method so it behaves the same whether it is
        reached through the class (as the UI wiring does) or an instance.

        Args:
            pdf_file: Value delivered by the PDF input component. A falsy
                value (e.g. ``None`` once the component is cleared) is
                rejected before any parsing is attempted.
            output_path: Requested output file name. Only its final path
                component is used, so the file always lands inside the
                directory returned by ``Interface.get_tempdir()``.
            edge_tol: Passed through to ``PDFTableParser`` unchanged.
            row_tol: Passed through to ``PDFTableParser`` unchanged.
            pages: Page selection string (the UI hints at values such as
                ``'all'`` or ``'3-end'``), passed through unchanged.

        Returns:
            A ``(dataframe, files, status)`` tuple. On success ``dataframe``
            is the concatenation of every extracted table, ``files`` is a
            one-element list holding the CSV path and ``status`` is a dict
            with ``"status": "success"``. On failure the first two items
            are ``None`` and ``status`` has ``"status": "error"``.
        """
        if not pdf_file:
            return None, None, {"status": "error", "message": "No PDF file provided"}

        # The name comes from a free-text field: drop any directory part so
        # an absolute path or "../" cannot point outside the temp directory.
        file_name = os.path.basename(output_path or "")
        if file_name in ("", ".", ".."):
            file_name = "output.csv"

        _, tempd = Interface.get_tempdir()
        tempf = os.path.join(tempd, file_name)

        parser = PDFTableParser([pdf_file], [tempf], ',', edge_tol, row_tol, pages)
        tables = parser.read_tables(pdf_file)
        if not tables:
            return None, None, {"status": "error", "message": "Failed to process PDF"}

        parser.save_tables_as_csv(tables, tempf)
        df = pl.concat([pl.DataFrame(table.df) for table in tables])
        return df, [tempf], {"status": "success", "message": f"Processed PDF and saved as {tempf}"}

    def run(self):
        """Build the Gradio page, wire the PDF input to ``process_pdf`` and launch it."""
        description = "Upload a PDF file to extract tables"
        page_css = (
            "body { font-family: Arial, sans-serif; } "
            "footer { visibility: hidden; }"
        )

        # NOTE(review): the nesting of rows/columns below was reconstructed
        # from the order of the widgets - confirm against the intended layout.
        with gr.Blocks(title="PDF Table Parser", css=page_css) as app:
            gr.Markdown("# PDF Table Parser")
            gr.Markdown(f"### {description}")

            with gr.Row():
                with gr.Column():
                    pdf_in = PDF(label="Document")
                    with gr.Row():
                        edge_tol = gr.Number(50, label="Edge tol")
                        row_tol = gr.Number(50, label="Row tol")
                        pages = gr.Textbox('1', label="Pages", info="You can pass 'all', '3-end', etc.")
                        output_path = gr.Textbox("output.csv", label="Output Path")
                with gr.Column():
                    status_msg = gr.JSON(label="Status Message")
                    output_files = gr.Files(label="Output Files")

            with gr.Row():
                output_df = gr.Dataframe(label="Extracted Table")

            gr.Examples([["data/demo.pdf"]], inputs=pdf_in)

            # Re-run the extraction every time the document input changes.
            pdf_in.change(
                WebUI.process_pdf,
                inputs=[pdf_in, output_path, edge_tol, row_tol, pages],
                outputs=[output_df, output_files, status_msg],
            )

        app.launch()
def main(args):
    """Run the batch pipeline described by ``args``.

    Builds a ``PDFTableParser`` from the ``input_files``, ``output_files``,
    ``delimiter``, ``edge_tol``, ``row_tol`` and ``pages`` attributes of
    ``args`` (in that positional order) and calls its ``process_files()``.
    """
    PDFTableParser(
        args.input_files,
        args.output_files,
        args.delimiter,
        args.edge_tol,
        args.row_tol,
        args.pages,
    ).process_files()
if __name__ == "__main__":
    # Running the file as a script starts the web interface.
    WebUI().run()