# jobian's picture
# Enhance PDF extraction functionality and update .gitignore
# aa93542
import gradio as gr
from gradio_pdf import PDF
from services.extraction_service import extract_tables
from config import SERVER_NAME, SERVER_PORT, IN_SPACES
# Assemble the single-page UI. Every component and the click handler are
# declared inside the Blocks context so they are attached to `demo`.
with gr.Blocks(title="Tables Extractor", theme=gr.themes.Soft()) as demo:
    # Introductory banner shown at the top of the page.
    gr.Markdown(
        "\n# Table Extraction\n"
        "Upload up to **15 text-based PDFs** and extract tables to structured JSON.\n"
        "The UI renders detected tables; you can also download JSON + metrics.\n"
    )

    # Input: multi-file picker limited to .pdf; values are passed as file paths.
    with gr.Row():
        inp = gr.File(
            label="Upload PDFs",
            file_types=[".pdf"],
            file_count="multiple",
            type="filepath",
        )

    # Trigger button for the extraction run.
    with gr.Row():
        run_btn = gr.Button("Extract Tables", variant="primary")

    # Free-form status text filled in by the handler.
    with gr.Row():
        status = gr.Markdown()

    # Two equally weighted columns: PDF preview on the left, table previews
    # on the right.
    with gr.Row():
        with gr.Column(scale=1):
            pdf_preview = PDF(
                label="PDF Preview (First Uploaded)",
                height=400,
                interactive=True,
            )
        with gr.Column(scale=1):
            gallery = gr.Gallery(
                label="Detected Table Previews",
                columns=2,
                height=400,
                object_fit="contain",
            )

    # Files offered for download after a run.
    with gr.Row():
        downloads = gr.Files(label="Download (tables.json, metrics.json)")

    # Raw HTML area filled in by the handler.
    with gr.Row():
        html_view = gr.HTML()

    # Wire the button to the extraction service.
    # NOTE(review): extract_tables lives in services.extraction_service; its
    # return values are expected to line up with the `outputs` order below --
    # confirm against that module.
    run_btn.click(
        fn=extract_tables,
        inputs=[inp],
        outputs=[status, downloads, gallery, html_view, pdf_preview],
    )
if __name__ == "__main__":
    # Debug mode is enabled only when IN_SPACES is falsy.
    debug_enabled = not IN_SPACES
    demo.launch(server_name=SERVER_NAME, server_port=SERVER_PORT, debug=debug_enabled)