# app.py
"""Gradio front end that runs a Docling table-extraction pipeline on a PDF.

The script installs its own dependencies at startup, configures the Docling
model directory, and serves a small UI that shows up to ``MAX_TABLES_SHOWN``
extracted tables. Every extracted table is also written to
``tabela_<n>.csv`` in the current working directory.
"""
import os
import subprocess
import sys
import tempfile

# Install dependencies. pip is invoked through the running interpreter so the
# packages are installed into the same environment that imports them below.
subprocess.run(
    [sys.executable, "-m", "pip", "install",
     "gradio", "pandas", "transformers", "torch"],
    check=True,
)

# Install docling-core directly from GitHub.
subprocess.run(
    [sys.executable, "-m", "pip", "install",
     "git+https://github.com/docling-project/docling-core.git"],
    check=True,
)

# Tell Docling where its models live.
os.environ["DOCLING_MODEL_HOME"] = os.path.expanduser("~/.docling/models")

# Create the model directory up front if it does not exist yet.
model_dir = os.path.expanduser("~/.docling/models/tableformer/accurate")
os.makedirs(model_dir, exist_ok=True)

# Third-party imports must come after the installation step above; importing
# them earlier would fail on a fresh environment before pip ever ran.
import gradio as gr  # noqa: E402
import pandas as pd  # noqa: E402

# NOTE(review): these import paths are taken as-is from the original script;
# confirm they exist in the installed docling-core version.
from docling_core.pipelines.table import TableExtractionPipeline  # noqa: E402
from docling_core.models import ModelManager  # noqa: E402

# Number of (title, dataframe) display slots wired into the UI below.
MAX_TABLES_SHOWN = 2


def process_pdf(file):
    """Extract tables from an uploaded PDF.

    Args:
        file: The upload as delivered by ``gr.File``. May be ``None`` (nothing
            uploaded), a filesystem path (``str`` / ``os.PathLike``), or a
            file-like object exposing ``read()``.

    Returns:
        A ``(status, tables)`` tuple. ``status`` is a user-facing message.
        ``tables`` is a list of ``(title, DataFrame)`` pairs, or ``None`` when
        there is nothing to show.

    Side effects:
        Writes each table to ``tabela_<n>.csv`` in the working directory.
    """
    if file is None:
        return "Nenhum arquivo enviado.", None

    temp_path = None  # set only when this function creates a temporary copy
    try:
        if isinstance(file, (str, os.PathLike)):
            # Already a path on disk: no copy needed, and nothing to delete.
            pdf_path = os.fspath(file)
        else:
            # File-like upload: persist it so the pipeline can open it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
                temp_path = tmp_pdf.name
                tmp_pdf.write(file.read())
            pdf_path = temp_path

        manager = ModelManager(auto_download=True)
        pipeline = TableExtractionPipeline(model_manager=manager)
        result = pipeline.run(pdf_path)
        tables = result["tables"]

        if not tables:
            return "Nenhuma tabela detectada.", None

        outputs = []
        for number, table in enumerate(tables, start=1):
            df = pd.DataFrame(table.rows)
            df.to_csv(f"tabela_{number}.csv", index=False)
            outputs.append((f"Tabela {number} (página {table.page_number})", df))
        return "Tabelas extraídas com sucesso!", outputs
    finally:
        # Remove the temporary copy on every exit path, including the
        # "no tables" return and pipeline errors.
        if temp_path is not None:
            os.unlink(temp_path)


def show_tables(file):
    """Gradio click handler: run the extraction and fill the display slots.

    Always returns exactly ``1 + 2 * MAX_TABLES_SHOWN`` values, matching the
    ``outputs`` list of the click event: the status text followed by one
    (markdown, dataframe) update per slot. Slots without a table are hidden.
    """
    status, results = process_pdf(file)
    shown = (results or [])[:MAX_TABLES_SHOWN]

    updates = []
    for slot in range(MAX_TABLES_SHOWN):
        if slot < len(shown):
            title, df = shown[slot]
            updates.append(gr.update(value=f"### {title}", visible=True))
            updates.append(gr.update(value=df, visible=True))
        else:
            # Hide unused slots so tables from a previous run do not linger.
            updates.append(gr.update(visible=False))
            updates.append(gr.update(visible=False))
    return (status, *updates)


with gr.Blocks() as demo:
    gr.Markdown("# 🧾 TableFormer via Docling")
    with gr.Row():
        file = gr.File(label="Envie o PDF do balancete", file_types=[".pdf"])
        btn = gr.Button("Processar")
    status = gr.Textbox(label="Status")

    # Display slots start hidden; show_tables reveals the ones it fills.
    output1_md = gr.Markdown(visible=False)
    output1_df = gr.Dataframe(visible=False)
    output2_md = gr.Markdown(visible=False)
    output2_df = gr.Dataframe(visible=False)

    btn.click(
        fn=show_tables,
        inputs=file,
        outputs=[status, output1_md, output1_df, output2_md, output2_df],
    )

demo.launch()