# balancete / app.py
# vcollos's picture
# Update app.py
# c10800e verified
# app.py
import gradio as gr
import tempfile
import pandas as pd
import os
import sys
import subprocess
# Install runtime dependencies at start-up.
# Invoke pip through the running interpreter (``python -m pip``): a bare
# ``pip`` found on PATH may belong to a different Python environment, in which
# case the packages would not be importable from this process.
_PIP_INSTALL = [sys.executable, "-m", "pip", "install"]
subprocess.run(
    [*_PIP_INSTALL, "gradio", "pandas", "transformers", "torch"],
    check=True,
)
# Install docling-core straight from its GitHub repository.
subprocess.run(
    [*_PIP_INSTALL, "git+https://github.com/docling-project/docling-core.git"],
    check=True,
)

# Publish the model location through the DOCLING_MODEL_HOME environment
# variable (presumably read by docling to locate its models — TODO confirm).
_MODEL_HOME = os.path.expanduser("~/.docling/models")
os.environ["DOCLING_MODEL_HOME"] = _MODEL_HOME

# Make sure the TableFormer "accurate" model directory exists; it is derived
# from the same base path so the two locations cannot drift apart.
model_dir = os.path.join(_MODEL_HOME, "tableformer", "accurate")
os.makedirs(model_dir, exist_ok=True)
# Import only after the installation step above has run
from docling_core.pipelines.table import TableExtractionPipeline
from docling_core.models import ModelManager
def process_pdf(file):
    """Extract the tables of an uploaded PDF and return them as DataFrames.

    Args:
        file: The upload to process. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object with a ``read()`` method that
            returns the PDF bytes. ``None`` means nothing was uploaded.

    Returns:
        A ``(status, tables)`` tuple. ``status`` is a user-facing message;
        ``tables`` is a list of ``(title, DataFrame)`` pairs, one per
        extracted table, or ``None`` when no file was given or no table was
        detected.

    Side effects:
        Writes every extracted table to ``tabela_<n>.csv`` in the current
        working directory.
    """
    if file is None:
        return "Nenhum arquivo enviado.", None

    # Depending on the Gradio version/configuration the upload may arrive as
    # a path or as a file-like object, so accept both.
    if isinstance(file, (str, os.PathLike)):
        with open(file, "rb") as source:
            pdf_bytes = source.read()
    else:
        pdf_bytes = file.read()

    tmp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    pdf_path = tmp_pdf.name
    try:
        # Close the handle before the pipeline opens the path so every byte
        # is flushed to disk first.
        with tmp_pdf:
            tmp_pdf.write(pdf_bytes)

        manager = ModelManager(auto_download=True)
        pipeline = TableExtractionPipeline(model_manager=manager)
        result = pipeline.run(pdf_path)
        tables = result["tables"]
        if not tables:
            return "Nenhuma tabela detectada.", None

        outputs = []
        for number, table in enumerate(tables, start=1):
            df = pd.DataFrame(table.rows)
            csv_path = f"tabela_{number}.csv"
            df.to_csv(csv_path, index=False)
            outputs.append((f"Tabela {number} (página {table.page_number})", df))
        return "Tabelas extraídas com sucesso!", outputs
    finally:
        # Remove the temporary copy on every exit path: success, the
        # "no tables" early return, and any exception raised above.
        os.unlink(pdf_path)
def show_tables(file):
    """Run table extraction and map the result onto the UI output slots.

    Always returns exactly five values, in the order in which the button's
    click handler lists its outputs: the status text followed by a
    (heading, dataframe) update pair for each of the two table slots.
    Slots that receive a table are made visible; unused slots are hidden so
    tables from a previous run do not linger. Tables beyond the second are
    not displayed.

    Args:
        file: The uploaded file value, passed through to ``process_pdf``.

    Returns:
        A 5-tuple ``(status, md1, df1, md2, df2)`` where the last four items
        are ``gr.update(...)`` objects.
    """
    # Number of (Markdown, Dataframe) pairs wired as outputs of the button.
    max_tables = 2

    status, results = process_pdf(file)
    shown = (results or [])[:max_tables]

    updates = []
    for title, df in shown:
        updates.append(gr.update(value=f"### {title}", visible=True))
        updates.append(gr.update(value=df, visible=True))

    # Pad with "hide" updates so the return arity never varies.
    for _ in range(max_tables - len(shown)):
        updates.append(gr.update(visible=False))
        updates.append(gr.update(visible=False))

    return (status, *updates)
# Page layout: title, upload row, status box and two initially hidden
# (heading, table) result slots, all wired to a single button.
with gr.Blocks() as demo:
    gr.Markdown("# 🧾 TableFormer via Docling")

    # Upload control and trigger button share one row.
    with gr.Row():
        file = gr.File(label="Envie o PDF do balancete", file_types=[".pdf"])
        btn = gr.Button("Processar")

    status = gr.Textbox(label="Status")

    # Result slots are created hidden.
    output1_md = gr.Markdown(visible=False)
    output1_df = gr.Dataframe(visible=False)
    output2_md = gr.Markdown(visible=False)
    output2_df = gr.Dataframe(visible=False)

    # The handler's return values are matched to these outputs by position.
    btn.click(
        show_tables,
        inputs=file,
        outputs=[status, output1_md, output1_df, output2_md, output2_df],
    )

demo.launch()