Spaces:

ESJL
/

CUB_PDF

Sleeping

File size: 4,888 Bytes

10d228a
 
 
 
 
 
 
 
 
6a9e928
10d228a
 
 
 
 
 
 
 
 
 
f527499
10d228a
 
1b3ebf8
 
 
 
 
 
 
10d228a
1b3ebf8
 
10d228a
 
f527499
1b3ebf8
f527499
10d228a
 
1b3ebf8
10d228a
f527499
10d228a
 
1b3ebf8
 
 
 
10d228a
1b3ebf8
f527499
1b3ebf8
 
f527499
1b3ebf8
 
 
 
 
10d228a
1b3ebf8
 
 
 
f527499
10d228a
1b3ebf8
 
f527499
1b3ebf8
 
 
 
10d228a
1b3ebf8
10d228a
1b3ebf8
 
10d228a
 
1b3ebf8
10d228a
1b3ebf8
 
10d228a
1b3ebf8
 
6a9e928
f527499
1b3ebf8
f527499
6a9e928
1b3ebf8
 
 
 
 
 
 
 
 
f527499
10d228a
 
 
1b3ebf8
 
f527499
10d228a
1b3ebf8
10d228a
1b3ebf8
 
 
 
 
6d76bf1
1b3ebf8

import re
import os
import tempfile
from datetime import datetime
from pypdf import PdfReader
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
import gradio as gr

# --- SINDUSCON SETTINGS ---
# Row labels written to the first column of the sheet; the list index is the
# row index used when values are assigned to rows.
LINHAS = [
    "R 1-B (Res. Unifamiliar)",
    "R 1-N (Res. Unifamiliar)",
    "R 1-A (Res. Unifamiliar)",
    "PP 4-B (Prédio Popular)",
    "PP 4-N (Prédio Popular)",
    "R 8-B (Res. Multifamiliar)",
    "R 8-N (Res. Multifamiliar)",
    "R 8-A (Res. Multifamiliar)",
    "R 16-N (Res. Multifamiliar)",
    "R 16-A (Res. Multifamiliar)",
    "PIS (Projeto Inter. Social)",
    "RP1Q (Residência Popular)",
    "CAL 8-N (Com. Andar Livres)",
    "CAL 8-A (Com. Andar Livres)",
    "CSL 8-N (Com.Salas e Lojas)",
    "CSL 8-A (Com.Salas e Lojas)",
    "CSL 16-N (Com.Salas e Lojas)",
    "CSL 16-A (Com.Salas e Lojas)",
    "GI (Galpão Industrial)",
]
NUM_LINHAS = len(LINHAS)

# Month number (1-12) -> abbreviated Portuguese month name for column headers.
MESES_PT = dict(
    enumerate(
        ["Jan", "Fev", "Mar", "Abr", "Mai", "Jun", "Jul", "Ago", "Set", "Out", "Nov", "Dez"],
        start=1,
    )
)

# Solid light-gray fill used to shade alternating groups of rows.
CINZA = PatternFill(fill_type="solid", start_color="D9D9D9", end_color="D9D9D9")

def limpar_e_converter(valor_str):
    """Convert a Brazilian-formatted number string to ``float``.

    Dots are treated as thousands separators and the comma as the decimal
    separator, so ``'1.234,56'`` becomes ``1234.56`` and ``'-0,45'`` becomes
    ``-0.45``.

    Args:
        valor_str: Text to convert.

    Returns:
        The parsed ``float``, or ``None`` when the input is not a string-like
        value or does not form a valid number after the separators are
        normalized.
    """
    try:
        # Drop thousands separators, then turn the decimal comma into a dot.
        return float(valor_str.replace(".", "").replace(",", "."))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: input is not a str (e.g. None, bytes);
        # ValueError: the normalized text is not a number.
        return None

# Four-digit number starting with "20"; the first match on a page is taken as
# that page's year.
_PADRAO_ANO = re.compile(r"(?:20)\d{2}")
# Number with optional sign, optional dot-grouped thousands, a comma and two
# decimals: 2.500,00 | 900,00 | 0,45 | -0,10
_PADRAO_VALOR = re.compile(r"-?\d{1,3}(?:\.\d{3})*,\d{2}")
# Only months 1-12 have a header label; extra columns on a page are ignored.
_MESES_POR_ANO = 12


def processar_pdf_universal(arquivo_pdf, anos_retroativos):
    """Extract the numeric table from each PDF page and export it to Excel.

    For every page, the first "20xx" number found is used as the page's year
    and all comma-decimal numbers are collected in reading order. The numbers
    are split evenly across the ``NUM_LINHAS`` rows (row by row), and the
    columns of each row are labelled as months 1, 2, 3, ... of that year.
    NOTE(review): this assumes the PDF text lists all months of one row before
    the next row - confirm against the real PDFs.

    Args:
        arquivo_pdf: Uploaded file. Either an object exposing the file path
            in ``.name`` or a plain path string. ``None`` means nothing was
            uploaded yet.
        anos_retroativos: How many years back from the current year to keep;
            pages whose detected year is older are skipped.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated ``.xlsx``
        file, or ``None`` when there is no input, no data was extracted, or
        an error occurred. ``message`` is a status/log text for the UI.
    """
    if arquivo_pdf is None:
        return None, "Aguardando arquivo..."

    log = []
    dados_globais = {}
    eh_porcentagem = False  # Set once any processed page contains a "%" sign.

    try:
        # Accept both file-like wrappers (with .name) and plain path strings.
        caminho_pdf = getattr(arquivo_pdf, "name", arquivo_pdf)
        reader = PdfReader(caminho_pdf)
        ano_limite = datetime.now().year - anos_retroativos

        for num_pag, page in enumerate(reader.pages, start=1):
            # Guard against pages with no extractable text.
            text = page.extract_text() or ""

            ano_match = _PADRAO_ANO.search(text)
            if not ano_match:
                continue
            ano_pag = int(ano_match.group(0))
            if ano_pag < ano_limite:
                continue

            # A "%" in the text marks the PDF as a variation (percentage) report.
            if "%" in text:
                eh_porcentagem = True

            valores_float = [
                limpar_e_converter(v) for v in _PADRAO_VALOR.findall(text)
            ]
            if not valores_float:
                continue

            num_meses = len(valores_float) // NUM_LINHAS
            if num_meses == 0:
                continue

            # Keep the detected row stride, but never store more than 12
            # months: a 13th column has no month label and used to abort the
            # whole export with a KeyError when building the header.
            meses_usados = min(num_meses, _MESES_POR_ANO)
            for linha_idx in range(NUM_LINHAS):
                inicio = linha_idx * num_meses
                for mes_off in range(meses_usados):
                    chave = (linha_idx, f"{ano_pag}-{mes_off + 1:02d}")
                    dados_globais[chave] = valores_float[inicio + mes_off]

            log.append(f"Página {num_pag} (Ano {ano_pag}) processada.")
            if num_meses > _MESES_POR_ANO:
                log.append(
                    f"Página {num_pag}: {num_meses} colunas detectadas; "
                    f"apenas as {_MESES_POR_ANO} primeiras foram usadas."
                )

        if not dados_globais:
            return None, "Nenhum dado extraído."

        # Build the workbook. Keys are "YYYY-MM", so a plain sort is chronological.
        colunas = sorted({c for _, c in dados_globais})
        wb = Workbook()
        ws = wb.active
        ws.title = "Dados CUB-RS"

        # Header row: "Mon/YYYY" for every column.
        cabecalho = ["PROJETO-PADRÃO"]
        for c in colunas:
            ano, mes = c.split("-")
            cabecalho.append(f"{MESES_PT[int(mes)]}/{ano}")
        ws.append(cabecalho)

        for l_idx, nome in enumerate(LINHAS):
            ws.append([nome] + [dados_globais.get((l_idx, c)) for c in colunas])
            # Shade every other group of three rows.
            if (l_idx // 3) % 2 == 0:
                for c_idx in range(1, len(colunas) + 2):
                    ws.cell(row=ws.max_row, column=c_idx).fill = CINZA

        # Number format for all data cells (everything except header row / label column).
        formato = '0.00"%"' if eh_porcentagem else '#,##0.00'
        for r in range(2, ws.max_row + 1):
            for c in range(2, ws.max_column + 1):
                ws.cell(r, c).number_format = formato

        ws.column_dimensions['A'].width = 30

        # Reserve a unique path and close the handle before saving, so the
        # descriptor is not leaked and the file can be reopened for writing
        # on every platform. delete=False keeps the file on disk for download.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
            caminho_saida = temp_file.name
        wb.save(caminho_saida)

        tipo = "Variação (%)" if eh_porcentagem else "Valores (R$)"
        return caminho_saida, f"Tipo detectado: {tipo}\n" + "\n".join(log)

    except Exception as e:
        # UI boundary: report any failure in the log box instead of raising.
        return None, f"Erro: {str(e)}"

# Gradio UI: a PDF upload plus a "years back" slider feed
# processar_pdf_universal; its results go to a download slot and a log box.
demo = gr.Interface(
    fn=processar_pdf_universal,
    inputs=[
        gr.File(label="Suba o PDF (Valores ou Variação)"),
        gr.Slider(minimum=1, maximum=25, value=5, label="Anos"),
    ],
    outputs=[
        gr.File(label="Download"),
        gr.Textbox(label="Log"),
    ],
    title="🏗️ Extrator CUB-RS",
)
demo.launch()