#!/usr/bin/env python3 """Valida o schema real da Apoena (f2m_local.duckdb) vs. o que o cco espera. Reporta: colunas faltantes por tabela, valores categóricos chave, formato de origem_subarea (regex de cota/liberação), coluna de teor, e range de datas. """ from __future__ import annotations import sys from pathlib import Path import duckdb ROOT = Path(__file__).resolve().parent.parent DB = sys.argv[1] if len(sys.argv) > 1 else str(ROOT / "f2m_local.duckdb") # Colunas que o cco (queries.py/relatorios.py/recomendacoes) assume existir. EXPECTED = { "current_transporte": [ "id", "data_inicio", "hora_inicio", "data_fim", "hora_fim", "production_date", "hora", "turma", "turno", "caminhao", "frota_caminhao", "operador_caminhao", "carga", "frota_carga", "origem", "origem_subarea", "destino", "destino_subarea", "material", "grupo_material", "tipo_movimentacao", "massa_calculada", "tempo_ciclo", "tempo_fila_carga", "tempo_carga", "tempo_fila_basc", "tempo_basc", "tempo_vazio", "tempo_cheio", "tempo_manobra_vazio", "tempo_manobra_cheio", "dist_vazio", "dist_cheio", "tipo_ciclo", "correct_destination", ], "current_registro_estados": [ "data_inicio", "hora_inicio", "data_fim", "hora_fim", "hora", "production_date", "tipo_equipamento", "turma", "turno", "equipamento", "frota", "duracao", "codigo", "codigo_grupo", "codigo_status", "operador", ], "current_categorias_codigos": [ "production_date", "turno", "turma", "tipo_equip", "equip", "h_calendario", "h_disponivel", "st_operacao", "st_paradas_operacionais", "st_manutencao", "st_ocioso", "st_inativo", "gp_corretiva", "gp_preventiva", ], "current_qualidade": [ "production_date", "caminhao", "carga", "material", "origem_subarea", "massa_calculada", # + coluna de teor: AU (Apoena) ou Ouro (Almas) ], } CATEGORICALS = { "current_transporte": ["grupo_material", "tipo_ciclo", "tipo_movimentacao"], "current_registro_estados": ["codigo_status", "codigo_grupo", "tipo_equipamento"], "current_categorias_codigos": ["tipo_equip"], } def main() -> int: con = duckdb.connect(DB, read_only=True) def cols(tbl: str) -> list[str]: return [r[0] for r in con.execute(f'DESCRIBE "{tbl}"').fetchall()] print(f"=== DB: {DB} ===\n") print("### Colunas faltantes vs. esperado pelo cco ###") for tbl, expected in EXPECTED.items(): actual = set(cols(tbl)) missing = [c for c in expected if c not in actual] flag = "OK" if not missing else f"FALTAM: {missing}" print(f" {tbl:32s} {flag}") # teor em qualidade qcols = cols("current_qualidade") teor = [c for c in qcols if c.upper() in ("AU", "OURO")] print(f"\n### Coluna de teor em current_qualidade: {teor or 'NENHUMA'} ###") print(f" (todas as colunas qualidade: {qcols})") print("\n### Valores categóricos chave ###") for tbl, colnames in CATEGORICALS.items(): for c in colnames: try: rows = con.execute( f'SELECT TRIM("{c}") v, COUNT(*) n FROM "{tbl}" GROUP BY 1 ORDER BY 2 DESC LIMIT 15' ).fetchall() vals = ", ".join(f"{v!r}({n})" for v, n in rows) print(f" {tbl}.{c}: {vals}") except Exception as e: print(f" {tbl}.{c}: ERRO {e}") print("\n### material (top 20) ###") rows = con.execute( 'SELECT TRIM(material) v, COUNT(*) n FROM current_transporte GROUP BY 1 ORDER BY 2 DESC LIMIT 20' ).fetchall() for v, n in rows: print(f" {v!r}: {n}") print("\n### Amostras de origem_subarea (distintas, 25) ###") rows = con.execute( "SELECT DISTINCT TRIM(origem_subarea) FROM current_transporte WHERE origem_subarea IS NOT NULL AND TRIM(origem_subarea) <> '' LIMIT 25" ).fetchall() for (v,) in rows: print(f" {v!r}") print("\n### Amostras de origem / destino (distintas) ###") for c in ("origem", "destino"): rows = con.execute( f"SELECT TRIM({c}) v, COUNT(*) n FROM current_transporte GROUP BY 1 ORDER BY 2 DESC LIMIT 15" ).fetchall() print(f" {c}: " + ", ".join(f"{v!r}({n})" for v, n in rows)) print("\n### Formato de data/hora (current_transporte) ###") rows = con.execute( "SELECT data_inicio, hora_inicio, production_date FROM current_transporte LIMIT 3" ).fetchall() for r in rows: print(f" data_inicio={r[0]!r} hora_inicio={r[1]!r} production_date={r[2]!r}") rng = con.execute( "SELECT MIN(production_date), MAX(production_date) FROM current_transporte" ).fetchone() print(f" production_date range (junho): {rng}") # nomes de equipamento (prefixos) print("\n### Prefixos de equipamento ###") for tbl, c in [("current_transporte", "caminhao"), ("current_transporte", "carga"), ("current_registro_estados", "equipamento")]: rows = con.execute( f"SELECT DISTINCT TRIM({c}) v FROM \"{tbl}\" WHERE {c} IS NOT NULL AND TRIM({c})<>'' ORDER BY 1 LIMIT 20" ).fetchall() print(f" {tbl}.{c}: " + ", ".join(repr(v) for (v,) in rows)) con.close() return 0 if __name__ == "__main__": raise SystemExit(main())