| |
| """Valida o schema real da Apoena (f2m_local.duckdb) vs. o que o cco espera. |
| |
| Reporta: colunas faltantes por tabela, valores categóricos chave, formato de |
| origem_subarea (regex de cota/liberação), coluna de teor, e range de datas. |
| """ |
| from __future__ import annotations |
|
|
| import sys |
| from pathlib import Path |
|
|
| import duckdb |
|
|
| ROOT = Path(__file__).resolve().parent.parent |
| DB = sys.argv[1] if len(sys.argv) > 1 else str(ROOT / "f2m_local.duckdb") |
|
|
| |
| EXPECTED = { |
| "current_transporte": [ |
| "id", "data_inicio", "hora_inicio", "data_fim", "hora_fim", "production_date", |
| "hora", "turma", "turno", "caminhao", "frota_caminhao", "operador_caminhao", |
| "carga", "frota_carga", "origem", "origem_subarea", "destino", "destino_subarea", |
| "material", "grupo_material", "tipo_movimentacao", "massa_calculada", "tempo_ciclo", |
| "tempo_fila_carga", "tempo_carga", "tempo_fila_basc", "tempo_basc", "tempo_vazio", |
| "tempo_cheio", "tempo_manobra_vazio", "tempo_manobra_cheio", "dist_vazio", |
| "dist_cheio", "tipo_ciclo", "correct_destination", |
| ], |
| "current_registro_estados": [ |
| "data_inicio", "hora_inicio", "data_fim", "hora_fim", "hora", "production_date", |
| "tipo_equipamento", "turma", "turno", "equipamento", "frota", "duracao", |
| "codigo", "codigo_grupo", "codigo_status", "operador", |
| ], |
| "current_categorias_codigos": [ |
| "production_date", "turno", "turma", "tipo_equip", "equip", "h_calendario", |
| "h_disponivel", "st_operacao", "st_paradas_operacionais", "st_manutencao", |
| "st_ocioso", "st_inativo", "gp_corretiva", "gp_preventiva", |
| ], |
| "current_qualidade": [ |
| "production_date", "caminhao", "carga", "material", "origem_subarea", |
| "massa_calculada", |
| ], |
| } |
|
|
| CATEGORICALS = { |
| "current_transporte": ["grupo_material", "tipo_ciclo", "tipo_movimentacao"], |
| "current_registro_estados": ["codigo_status", "codigo_grupo", "tipo_equipamento"], |
| "current_categorias_codigos": ["tipo_equip"], |
| } |
|
|
|
|
| def main() -> int: |
| con = duckdb.connect(DB, read_only=True) |
|
|
| def cols(tbl: str) -> list[str]: |
| return [r[0] for r in con.execute(f'DESCRIBE "{tbl}"').fetchall()] |
|
|
| print(f"=== DB: {DB} ===\n") |
|
|
| print("### Colunas faltantes vs. esperado pelo cco ###") |
| for tbl, expected in EXPECTED.items(): |
| actual = set(cols(tbl)) |
| missing = [c for c in expected if c not in actual] |
| flag = "OK" if not missing else f"FALTAM: {missing}" |
| print(f" {tbl:32s} {flag}") |
|
|
| |
| qcols = cols("current_qualidade") |
| teor = [c for c in qcols if c.upper() in ("AU", "OURO")] |
| print(f"\n### Coluna de teor em current_qualidade: {teor or 'NENHUMA'} ###") |
| print(f" (todas as colunas qualidade: {qcols})") |
|
|
| print("\n### Valores categóricos chave ###") |
| for tbl, colnames in CATEGORICALS.items(): |
| for c in colnames: |
| try: |
| rows = con.execute( |
| f'SELECT TRIM("{c}") v, COUNT(*) n FROM "{tbl}" GROUP BY 1 ORDER BY 2 DESC LIMIT 15' |
| ).fetchall() |
| vals = ", ".join(f"{v!r}({n})" for v, n in rows) |
| print(f" {tbl}.{c}: {vals}") |
| except Exception as e: |
| print(f" {tbl}.{c}: ERRO {e}") |
|
|
| print("\n### material (top 20) ###") |
| rows = con.execute( |
| 'SELECT TRIM(material) v, COUNT(*) n FROM current_transporte GROUP BY 1 ORDER BY 2 DESC LIMIT 20' |
| ).fetchall() |
| for v, n in rows: |
| print(f" {v!r}: {n}") |
|
|
| print("\n### Amostras de origem_subarea (distintas, 25) ###") |
| rows = con.execute( |
| "SELECT DISTINCT TRIM(origem_subarea) FROM current_transporte WHERE origem_subarea IS NOT NULL AND TRIM(origem_subarea) <> '' LIMIT 25" |
| ).fetchall() |
| for (v,) in rows: |
| print(f" {v!r}") |
|
|
| print("\n### Amostras de origem / destino (distintas) ###") |
| for c in ("origem", "destino"): |
| rows = con.execute( |
| f"SELECT TRIM({c}) v, COUNT(*) n FROM current_transporte GROUP BY 1 ORDER BY 2 DESC LIMIT 15" |
| ).fetchall() |
| print(f" {c}: " + ", ".join(f"{v!r}({n})" for v, n in rows)) |
|
|
| print("\n### Formato de data/hora (current_transporte) ###") |
| rows = con.execute( |
| "SELECT data_inicio, hora_inicio, production_date FROM current_transporte LIMIT 3" |
| ).fetchall() |
| for r in rows: |
| print(f" data_inicio={r[0]!r} hora_inicio={r[1]!r} production_date={r[2]!r}") |
| rng = con.execute( |
| "SELECT MIN(production_date), MAX(production_date) FROM current_transporte" |
| ).fetchone() |
| print(f" production_date range (junho): {rng}") |
|
|
| |
| print("\n### Prefixos de equipamento ###") |
| for tbl, c in [("current_transporte", "caminhao"), ("current_transporte", "carga"), |
| ("current_registro_estados", "equipamento")]: |
| rows = con.execute( |
| f"SELECT DISTINCT TRIM({c}) v FROM \"{tbl}\" WHERE {c} IS NOT NULL AND TRIM({c})<>'' ORDER BY 1 LIMIT 20" |
| ).fetchall() |
| print(f" {tbl}.{c}: " + ", ".join(repr(v) for (v,) in rows)) |
|
|
| con.close() |
| return 0 |
|
|
|
|
| if __name__ == "__main__": |
| raise SystemExit(main()) |
|
|