Cco / scripts /validate_schema.py
Gabriel Sapucaia
Deploy CCO Apoena (réplica do cco)
d88a9ad verified
Raw
History Blame Contribute Delete
5.35 kB
#!/usr/bin/env python3
"""Valida o schema real da Apoena (f2m_local.duckdb) vs. o que o cco espera.
Reporta: colunas faltantes por tabela, valores categóricos chave, formato de
origem_subarea (regex de cota/liberação), coluna de teor, e range de datas.
"""
from __future__ import annotations
import sys
from pathlib import Path
import duckdb
ROOT = Path(__file__).resolve().parent.parent
DB = sys.argv[1] if len(sys.argv) > 1 else str(ROOT / "f2m_local.duckdb")
# Colunas que o cco (queries.py/relatorios.py/recomendacoes) assume existir.
EXPECTED = {
"current_transporte": [
"id", "data_inicio", "hora_inicio", "data_fim", "hora_fim", "production_date",
"hora", "turma", "turno", "caminhao", "frota_caminhao", "operador_caminhao",
"carga", "frota_carga", "origem", "origem_subarea", "destino", "destino_subarea",
"material", "grupo_material", "tipo_movimentacao", "massa_calculada", "tempo_ciclo",
"tempo_fila_carga", "tempo_carga", "tempo_fila_basc", "tempo_basc", "tempo_vazio",
"tempo_cheio", "tempo_manobra_vazio", "tempo_manobra_cheio", "dist_vazio",
"dist_cheio", "tipo_ciclo", "correct_destination",
],
"current_registro_estados": [
"data_inicio", "hora_inicio", "data_fim", "hora_fim", "hora", "production_date",
"tipo_equipamento", "turma", "turno", "equipamento", "frota", "duracao",
"codigo", "codigo_grupo", "codigo_status", "operador",
],
"current_categorias_codigos": [
"production_date", "turno", "turma", "tipo_equip", "equip", "h_calendario",
"h_disponivel", "st_operacao", "st_paradas_operacionais", "st_manutencao",
"st_ocioso", "st_inativo", "gp_corretiva", "gp_preventiva",
],
"current_qualidade": [
"production_date", "caminhao", "carga", "material", "origem_subarea",
"massa_calculada", # + coluna de teor: AU (Apoena) ou Ouro (Almas)
],
}
CATEGORICALS = {
"current_transporte": ["grupo_material", "tipo_ciclo", "tipo_movimentacao"],
"current_registro_estados": ["codigo_status", "codigo_grupo", "tipo_equipamento"],
"current_categorias_codigos": ["tipo_equip"],
}
def main() -> int:
con = duckdb.connect(DB, read_only=True)
def cols(tbl: str) -> list[str]:
return [r[0] for r in con.execute(f'DESCRIBE "{tbl}"').fetchall()]
print(f"=== DB: {DB} ===\n")
print("### Colunas faltantes vs. esperado pelo cco ###")
for tbl, expected in EXPECTED.items():
actual = set(cols(tbl))
missing = [c for c in expected if c not in actual]
flag = "OK" if not missing else f"FALTAM: {missing}"
print(f" {tbl:32s} {flag}")
# teor em qualidade
qcols = cols("current_qualidade")
teor = [c for c in qcols if c.upper() in ("AU", "OURO")]
print(f"\n### Coluna de teor em current_qualidade: {teor or 'NENHUMA'} ###")
print(f" (todas as colunas qualidade: {qcols})")
print("\n### Valores categóricos chave ###")
for tbl, colnames in CATEGORICALS.items():
for c in colnames:
try:
rows = con.execute(
f'SELECT TRIM("{c}") v, COUNT(*) n FROM "{tbl}" GROUP BY 1 ORDER BY 2 DESC LIMIT 15'
).fetchall()
vals = ", ".join(f"{v!r}({n})" for v, n in rows)
print(f" {tbl}.{c}: {vals}")
except Exception as e:
print(f" {tbl}.{c}: ERRO {e}")
print("\n### material (top 20) ###")
rows = con.execute(
'SELECT TRIM(material) v, COUNT(*) n FROM current_transporte GROUP BY 1 ORDER BY 2 DESC LIMIT 20'
).fetchall()
for v, n in rows:
print(f" {v!r}: {n}")
print("\n### Amostras de origem_subarea (distintas, 25) ###")
rows = con.execute(
"SELECT DISTINCT TRIM(origem_subarea) FROM current_transporte WHERE origem_subarea IS NOT NULL AND TRIM(origem_subarea) <> '' LIMIT 25"
).fetchall()
for (v,) in rows:
print(f" {v!r}")
print("\n### Amostras de origem / destino (distintas) ###")
for c in ("origem", "destino"):
rows = con.execute(
f"SELECT TRIM({c}) v, COUNT(*) n FROM current_transporte GROUP BY 1 ORDER BY 2 DESC LIMIT 15"
).fetchall()
print(f" {c}: " + ", ".join(f"{v!r}({n})" for v, n in rows))
print("\n### Formato de data/hora (current_transporte) ###")
rows = con.execute(
"SELECT data_inicio, hora_inicio, production_date FROM current_transporte LIMIT 3"
).fetchall()
for r in rows:
print(f" data_inicio={r[0]!r} hora_inicio={r[1]!r} production_date={r[2]!r}")
rng = con.execute(
"SELECT MIN(production_date), MAX(production_date) FROM current_transporte"
).fetchone()
print(f" production_date range (junho): {rng}")
# nomes de equipamento (prefixos)
print("\n### Prefixos de equipamento ###")
for tbl, c in [("current_transporte", "caminhao"), ("current_transporte", "carga"),
("current_registro_estados", "equipamento")]:
rows = con.execute(
f"SELECT DISTINCT TRIM({c}) v FROM \"{tbl}\" WHERE {c} IS NOT NULL AND TRIM({c})<>'' ORDER BY 1 LIMIT 20"
).fetchall()
print(f" {tbl}.{c}: " + ", ".join(repr(v) for (v,) in rows))
con.close()
return 0
if __name__ == "__main__":
raise SystemExit(main())