Spaces:

aura-apn
/

Cco

Running

Cco / scripts /validate_schema.py

Gabriel Sapucaia

Deploy CCO Apoena (réplica do cco)

d88a9ad verified 21 days ago

5.35 kB

	#!/usr/bin/env python3
	"""Valida o schema real da Apoena (f2m_local.duckdb) vs. o que o cco espera.

	Reporta: colunas faltantes por tabela, valores categóricos chave, formato de
	origem_subarea (regex de cota/liberação), coluna de teor, e range de datas.
	"""
	from __future__ import annotations

	import sys
	from pathlib import Path

	import duckdb

	ROOT = Path(__file__).resolve().parent.parent
	DB = sys.argv[1] if len(sys.argv) > 1 else str(ROOT / "f2m_local.duckdb")

	# Colunas que o cco (queries.py/relatorios.py/recomendacoes) assume existir.
	EXPECTED = {
	"current_transporte": [
	"id", "data_inicio", "hora_inicio", "data_fim", "hora_fim", "production_date",
	"hora", "turma", "turno", "caminhao", "frota_caminhao", "operador_caminhao",
	"carga", "frota_carga", "origem", "origem_subarea", "destino", "destino_subarea",
	"material", "grupo_material", "tipo_movimentacao", "massa_calculada", "tempo_ciclo",
	"tempo_fila_carga", "tempo_carga", "tempo_fila_basc", "tempo_basc", "tempo_vazio",
	"tempo_cheio", "tempo_manobra_vazio", "tempo_manobra_cheio", "dist_vazio",
	"dist_cheio", "tipo_ciclo", "correct_destination",
	],
	"current_registro_estados": [
	"data_inicio", "hora_inicio", "data_fim", "hora_fim", "hora", "production_date",
	"tipo_equipamento", "turma", "turno", "equipamento", "frota", "duracao",
	"codigo", "codigo_grupo", "codigo_status", "operador",
	],
	"current_categorias_codigos": [
	"production_date", "turno", "turma", "tipo_equip", "equip", "h_calendario",
	"h_disponivel", "st_operacao", "st_paradas_operacionais", "st_manutencao",
	"st_ocioso", "st_inativo", "gp_corretiva", "gp_preventiva",
	],
	"current_qualidade": [
	"production_date", "caminhao", "carga", "material", "origem_subarea",
	"massa_calculada", # + coluna de teor: AU (Apoena) ou Ouro (Almas)
	],
	}

	CATEGORICALS = {
	"current_transporte": ["grupo_material", "tipo_ciclo", "tipo_movimentacao"],
	"current_registro_estados": ["codigo_status", "codigo_grupo", "tipo_equipamento"],
	"current_categorias_codigos": ["tipo_equip"],
	}


	def main() -> int:
	con = duckdb.connect(DB, read_only=True)

	def cols(tbl: str) -> list[str]:
	return [r[0] for r in con.execute(f'DESCRIBE "{tbl}"').fetchall()]

	print(f"=== DB: {DB} ===\n")

	print("### Colunas faltantes vs. esperado pelo cco ###")
	for tbl, expected in EXPECTED.items():
	actual = set(cols(tbl))
	missing = [c for c in expected if c not in actual]
	flag = "OK" if not missing else f"FALTAM: {missing}"
	print(f" {tbl:32s} {flag}")

	# teor em qualidade
	qcols = cols("current_qualidade")
	teor = [c for c in qcols if c.upper() in ("AU", "OURO")]
	print(f"\n### Coluna de teor em current_qualidade: {teor or 'NENHUMA'} ###")
	print(f" (todas as colunas qualidade: {qcols})")

	print("\n### Valores categóricos chave ###")
	for tbl, colnames in CATEGORICALS.items():
	for c in colnames:
	try:
	rows = con.execute(
	f'SELECT TRIM("{c}") v, COUNT(*) n FROM "{tbl}" GROUP BY 1 ORDER BY 2 DESC LIMIT 15'
	).fetchall()
	vals = ", ".join(f"{v!r}({n})" for v, n in rows)
	print(f" {tbl}.{c}: {vals}")
	except Exception as e:
	print(f" {tbl}.{c}: ERRO {e}")

	print("\n### material (top 20) ###")
	rows = con.execute(
	'SELECT TRIM(material) v, COUNT(*) n FROM current_transporte GROUP BY 1 ORDER BY 2 DESC LIMIT 20'
	).fetchall()
	for v, n in rows:
	print(f" {v!r}: {n}")

	print("\n### Amostras de origem_subarea (distintas, 25) ###")
	rows = con.execute(
	"SELECT DISTINCT TRIM(origem_subarea) FROM current_transporte WHERE origem_subarea IS NOT NULL AND TRIM(origem_subarea) <> '' LIMIT 25"
	).fetchall()
	for (v,) in rows:
	print(f" {v!r}")

	print("\n### Amostras de origem / destino (distintas) ###")
	for c in ("origem", "destino"):
	rows = con.execute(
	f"SELECT TRIM({c}) v, COUNT(*) n FROM current_transporte GROUP BY 1 ORDER BY 2 DESC LIMIT 15"
	).fetchall()
	print(f" {c}: " + ", ".join(f"{v!r}({n})" for v, n in rows))

	print("\n### Formato de data/hora (current_transporte) ###")
	rows = con.execute(
	"SELECT data_inicio, hora_inicio, production_date FROM current_transporte LIMIT 3"
	).fetchall()
	for r in rows:
	print(f" data_inicio={r[0]!r} hora_inicio={r[1]!r} production_date={r[2]!r}")
	rng = con.execute(
	"SELECT MIN(production_date), MAX(production_date) FROM current_transporte"
	).fetchone()
	print(f" production_date range (junho): {rng}")

	# nomes de equipamento (prefixos)
	print("\n### Prefixos de equipamento ###")
	for tbl, c in [("current_transporte", "caminhao"), ("current_transporte", "carga"),
	("current_registro_estados", "equipamento")]:
	rows = con.execute(
	f"SELECT DISTINCT TRIM({c}) v FROM \"{tbl}\" WHERE {c} IS NOT NULL AND TRIM({c})<>'' ORDER BY 1 LIMIT 20"
	).fetchall()
	print(f" {tbl}.{c}: " + ", ".join(repr(v) for (v,) in rows))

	con.close()
	return 0


	if __name__ == "__main__":
	raise SystemExit(main())