Danielfonseca1212 committed on
Commit
6864b79
·
verified ·
1 Parent(s): 0e321eb

Create routes .py

Browse files
Files changed (1) hide show
  1. routes .py +148 -0
routes .py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
data/routes.py
Automatic discovery of Atomic Routes from foreign keys.

An "Atomic Route" is a path between tables that are connected via FKs.
We do NOT convert the data to a graph — routes are inferred from the
relational schema.
"""
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import List, Dict, Tuple, Optional
11
+ import numpy as np
12
+ from collections import deque
13
+
14
+
15
# ─── HARD-CODED TPC-H SCHEMA ─────────────────────────────────────────────────

# Foreign-key relationships used as the default schema for route discovery.
# Each entry is (source_table, fk_column, target_table, pk_column).
TPCH_FOREIGN_KEYS: List[Tuple[str, str, str, str]] = [
    ("orders", "o_custkey", "customers", "c_custkey"),
    ("lineitem", "l_orderkey", "orders", "o_orderkey"),
    ("lineitem", "l_suppkey", "supplier", "s_suppkey"),
    ("lineitem", "l_partkey", "part", "p_partkey"),
    ("customers", "c_nationkey", "nation", "n_nationkey"),
    ("supplier", "s_nationkey", "nation", "n_nationkey"),
]
26
+
27
+
28
@dataclass
class RouteConfig:
    """Settings that control atomic-route discovery."""

    # Maximum number of FK hops a route may span from the target table.
    max_hops: int = 3
    # Table the search starts from (the entity being predicted).
    target_table: str = "customers"
    # NOTE(review): not read anywhere in the visible code; presumably a
    # pruning threshold applied by callers — confirm.
    min_attention_weight: float = 0.1
33
+
34
+
35
@dataclass
class AtomicRoute:
    """A path between tables connected through foreign keys."""

    # Table names in traversal order, e.g. ["customers", "orders", "lineitem"].
    path: List[str]
    # One (from_table, from_column, to_table, to_column) tuple per hop.
    fk_edges: List[Tuple[str, str, str, str]]
    # Derived from `path` in __post_init__; any value passed in is overwritten.
    n_hops: int = 0
    attention_weight: float = 1.0
    active: bool = True

    def __post_init__(self):
        # Clamp at zero so an empty path reports 0 hops rather than -1.
        self.n_hops = max(len(self.path) - 1, 0)

    def __repr__(self):
        return f"Route({' → '.join(self.path)}, α={self.attention_weight:.3f})"
48
+
49
+
50
+ def _build_adjacency(fk_list: List[Tuple]) -> Dict[str, List[Tuple]]:
51
+ """Constrói lista de adjacência bidirecional a partir das FKs."""
52
+ adj = {}
53
+ for (src, src_col, dst, dst_col) in fk_list:
54
+ if src not in adj:
55
+ adj[src] = []
56
+ if dst not in adj:
57
+ adj[dst] = []
58
+ adj[src].append((dst, src_col, dst_col, "forward"))
59
+ adj[dst].append((src, dst_col, src_col, "backward"))
60
+ return adj
61
+
62
+
63
def discover_atomic_routes(
    tables: Dict,
    config: RouteConfig,
    fk_list: Optional[List[Tuple]] = None,
) -> List[AtomicRoute]:
    """
    Enumerate every atomic route reachable from the target table.

    Runs a breadth-first search from ``config.target_table`` over the
    foreign-key relationships, following each FK in both directions, and
    records every path of 1 to ``config.max_hops`` hops that never revisits
    a table. Only the schema is consulted; no graph is built from the data.

    Args:
        tables: Container of available table names (membership is tested
            with ``in``); neighbours absent from it are not traversed.
        config: Supplies the start table and the hop limit.
        fk_list: ``(source_table, fk_column, target_table, pk_column)``
            tuples. Defaults to ``TPCH_FOREIGN_KEYS``.

    Returns:
        Routes sorted by descending weight, with ``attention_weight``
        softmax-normalised across all routes. Routes longer than two hops
        are returned with ``active=False``. Empty if nothing is reachable.
    """
    if fk_list is None:
        fk_list = TPCH_FOREIGN_KEYS

    adj = _build_adjacency(fk_list)
    start = config.target_table
    routes: List[AtomicRoute] = []

    # Each queue entry is (path so far, edges walked, tables already on path).
    queue = deque([([start], [], {start})])

    while queue:
        path, edges, visited = queue.popleft()
        current = path[-1]
        n_hops = len(path) - 1

        # The bare start table (0 hops) is not a route.
        if n_hops >= 1:
            routes.append(
                AtomicRoute(
                    path=list(path),
                    fk_edges=list(edges),
                    attention_weight=_initial_attention(n_hops),
                    active=(n_hops <= 2),
                )
            )

        # Hop budget exhausted: keep the route but do not extend it.
        if n_hops >= config.max_hops:
            continue

        for neighbor, col_a, col_b, _direction in adj.get(current, []):
            if neighbor in visited or neighbor not in tables:
                continue
            # Fresh list/set per branch so sibling paths never share state.
            queue.append(
                (
                    path + [neighbor],
                    edges + [(current, col_a, neighbor, col_b)],
                    visited | {neighbor},
                )
            )

    if not routes:
        return routes

    # Shorter routes carry a larger initial weight, so they sort first.
    routes.sort(key=lambda r: -r.attention_weight)

    # Softmax over the initial weights so the final weights sum to 1.
    exp_weights = np.exp(np.array([r.attention_weight for r in routes]))
    for route, weight in zip(routes, exp_weights / exp_weights.sum()):
        route.attention_weight = float(weight)

    return routes
125
+
126
+
127
+ def _initial_attention(n_hops: int) -> float:
128
+ """Peso inicial decrescente por número de hops (heurística)."""
129
+ return 1.0 / (n_hops ** 1.5)
130
+
131
+
132
def routes_to_dataframe(routes: List["AtomicRoute"]):
    """Convert a list of routes into a pandas DataFrame for display.

    Args:
        routes: Routes to tabulate; each must expose ``path``, ``n_hops``,
            ``attention_weight`` and ``active``.

    Returns:
        A DataFrame with one row per route and the columns ``Rota``,
        ``Hops``, ``Peso α`` (weight rounded to 4 decimals) and ``Ativa``.
        The columns are present even when ``routes`` is empty.
    """
    # Imported lazily so pandas is only needed when this helper is called.
    import pandas as pd

    columns = ["Rota", "Hops", "Peso α", "Ativa"]
    rows = [
        {
            "Rota": " → ".join(r.path),
            "Hops": r.n_hops,
            "Peso α": round(r.attention_weight, 4),
            "Ativa": r.active,
        }
        for r in routes
    ]
    # Explicit columns keep the frame's shape stable for an empty input.
    return pd.DataFrame(rows, columns=columns)
141
+
142
+
143
def get_feature_tables_for_route(
    route: "AtomicRoute",
    tables: Dict,
) -> List:
    """Return the tables along ``route`` in path order.

    Args:
        route: Route whose ``path`` lists the table names to fetch.
        tables: Mapping from table name to table object (presumably pandas
            DataFrames — confirm against the caller).

    Returns:
        The values of ``tables`` for each name in ``route.path``, in order.
        Names missing from ``tables`` are skipped rather than raising.
    """
    return [tables[name] for name in route.path if name in tables]