File size: 3,030 Bytes
db886e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from typing import Dict, Any, List, Tuple
import numpy as np
from collections import deque, defaultdict
from sklearn.preprocessing import StandardScaler

def _sum_inputs_btc(tx: Dict[str, Any]) -> float:
    s = 0.0
    for v in tx.get("vin", []):
        s += float(v.get("prevout_value") or 0.0) / 1e8
    return s

def _sum_outputs_btc(tx: Dict[str, Any]) -> float:
    s = 0.0
    for o in tx.get("vout", []):
        s += float(o.get("value") or 0.0) / 1e8
    return s

def _compute_distances(n: int, edges: List[Tuple[int,int]], center: int) -> np.ndarray:
    # undirected BFS distance
    adj = [[] for _ in range(n)]
    for u,v in edges:
        adj[u].append(v); adj[v].append(u)
    dist = np.full(n, fill_value=-1, dtype=np.int32)
    q = deque([center]); dist[center] = 0
    while q:
        u = q.popleft()
        for nb in adj[u]:
            if dist[nb] == -1:
                dist[nb] = dist[u] + 1
                q.append(nb)
    return dist

def build_features(nodes: List[str], edges: List[Tuple[int,int]], center_idx: int, node_meta: Dict[str, Dict[str, Any]]):
    """Build a per-node feature matrix for a transaction ego-subgraph.

    Parameters
    ----------
    nodes : txids, one per graph node (index = node id used in `edges`).
    edges : directed (src, dst) index pairs.
    center_idx : node index of the ego center (used for BFS distance).
    node_meta : txid -> raw transaction dict (vin/vout/block_height);
        missing entries contribute all-zero features.

    Returns (feats, feature_names) where `feats` is an (n, 12) float32-ish
    array whose columns follow `feature_names`. Nodes unreachable from the
    center carry distance -1 (sentinel from `_compute_distances`).
    """
    n = len(nodes)

    # Directed degree counts.
    in_deg = np.zeros(n, dtype=np.float32)
    out_deg = np.zeros(n, dtype=np.float32)
    for src, dst in edges:
        out_deg[src] += 1
        in_deg[dst] += 1
    deg = in_deg + out_deg
    # Epsilon avoids division by zero for sink-less nodes.
    ratio_in_out = in_deg / (out_deg + 1e-6)

    # Per-transaction value/size features pulled from the raw metadata.
    sum_in_btc = np.zeros(n, dtype=np.float32)
    sum_out_btc = np.zeros(n, dtype=np.float32)
    n_inputs = np.zeros(n, dtype=np.float32)
    n_outputs = np.zeros(n, dtype=np.float32)
    block_height = np.zeros(n, dtype=np.float32)

    for node_id, txid in enumerate(nodes):
        tx = node_meta.get(txid) or {}
        n_inputs[node_id] = float(len(tx.get("vin", []) or []))
        n_outputs[node_id] = float(len(tx.get("vout", []) or []))
        sum_in_btc[node_id] = _sum_inputs_btc(tx)
        sum_out_btc[node_id] = _sum_outputs_btc(tx)
        height = tx.get("block_height")
        block_height[node_id] = 0.0 if height is None else float(height)

    # log1p compresses the heavy-tailed BTC value distributions.
    log_sum_in = np.log1p(sum_in_btc)
    log_sum_out = np.log1p(sum_out_btc)
    distance = _compute_distances(n, edges, center_idx)

    columns = [
        in_deg, out_deg, deg, ratio_in_out,
        n_inputs, n_outputs,
        sum_in_btc, sum_out_btc,
        log_sum_in, log_sum_out,
        distance.astype(np.float32),
        block_height,
    ]
    feats = np.stack(columns, axis=1)

    feature_names = [
        "in_degree","out_degree","degree","ratio_in_out",
        "n_inputs","n_outputs",
        "sum_in_btc","sum_out_btc",
        "log_sum_in","log_sum_out",
        "distance","block_height",
    ]
    return feats, feature_names

def scale_features(X: np.ndarray, scaler=None):
    """Standardize feature matrix `X` with a StandardScaler.

    If `scaler` is provided it is applied as-is (transform only); otherwise a
    new scaler is fitted on `X` itself. Returns (scaled float32 array, the
    scaler used, a human-readable note describing which path was taken).
    """
    if scaler is not None:
        scaled = scaler.transform(X)
        note = "Used provided scaler from model repo."
    else:
        scaler = StandardScaler()
        scaled = scaler.fit_transform(X)
        note = "Fitted new StandardScaler on ego-subgraph (domain shift vs Elliptic)."
    return scaled.astype("float32"), scaler, note