Omarrran commited on
Commit
babb07c
·
verified ·
1 Parent(s): c873006

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +606 -0
app.py ADDED
@@ -0,0 +1,606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Blockchain Intelligence Dashboard — 8-Dimension Cross-Chain Analyzer
3
+ HuggingFace Space: Interactive exploration of 50K real cryptocurrency transactions
4
+ across ETC, BTC, DOGE, BCH, DASH.
5
+
6
+ Upload your own CSVs or explore the pre-loaded dataset.
7
+ """
8
+
9
+ import gradio as gr
10
+ import pandas as pd
11
+ import numpy as np
12
+ import plotly.graph_objects as go
13
+ import plotly.express as px
14
+ from plotly.subplots import make_subplots
15
+ from scipy import stats
16
+ from sklearn.ensemble import IsolationForest, RandomForestClassifier, GradientBoostingRegressor
17
+ from sklearn.cluster import DBSCAN
18
+ from sklearn.preprocessing import StandardScaler
19
+ from sklearn.model_selection import train_test_split
20
+ from sklearn.metrics import roc_auc_score, mean_absolute_error, mean_squared_error, r2_score
21
+ import json
22
+ import warnings
23
+ import io
24
+
25
# Suppress every Python warning process-wide.
warnings.filterwarnings("ignore")
# Seed NumPy's global random number generator.
np.random.seed(42)

# ─── Color palette ───
# Hex color per chain ticker; every chart looks its bar/line colors up here.
COLORS = {
    "ETC": "#627EEA", "BTC": "#F7931A", "DOGE": "#C2A633",
    "BCH": "#8DC351", "DASH": "#008DE4",
}
# Display order used by charts that show all five chains.
CHAIN_ORDER = ["ETC", "BTC", "DOGE", "BCH", "DASH"]
# Chains whose results are stored under the "utxo" sub-dicts of PRELOADED
# (every chain except ETC, which is stored under "etc").
UTXO_CHAINS = ["BTC", "DOGE", "BCH", "DASH"]

# ─── Pre-loaded results from real analysis ───
# Static results parsed once at import time. Top-level keys: "meta" plus one
# entry per research dimension ("rd1_fee" ... "rd8_privacy"). rd1 and rd5 are
# keyed directly by chain ticker; rd2, rd4 and rd8 split into "etc" and
# "utxo" -> ticker; rd3 and rd6 hold single-chain model results.
PRELOADED = json.loads(r'''
{
"meta": {"dataset": "Omarrran/50k_Cryptocurrency_Transaction_Dataset_by_HNM", "chains": ["BCH","BTC","DASH","DOGE","ETC"], "total_tx": 50000},
"rd1_fee": {"ETC": {"mean": 6.200637, "median": 1.0, "std": 41.592424, "cv": 6.7078, "skewness": 89.1592, "kurtosis": 8512.9975, "n": 10000}, "BTC": {"mean": 9.4692e-06, "median": 4.4e-06, "cv": 3.1239, "skewness": 22.0863, "kurtosis": 780.3443, "n": 9997}, "DOGE": {"mean": 0.0716, "median": 0.0104, "cv": 7.2477, "skewness": 17.3935, "kurtosis": 389.2284, "n": 9986}, "BCH": {"mean": 0.000307, "median": 3.74e-06, "cv": 15.8896, "skewness": 17.8966, "kurtosis": 320.7481, "n": 9879}, "DASH": {"mean": 6.19e-05, "median": 6e-06, "cv": 13.9415, "skewness": 71.7787, "kurtosis": 5477.6621, "n": 6422}, "levene_etc_btc": {"stat": 51.4278, "p": 0.0, "sig": true}},
"rd2_whale": {"etc": {"threshold_99": 1075.7688, "whale_count": 100, "whale_vol_pct": 88.43, "gini": 0.9871, "freq_whale_addrs": 11, "ks_stat": 0.0733, "ks_p": 0.635, "mean": 75.7417, "median": 0.3645, "max": 91499.99}, "utxo": {"BTC": {"anomalies": 100, "whale_vol_pct": 58.53, "gini": 0.9656}, "DOGE": {"anomalies": 100, "whale_vol_pct": 98.96, "gini": 0.9984}, "BCH": {"anomalies": 100, "whale_vol_pct": 72.56, "gini": 0.9646}, "DASH": {"anomalies": 100, "whale_vol_pct": 53.37, "gini": 0.9019}}},
"rd3_reliability": {"failure_rate": 0.0007, "failed": 7, "total": 10000, "auc": 0.4985, "features": {"gas": 0.2903, "zero_val": 0.2382, "value_etc": 0.1679, "log_val": 0.1181, "gas_price_gwei": 0.0775, "log_gp": 0.074, "high_gas": 0.0215, "hour": 0.0126}},
"rd4_aml": {"etc": {"round_tx": 488, "round_pct": 4.88, "rapid_tx": 9060, "rapid_pct": 90.6, "equal_val_patterns": 5251, "freq_senders": 35}, "utxo": {"BTC": {"peeling": 5018, "round_outputs": 5466, "high_risk_rate": 0.4013}, "DOGE": {"peeling": 383, "round_outputs": 9334, "high_risk_rate": 0.0141}, "BCH": {"peeling": 5022, "round_outputs": 1903, "high_risk_rate": 0.1733}, "DASH": {"peeling": 4907, "round_outputs": 2796, "high_risk_rate": 0.3532}}, "total_peeling": 15330},
"rd5_velocity": {"BCH": {"velocity": 53.27, "bh_ratio": 0.4695, "health": 44.15}, "BTC": {"velocity": 2.49, "bh_ratio": 0.0, "health": 30.0}, "DASH": {"velocity": 23.40, "bh_ratio": 0.5836, "health": 47.54}, "DOGE": {"velocity": 30977.50, "bh_ratio": 0.0, "health": 70.0}, "ETC": {"velocity": 378.52, "bh_ratio": 0.4746, "health": 44.73}},
"rd6_mev": {"pred": {"mae": 3.5607, "rmse": 6.7611, "r2": 0.2686, "features": {"ma10": 0.493, "ma30": 0.4696, "std10": 0.0189, "l1": 0.0085, "vr": 0.0055, "min": 0.0022, "l3": 0.0014, "l5": 0.0007, "hour": 0.0003}}, "mev": {"candidates_z3": 4, "front_run": 3, "mev_rate_pct": 0.0401}},
"rd7_arbitrage": {"coint": {"BTC-BCH": {"adf": -9.3522, "coint": true}, "BTC-DOGE": {"adf": -9.3398, "coint": true}, "ETC-DASH": {"adf": -9.5276, "coint": true}, "BTC-ETC": {"adf": -9.3351, "coint": true}, "DOGE-DASH": {"adf": -8.4527, "coint": true}}, "signals": [{"pair": "BTC-BCH", "count": 518, "avg_div": 4.3728, "max_div": 12.1035}, {"pair": "BTC-DOGE", "count": 339, "avg_div": 5.4999, "max_div": 15.0246}, {"pair": "ETC-DASH", "count": 758, "avg_div": 4.3304, "max_div": 10.2677}], "total_signals": 1615, "coint_pairs": 5},
"rd8_privacy": {"etc": {"unique": 4232, "reused": 2351, "reuse_rate": 0.5555, "max_reuse": 2487, "entropy": 8.6111, "max_entropy": 12.0471, "norm_entropy": 0.7148}, "utxo": {"BTC": {"risk_score": 0.6272}, "DOGE": {"risk_score": 0.6356}, "BCH": {"risk_score": 0.4591}, "DASH": {"risk_score": 0.4417}}}
}
''')
50
+
51
+
52
+ # ═══════════════════════════════════════════════════════════
53
+ # VISUALIZATION BUILDERS
54
+ # ═══════════════════════════════════════════════════════════
55
+
56
def build_overview():
    """Build the six-panel cross-chain overview figure from ``PRELOADED``.

    The figure is a 2x3 grid of bar charts with one bar per chain in
    ``CHAIN_ORDER``: fee CV, whale volume share, Gini coefficient, velocity
    (log y-axis), AML high-risk rate and privacy risk.

    Returns:
        The assembled Plotly figure.
    """
    chains = CHAIN_ORDER
    colors = [COLORS[c] for c in chains]

    rd1 = PRELOADED["rd1_fee"]
    rd2 = PRELOADED["rd2_whale"]
    rd4 = PRELOADED["rd4_aml"]
    rd5 = PRELOADED["rd5_velocity"]
    rd8 = PRELOADED["rd8_privacy"]

    # Values are looked up per chain (rather than concatenating an ETC value
    # with a UTXO list) so bars can never drift out of step with their labels.
    def whale_stat(chain, key):
        entry = rd2["etc"] if chain == "ETC" else rd2["utxo"][chain]
        return entry[key]

    def aml_risk(chain):
        # No high-risk rate is stored for ETC; it is plotted as 0.
        return 0 if chain == "ETC" else rd4["utxo"][chain]["high_risk_rate"] * 100

    def privacy_risk(chain):
        # ETC has no risk score; 1 - normalized entropy is used in its place.
        if chain == "ETC":
            return 1 - rd8["etc"]["norm_entropy"]
        return rd8["utxo"][chain]["risk_score"]

    # (subplot title, per-chain values, bar-label format) in row-major order.
    panels = [
        ("Fee CV", [rd1.get(c, {}).get("cv", 0) for c in chains], "{:.1f}"),
        ("Whale Vol %", [whale_stat(c, "whale_vol_pct") for c in chains], "{:.0f}%"),
        ("Gini Coefficient", [whale_stat(c, "gini") for c in chains], "{:.3f}"),
        ("Velocity (log)", [rd5[c]["velocity"] for c in chains], "{:.1f}"),
        ("AML Risk Rate %", [aml_risk(c) for c in chains], "{:.1f}"),
        ("Privacy Risk", [privacy_risk(c) for c in chains], "{:.3f}"),
    ]

    n_rows, n_cols = 2, 3
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=tuple(title for title, _, _ in panels),
        specs=[[{"type": "bar"} for _ in range(n_cols)] for _ in range(n_rows)],
        horizontal_spacing=0.08, vertical_spacing=0.15,
    )

    for position, (_, values, label_fmt) in enumerate(panels):
        row, col = divmod(position, n_cols)
        fig.add_trace(
            go.Bar(x=chains, y=values, marker_color=colors,
                   text=[label_fmt.format(v) for v in values],
                   textposition="outside", showlegend=False),
            row=row + 1, col=col + 1,
        )

    # Velocity spans several orders of magnitude, so its panel is log-scaled.
    fig.update_yaxes(type="log", row=2, col=1)

    fig.update_layout(height=600,
                      title_text="Cross-Chain Intelligence Overview — 50K Real Transactions",
                      template="plotly_white", margin=dict(t=80))
    return fig
103
+
104
+
105
def build_rd1_fee():
    """Build the RD1 fee-market figure: CV, skewness and kurtosis per chain.

    Reads ``PRELOADED["rd1_fee"]`` and draws one bar panel per metric. The
    figure title reports the stored Levene statistic and its p-value.

    Returns:
        The assembled Plotly figure.
    """
    rd1 = PRELOADED["rd1_fee"]
    chains = CHAIN_ORDER
    colors = [COLORS[c] for c in chains]

    # (key in the per-chain dict, subplot title, bar-label format spec)
    metrics = [
        ("cv", "CV (σ/μ)", ".1f"),
        ("skewness", "Skewness", ".1f"),
        ("kurtosis", "Kurtosis", ",.0f"),
    ]

    fig = make_subplots(rows=1, cols=len(metrics),
                        subplot_titles=tuple(title for _, title, _ in metrics),
                        horizontal_spacing=0.08)

    for col, (key, _, fmt) in enumerate(metrics, 1):
        vals = [rd1[c][key] for c in chains]
        fig.add_trace(go.Bar(x=chains, y=vals, marker_color=colors,
                             text=[f"{v:{fmt}}" for v in vals], textposition="outside",
                             showlegend=False), row=1, col=col)

    lev = rd1["levene_etc_btc"]
    # Derive the p-value text from the data instead of hard-coding it, so the
    # title stays truthful if the stored result changes.
    p_text = "p<0.001" if lev["p"] < 0.001 else f"p={lev['p']:.3f}"
    fig.update_layout(height=400, template="plotly_white",
                      title_text=f"RD1: Fee Market Efficiency — Levene W={lev['stat']:.1f}, {p_text}")
    return fig
125
+
126
+
127
def build_rd2_whale():
    """Build the RD2 whale-concentration figure.

    Left panel: share of volume held by whale transactions per chain.
    Right panel: Gini coefficient per chain. Both read
    ``PRELOADED["rd2_whale"]``.

    Returns:
        The assembled Plotly figure.
    """
    rd2 = PRELOADED["rd2_whale"]
    chains = CHAIN_ORDER
    colors = [COLORS[c] for c in chains]

    def stat(chain, key):
        # ETC results live under "etc"; all other chains under "utxo".
        entry = rd2["etc"] if chain == "ETC" else rd2["utxo"][chain]
        return entry[key]

    wvols = [stat(c, "whale_vol_pct") for c in chains]
    ginis = [stat(c, "gini") for c in chains]

    fig = make_subplots(rows=1, cols=2, subplot_titles=("Whale Volume %", "Gini Coefficient"),
                        horizontal_spacing=0.1)

    fig.add_trace(go.Bar(x=chains, y=wvols, marker_color=colors,
                         text=[f"{v:.1f}%" for v in wvols], textposition="outside",
                         showlegend=False), row=1, col=1)
    fig.add_trace(go.Bar(x=chains, y=ginis, marker_color=colors,
                         text=[f"{v:.4f}" for v in ginis], textposition="outside",
                         showlegend=False), row=1, col=2)

    # The quoted range comes from the plotted values rather than a literal.
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD2: Whale Concentration — Top 1% controls "
                    f"{min(wvols):.0f}-{max(wvols):.0f}% of volume"))
    return fig
148
+
149
+
150
def build_rd3_reliability():
    """Build the RD3 reliability figure: horizontal bars of feature importance.

    Reads ``PRELOADED["rd3_reliability"]``; the title reports the stored
    failure count, total and AUC.

    Returns:
        The assembled Plotly figure.
    """
    rd3 = PRELOADED["rd3_reliability"]
    feats = rd3["features"]
    names = list(feats)
    vals = [feats[name] for name in names]

    fig = go.Figure(go.Bar(y=names, x=vals, orientation="h",
                           marker_color="#627EEA",
                           text=[f"{v:.3f}" for v in vals], textposition="outside"))
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD3: Reliability — {rd3['failed']}/{rd3['total']} failures "
                    f"(AUC={rd3['auc']:.3f})"),
        xaxis_title="Feature Importance")
    return fig
163
+
164
+
165
def build_rd4_aml():
    """Build the RD4 AML figure for the UTXO chains.

    Left panel: peeling-chain counts. Right panel: high-risk rate in percent.
    The ETC round-value share and frequent-sender count appear in the title
    only. All values come from ``PRELOADED["rd4_aml"]``.

    Returns:
        The assembled Plotly figure.
    """
    rd4 = PRELOADED["rd4_aml"]
    utxo = UTXO_CHAINS
    colors = [COLORS[c] for c in utxo]

    peeling = [rd4["utxo"][c]["peeling"] for c in utxo]
    # Stored as a fraction; shown as a percentage.
    risk = [rd4["utxo"][c]["high_risk_rate"] * 100 for c in utxo]

    fig = make_subplots(rows=1, cols=2, subplot_titles=("Peeling Chains", "High-Risk Rate %"),
                        horizontal_spacing=0.12)
    fig.add_trace(go.Bar(x=utxo, y=peeling, marker_color=colors,
                         text=peeling, textposition="outside", showlegend=False), row=1, col=1)
    fig.add_trace(go.Bar(x=utxo, y=risk, marker_color=colors,
                         text=[f"{v:.1f}%" for v in risk], textposition="outside",
                         showlegend=False), row=1, col=2)

    etc = rd4["etc"]
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD4: AML Detection — {rd4['total_peeling']:,} peeling chains | "
                    f"ETC: {etc['round_pct']}% round, {etc['freq_senders']} freq senders"))
    return fig
186
+
187
+
188
def build_rd5_velocity():
    """Build the RD5 velocity figure.

    Left panel: velocity per chain on a log y-axis. Right panel: health index
    per chain. Both read ``PRELOADED["rd5_velocity"]``.

    Returns:
        The assembled Plotly figure.
    """
    rd5 = PRELOADED["rd5_velocity"]
    chains = CHAIN_ORDER
    colors = [COLORS[c] for c in chains]

    vels = [rd5[c]["velocity"] for c in chains]
    health = [rd5[c]["health"] for c in chains]

    fig = make_subplots(rows=1, cols=2, subplot_titles=("Velocity (log scale)", "Health Index"),
                        horizontal_spacing=0.1)

    fig.add_trace(go.Bar(x=chains, y=vels, marker_color=colors,
                         text=[f"{v:,.1f}" for v in vels], textposition="outside",
                         showlegend=False), row=1, col=1)
    # Velocities differ by orders of magnitude; a linear axis would hide most bars.
    fig.update_yaxes(type="log", row=1, col=1)

    fig.add_trace(go.Bar(x=chains, y=health, marker_color=colors,
                         text=[f"{v:.1f}" for v in health], textposition="outside",
                         showlegend=False), row=1, col=2)

    # NOTE: the headline ratio is a hard-coded rounded figure, not computed here.
    fig.update_layout(height=400, template="plotly_white",
                      title_text="RD5: Payment Velocity — 12,400× gap between DOGE and BTC")
    return fig
211
+
212
+
213
def build_rd6_mev():
    """Build the RD6 gas-prediction figure: horizontal bars of feature importance.

    Reads ``PRELOADED["rd6_mev"]``; the title reports the stored R², MAE and
    the number of MEV candidates.

    Returns:
        The assembled Plotly figure.
    """
    rd6 = PRELOADED["rd6_mev"]
    pred = rd6["pred"]
    mev = rd6["mev"]

    feats = pred["features"]
    names = list(feats)
    vals = [feats[name] for name in names]

    fig = go.Figure(go.Bar(y=names, x=vals, orientation="h",
                           marker_color="#627EEA",
                           text=[f"{v:.4f}" for v in vals], textposition="outside"))
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD6: Gas Prediction — R²={pred['r2']:.3f}, MAE={pred['mae']:.2f} Gwei | "
                    f"MEV candidates: {mev['candidates_z3']}"),
        xaxis_title="Feature Importance")
    return fig
230
+
231
+
232
def build_rd7_arbitrage():
    """Build the RD7 arbitrage figure.

    Left panel: ADF statistic per tested pair with a dashed line at the 5%
    critical value. Right panel: divergence-signal count per pair. Both read
    ``PRELOADED["rd7_arbitrage"]``.

    Returns:
        The assembled Plotly figure.
    """
    rd7 = PRELOADED["rd7_arbitrage"]
    # Single source for the threshold used in both the subplot title and the line.
    adf_critical_5pct = -2.86

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=(f"ADF Statistics (all < {adf_critical_5pct})", "Divergence Signals"),
        horizontal_spacing=0.12)

    pairs = list(rd7["coint"])
    adfs = [rd7["coint"][p]["adf"] for p in pairs]
    fig.add_trace(go.Bar(x=pairs, y=adfs, marker_color="#2ECC71",
                         text=[f"{v:.2f}" for v in adfs], textposition="outside",
                         showlegend=False), row=1, col=1)
    fig.add_hline(y=adf_critical_5pct, line_dash="dash", line_color="red",
                  annotation_text="5% critical", row=1, col=1)

    sigs = rd7["signals"]
    sig_pairs = [s["pair"] for s in sigs]
    sig_counts = [s["count"] for s in sigs]
    fig.add_trace(go.Bar(x=sig_pairs, y=sig_counts,
                         marker_color="#3498DB",
                         text=sig_counts, textposition="outside",
                         showlegend=False), row=1, col=2)

    # The denominator is the number of tested pairs, not a literal 5.
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD7: Cross-Chain Arbitrage — {rd7['coint_pairs']}/{len(pairs)} cointegrated, "
                    f"{rd7['total_signals']:,} signals"))
    return fig
255
+
256
+
257
def build_rd8_privacy():
    """Build the RD8 privacy figure.

    Left panel: ETC address entropy (Shannon, maximum, normalized). Right
    panel: privacy risk score per UTXO chain. Both read
    ``PRELOADED["rd8_privacy"]``.

    Returns:
        The assembled Plotly figure.
    """
    rd8 = PRELOADED["rd8_privacy"]
    etc = rd8["etc"]

    fig = make_subplots(rows=1, cols=2, subplot_titles=("ETC Address Entropy", "UTXO Privacy Risk"),
                        horizontal_spacing=0.12)

    fig.add_trace(go.Bar(x=["Shannon H", "Max H", "Norm H"],
                         y=[etc["entropy"], etc["max_entropy"], etc["norm_entropy"]],
                         marker_color=["#627EEA", "#95A5A6", "#E74C3C"],
                         text=[f"{etc['entropy']:.2f}", f"{etc['max_entropy']:.2f}",
                               f"{etc['norm_entropy']:.3f}"],
                         textposition="outside", showlegend=False), row=1, col=1)

    utxo = UTXO_CHAINS
    risks = [rd8["utxo"][c]["risk_score"] for c in utxo]
    fig.add_trace(go.Bar(x=utxo, y=risks, marker_color=[COLORS[c] for c in utxo],
                         text=[f"{v:.3f}" for v in risks], textposition="outside",
                         showlegend=False), row=1, col=2)

    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD8: Privacy — ETC {etc['reuse_rate']:.1%} address reuse, "
                    f"max reuse {etc['max_reuse']:,}×"))
    return fig
279
+
280
+
281
def build_radar():
    """Radar chart comparing all chains across normalized dimensions.

    Each chain in ``CHAIN_ORDER`` gets one closed polygon over six axes, every
    score scaled so that higher means better:

    * Fee Stability  - 1 - CV / max CV across chains
    * Whale Equality - 1 - Gini
    * Reliability    - fixed 1.0 for ETC, 0.9 for the other chains
    * AML Safety     - 1 - high-risk rate (fixed 1.0 for ETC)
    * Velocity       - log1p(velocity) / log1p(max velocity)
    * Privacy        - normalized entropy (ETC) or 1 - risk score (UTXO)

    Returns:
        The assembled Plotly figure.
    """
    categories = ["Fee Stability", "Whale Equality", "Reliability",
                  "AML Safety", "Velocity", "Privacy"]

    rd1 = PRELOADED["rd1_fee"]
    rd2 = PRELOADED["rd2_whale"]
    rd4 = PRELOADED["rd4_aml"]
    rd5 = PRELOADED["rd5_velocity"]
    rd8 = PRELOADED["rd8_privacy"]

    # Normalization denominators are identical for every chain: compute once.
    max_cv = max(rd1[c]["cv"] for c in CHAIN_ORDER)
    log_max_vel = np.log1p(max(rd5[c]["velocity"] for c in CHAIN_ORDER))
    # Repeat the first axis at the end so each polygon closes.
    theta = categories + [categories[0]]

    fig = go.Figure()
    for chain in CHAIN_ORDER:
        fee_stab = 1 - rd1[chain]["cv"] / max_cv

        if chain == "ETC":
            whale_eq = 1 - rd2["etc"]["gini"]
            aml_safe = 1.0  # No peeling chain metric for ETC
            privacy = rd8["etc"]["norm_entropy"]
        else:
            whale_eq = 1 - rd2["utxo"][chain]["gini"]
            aml_safe = 1 - rd4["utxo"][chain]["high_risk_rate"]
            privacy = 1 - rd8["utxo"][chain]["risk_score"]

        # Fixed scores, not measured ones: only ETC has receipt_status data.
        reliability = 1.0 if chain == "ETC" else 0.9

        velocity = np.log1p(rd5[chain]["velocity"]) / log_max_vel

        vals = [fee_stab, whale_eq, reliability, aml_safe, velocity, privacy]
        vals.append(vals[0])  # Close the radar

        fig.add_trace(go.Scatterpolar(
            r=vals, theta=theta,
            fill="toself", name=chain,
            line_color=COLORS[chain], opacity=0.6,
        ))

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
        height=500, template="plotly_white",
        title_text="Cross-Chain Radar — Normalized Scores (higher = better)",
    )
    return fig
327
+
328
+
329
+ # ═══════════════════════════════════════════════════════════
330
+ # CUSTOM ANALYSIS ENGINE
331
+ # ═══════════════════════════════════════════════════════════
332
+
333
def _first_column(df, predicate):
    """Return the first column of *df* whose lower-cased name satisfies *predicate*, else None."""
    return next((c for c in df.columns if predicate(str(c).lower())), None)


def _gini(positive_values):
    """Return the Gini coefficient of a non-empty array of positive numbers."""
    ordered = np.sort(positive_values)
    count = len(ordered)
    ranks = np.arange(1, count + 1)
    return float((2 * np.sum(ranks * ordered)) / (count * np.sum(ordered)) - (count + 1) / count)


def _binary_status_column(df):
    """Return ``(column, values)`` for the first 0/1-valued status column, else ``(None, None)``."""
    # Names containing "status" win over names that merely contain "receipt",
    # so e.g. `receipt_gas_used` is never mistaken for the success flag.
    candidates = [c for c in df.columns if "status" in str(c).lower()]
    candidates += [c for c in df.columns
                   if "receipt" in str(c).lower() and c not in candidates]
    for name in candidates:
        flags = pd.to_numeric(df[name], errors="coerce").dropna()
        if not flags.empty and flags.isin([0, 1]).all():
            return name, flags
    return None, None


def analyze_custom_csv(file):
    """Analyze an uploaded CSV file and return results + visualization.

    Columns are auto-detected by name: value, fee, timestamp, sender address
    and receipt status. Each detected column adds a section to the Markdown
    report and, where applicable, a trace to a 2x2 figure.

    Args:
        file: ``None``, a filesystem path string, or an object whose ``name``
            attribute is the path of the uploaded file.

    Returns:
        ``(report_markdown, figure)``. When no file is given or the CSV cannot
        be read, the first element is a message and the second is ``None``.
    """
    if file is None:
        return "Please upload a CSV file.", None

    # Accept both a plain path string and a tempfile-like object with `.name`.
    path = getattr(file, "name", file)
    try:
        df = pd.read_csv(path)
    except Exception as e:  # UI boundary: show any I/O or parser error as text
        return f"Error reading CSV: {e}", None

    lowered = [str(c).lower() for c in df.columns]
    n = len(df)
    report = [
        f"## Dataset: {n:,} rows × {len(df.columns)} columns",
        f"**Columns:** {', '.join(str(c) for c in df.columns)}",
    ]

    # Auto-detect chain type from column names
    is_etc = any("gas" in c or "from" in c for c in lowered)
    report.append(f"**Detected type:** {'Account-based (ETC-like)' if is_etc else 'UTXO-based'}")

    fig = make_subplots(rows=2, cols=2,
                        subplot_titles=("Value Distribution", "Fee Distribution",
                                        "Temporal Activity", "Concentration"),
                        horizontal_spacing=0.1, vertical_spacing=0.15)

    # ── Value column: by name first, else the first numeric column ──
    value_tags = ("value", "input_btc", "input_doge", "input_bch", "input_dash")
    val_col = _first_column(df, lambda name: any(tag in name for tag in value_tags))
    if val_col is None:
        val_col = next(
            (c for c in df.columns
             if pd.api.types.is_numeric_dtype(df[c])
             and not pd.api.types.is_bool_dtype(df[c])
             and str(c).lower() != "block_number"),
            None,
        )

    if val_col is not None:
        report.append(f"\n### Value Analysis (`{val_col}`)")
        # Coerce so text-typed columns cannot crash the numeric comparisons below.
        vals = pd.to_numeric(df[val_col], errors="coerce").dropna().astype("float64")
        if vals.empty:
            report.append("- No numeric values found")
        else:
            vals_pos = vals[vals > 0]
            mean = vals.mean()
            report.append(f"- Mean: {mean:.6f}")
            report.append(f"- Median: {vals.median():.6f}")
            report.append(f"- Std: {vals.std():.6f}")
            report.append(f"- CV: {vals.std() / mean:.4f}" if mean != 0 else "- CV: N/A")
            report.append(f"- Skewness: {vals.skew():.4f}")
            report.append(f"- Kurtosis: {vals.kurtosis():.4f}")

            if len(vals_pos) > 10:
                gini = _gini(vals_pos.values)
                t99 = vals_pos.quantile(0.99)
                whale_vol = vals_pos[vals_pos >= t99].sum() / vals_pos.sum() * 100
                report.append(f"- **Gini coefficient: {gini:.4f}**")
                report.append(f"- **Top 1% volume share: {whale_vol:.1f}%**")

                fig.add_trace(go.Histogram(x=np.log1p(vals_pos), nbinsx=50,
                                           marker_color="#627EEA", name="log(1+value)"),
                              row=1, col=1)

    # ── Fee column ──
    fee_col = _first_column(df, lambda name: "fee" in name or "gas_price" in name)
    if fee_col is not None:
        report.append(f"\n### Fee Analysis (`{fee_col}`)")
        fees = pd.to_numeric(df[fee_col], errors="coerce").dropna().astype("float64")
        if fees.empty:
            report.append("- No numeric values found")
        else:
            fees_pos = fees[fees > 0]
            fee_mean = fees.mean()
            report.append(f"- Mean: {fee_mean:.8f}")
            report.append(f"- Median: {fees.median():.8f}")
            report.append(f"- CV: {fees.std() / fee_mean:.4f}" if fee_mean != 0 else "- CV: N/A")

            if len(fees_pos) > 10:
                fig.add_trace(go.Histogram(x=np.log1p(fees_pos), nbinsx=50,
                                           marker_color="#F7931A", name="log(1+fee)"),
                              row=1, col=2)

    # ── Temporal analysis ──
    ts_col = _first_column(df, lambda name: "timestamp" in name)
    if ts_col is not None:
        report.append("\n### Temporal Analysis")
        try:
            ts = pd.to_datetime(df[ts_col], format="mixed", utc=True)
            hours = ts.dt.hour
            bh_ratio = ((hours >= 9) & (hours <= 17)).mean()
            hour_counts = hours.value_counts().sort_index()
            span = f"{ts.min()} to {ts.max()}"
        except Exception as exc:
            # Best effort: say why the section is missing instead of silently
            # dropping it, but never fail the whole analysis.
            report.append(f"- Could not parse `{ts_col}` as timestamps: {exc}")
        else:
            report.append(f"- Business hours (9-17 UTC): {bh_ratio:.1%}")
            report.append(f"- Time span: {span}")
            fig.add_trace(go.Bar(x=hour_counts.index, y=hour_counts.values,
                                 marker_color="#C2A633", name="Hourly activity"),
                          row=2, col=1)

    # ── Address analysis (if ETC-like) ──
    addr_col = _first_column(df, lambda name: "from" in name and "addr" in name)
    if addr_col is None:
        addr_col = _first_column(df, lambda name: name.startswith("from"))

    if addr_col is not None:
        addr_counts = df[addr_col].value_counts()
        unique = len(addr_counts)
        report.append(f"\n### Address Analysis (`{addr_col}`)")
        report.append(f"- Unique addresses: {unique:,}")

        if unique:  # an all-empty column has nothing to measure
            reused = (addr_counts > 1).sum()
            report.append(f"- Reuse rate: {reused / unique:.1%}")

            probs = addr_counts.values / addr_counts.values.sum()
            # The small epsilon keeps log2 finite for vanishing probabilities.
            H = -np.sum(probs * np.log2(probs + 1e-15))
            Hmax = np.log2(unique) if unique > 1 else 1
            report.append(f"- **Shannon entropy: {H:.2f} / {Hmax:.2f} (norm: {H / Hmax:.3f})**")

            top20 = addr_counts.head(20)
            fig.add_trace(go.Bar(x=[f"Addr{i}" for i in range(len(top20))],
                                 y=top20.values, marker_color="#8DC351",
                                 name="Top addresses"),
                          row=2, col=2)

    # ── Receipt status (if present and genuinely 0/1-valued) ──
    status_col, status = _binary_status_column(df)
    if status_col is not None:
        report.append(f"\n### Reliability (`{status_col}`)")
        report.append(f"- Success rate: {status.mean():.4%}")
        report.append(f"- Failures: {(status == 0).sum()}")

    fig.update_layout(height=550, template="plotly_white",
                      title_text=f"Custom Analysis: {n:,} transactions",
                      showlegend=False)

    return "\n".join(report), fig
483
+
484
+
485
+ # ═══════════════════════════════════════════════════════════
486
+ # GRADIO APP
487
+ # ═══════════════════════════════════════════════════════════
488
+
489
# Markdown shown at the top of the app: headline finding per research
# dimension plus dataset attribution. Figures are hard-coded text.
SUMMARY_MD = """
# 🔗 Blockchain Intelligence Dashboard
### 8-Dimension Cross-Chain Analysis of 50,000 Real Transactions

| Dimension | Key Finding |
|-----------|-------------|
| **RD1** Fee Markets | BCH highest CV (15.89), BTC most stable (3.12). Levene p<0.001 |
| **RD2** Whales | DOGE Gini = 0.998. Top 1% controls 53-99% of volume |
| **RD3** Reliability | ETC 99.93% success. Failures unpredictable (AUC=0.499) |
| **RD4** AML | 15,330 peeling chains. BTC risk rate 40.1% |
| **RD5** Velocity | 12,400× gap: DOGE (30,978) vs BTC (2.49) |
| **RD6** Gas/MEV | R²=0.269. Moving averages = 96% importance. Only 4 MEV |
| **RD7** Arbitrage | All 5 pairs cointegrated. 1,615 divergence signals |
| **RD8** Privacy | ETC 55.6% address reuse. Norm entropy 0.715 |

**Dataset:** [Omarrran/50k_Cryptocurrency_Transaction_Dataset_by_HNM](https://huggingface.co/datasets/Omarrran/50k_Cryptocurrency_Transaction_Dataset_by_HNM)
**Chains:** ETC (account) · BTC · DOGE · BCH · DASH (UTXO)
"""
507
+
508
# Assemble the UI: summary header, one tab per research dimension (chart plus
# commentary), a CSV-upload tab wired to analyze_custom_csv, and a footer.
# The builders are passed to gr.Plot as callables, not as pre-built figures.
with gr.Blocks(title="Blockchain Intelligence", theme=gr.themes.Soft()) as demo:
    gr.Markdown(SUMMARY_MD)

    with gr.Tabs():
        with gr.TabItem("📊 Overview"):
            gr.Plot(value=build_overview)
            gr.Plot(value=build_radar)

        with gr.TabItem("💰 RD1: Fee Markets"):
            gr.Plot(value=build_rd1_fee)
            gr.Markdown("""
            **Insight:** All chains exhibit extreme heavy-tailed fee distributions (CV 3.1–15.9).
            BCH's CV of 15.89 reflects sporadic high-fee events on low base volume.
            ETC's kurtosis of 8,513 means extreme outliers dominate — median is 1.0 Gwei but mean is 6.2 Gwei.
            Levene's test (W=51.4, p<0.001) confirms account vs UTXO fee mechanisms produce fundamentally different profiles.
            """)

        with gr.TabItem("🐋 RD2: Whales"):
            gr.Plot(value=build_rd2_whale)
            gr.Markdown("""
            **Insight:** Wealth concentration is universal and extreme. DOGE's Gini of 0.998 means virtually all
            economic activity flows through whale accounts. ETC: mean 75.7 vs median 0.36 (207× ratio).
            KS test shows whales DON'T transact at different times (p=0.635) — surprising for institutional actors.
            Cross-chain correlations are negligible (|r|<0.1) — each chain has independent whale populations.
            """)

        with gr.TabItem("✅ RD3: Reliability"):
            gr.Plot(value=build_rd3_reliability)
            gr.Markdown("""
            **Insight:** Only 7/10,000 ETC transactions failed (0.07%). Random Forest AUC of 0.499 means
            failures are genuinely unpredictable from transaction features — they're essentially random events.
            Gas limit and zero-value indicator dominate importance but provide no actionable signal.
            """)

        with gr.TabItem("🚨 RD4: AML"):
            gr.Plot(value=build_rd4_aml)
            gr.Markdown("""
            **Insight:** BTC's 40.1% high-risk rate reflects documented use in layering operations.
            DOGE has only 383 peeling chains but 93.3% round outputs — that's micro-payment culture, not laundering.
            ETC's 90.6% rapid-sequence rate reflects 13-second block time, not suspicious activity.
            DBSCAN found 9 clusters on DOGE vs 3 on other chains — more diverse transaction patterns.
            """)

        with gr.TabItem("⚡ RD5: Velocity"):
            gr.Plot(value=build_rd5_velocity)
            gr.Markdown("""
            **Insight:** DOGE velocity of 30,978 vs BTC's 2.49 empirically confirms payment token vs store-of-value.
            BTC and DOGE show 0% business-hours activity (automated/non-UTC users).
            DASH has highest business-hours ratio (58.4%) consistent with merchant payment use case.
            """)

        with gr.TabItem("⛽ RD6: Gas & MEV"):
            gr.Plot(value=build_rd6_mev)
            gr.Markdown("""
            **Insight:** R²=0.269 — modest but meaningful. Moving averages (ma10 + ma30) account for 96.3% of
            prediction power, revealing strong mean-reversion behavior in ETC gas prices.
            Only 4 MEV candidates (0.04%) — ETC's minimal DeFi activity precludes meaningful extraction.
            """)

        with gr.TabItem("📈 RD7: Arbitrage"):
            gr.Plot(value=build_rd7_arbitrage)
            gr.Markdown("""
            **Insight:** All 5 pairs cointegrated despite near-zero contemporaneous correlation (|r|<0.1).
            This reveals shared long-run equilibrium driven by latent factors (market sentiment).
            1,615 divergence signals (16.2% of observations) exceed random-walk expectations.
            BTC-DOGE maximum divergence of 15.02σ reflects the 1,300× nominal value difference.
            """)

        with gr.TabItem("🔒 RD8: Privacy"):
            gr.Plot(value=build_rd8_privacy)
            gr.Markdown("""
            **Insight:** ETC privacy is severely compromised — one address appears 2,487 times.
            55.6% reuse rate and normalized entropy of 0.715 mean 28.5% of address diversity is lost.
            DOGE has highest UTXO risk (0.636) due to 93.3% round outputs + 97.8% single-input transactions.
            DASH achieves lowest risk (0.442) despite limited PrivateSend adoption in this sample.
            """)

        with gr.TabItem("🔬 Analyze Your Data"):
            gr.Markdown("""
            ### Upload a CSV to analyze
            Supports any blockchain transaction CSV. The tool auto-detects columns for:
            values, fees, timestamps, addresses, and receipt status.
            """)
            file_input = gr.File(label="Upload CSV", file_types=[".csv"])
            analyze_btn = gr.Button("🔍 Analyze", variant="primary")
            result_md = gr.Markdown()
            result_plot = gr.Plot()
            # The click handler returns (markdown report, figure), in this order.
            analyze_btn.click(fn=analyze_custom_csv, inputs=[file_input],
                              outputs=[result_md, result_plot])

    gr.Markdown("""
    ---
    *Built from real blockchain data (Nov 2024). Paper: "Comprehensive Cross-Chain Cryptocurrency Analysis:
    Eight Dimensions of Blockchain Intelligence" •
    [Dataset](https://huggingface.co/datasets/Omarrran/50k_Cryptocurrency_Transaction_Dataset_by_HNM)*
    """)

if __name__ == "__main__":
    demo.launch()