# Hugging Face Space app (page status at capture time: "Sleeping").
"""
Blockchain Intelligence Dashboard — 8-Dimension Cross-Chain Analyzer
HuggingFace Space: Interactive exploration of 50K real cryptocurrency transactions
across ETC, BTC, DOGE, BCH, DASH.
Upload your own CSVs or explore the pre-loaded dataset.
"""
import io
import json
import os
import warnings

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats
from sklearn.cluster import DBSCAN
from sklearn.ensemble import IsolationForest, RandomForestClassifier, GradientBoostingRegressor
from sklearn.metrics import roc_auc_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Suppress all library warnings process-wide and fix NumPy's global RNG seed.
warnings.filterwarnings("ignore")
np.random.seed(42)

# --- Color palette ---
# Hex color used for each chain's bars and traces.
COLORS = {
    "ETC": "#627EEA",
    "BTC": "#F7931A",
    "DOGE": "#C2A633",
    "BCH": "#8DC351",
    "DASH": "#008DE4",
}
# Display order for all chains: ETC first, followed by the UTXO chains.
CHAIN_ORDER = ["ETC", "BTC", "DOGE", "BCH", "DASH"]
# Chains whose results live under the "utxo" sub-dictionaries of PRELOADED.
UTXO_CHAINS = ["BTC", "DOGE", "BCH", "DASH"]
# --- Pre-loaded results from real analysis ---
# Parsed once at import time. Top-level keys: "meta" plus one entry per research
# dimension ("rd1_fee" ... "rd8_privacy"); the build_* functions only read from it.
PRELOADED = json.loads(r'''
{
"meta": {"dataset": "Omarrran/50k_Cryptocurrency_Transaction_Dataset_by_HNM", "chains": ["BCH","BTC","DASH","DOGE","ETC"], "total_tx": 50000},
"rd1_fee": {"ETC": {"mean": 6.200637, "median": 1.0, "std": 41.592424, "cv": 6.7078, "skewness": 89.1592, "kurtosis": 8512.9975, "n": 10000}, "BTC": {"mean": 9.4692e-06, "median": 4.4e-06, "cv": 3.1239, "skewness": 22.0863, "kurtosis": 780.3443, "n": 9997}, "DOGE": {"mean": 0.0716, "median": 0.0104, "cv": 7.2477, "skewness": 17.3935, "kurtosis": 389.2284, "n": 9986}, "BCH": {"mean": 0.000307, "median": 3.74e-06, "cv": 15.8896, "skewness": 17.8966, "kurtosis": 320.7481, "n": 9879}, "DASH": {"mean": 6.19e-05, "median": 6e-06, "cv": 13.9415, "skewness": 71.7787, "kurtosis": 5477.6621, "n": 6422}, "levene_etc_btc": {"stat": 51.4278, "p": 0.0, "sig": true}},
"rd2_whale": {"etc": {"threshold_99": 1075.7688, "whale_count": 100, "whale_vol_pct": 88.43, "gini": 0.9871, "freq_whale_addrs": 11, "ks_stat": 0.0733, "ks_p": 0.635, "mean": 75.7417, "median": 0.3645, "max": 91499.99}, "utxo": {"BTC": {"anomalies": 100, "whale_vol_pct": 58.53, "gini": 0.9656}, "DOGE": {"anomalies": 100, "whale_vol_pct": 98.96, "gini": 0.9984}, "BCH": {"anomalies": 100, "whale_vol_pct": 72.56, "gini": 0.9646}, "DASH": {"anomalies": 100, "whale_vol_pct": 53.37, "gini": 0.9019}}},
"rd3_reliability": {"failure_rate": 0.0007, "failed": 7, "total": 10000, "auc": 0.4985, "features": {"gas": 0.2903, "zero_val": 0.2382, "value_etc": 0.1679, "log_val": 0.1181, "gas_price_gwei": 0.0775, "log_gp": 0.074, "high_gas": 0.0215, "hour": 0.0126}},
"rd4_aml": {"etc": {"round_tx": 488, "round_pct": 4.88, "rapid_tx": 9060, "rapid_pct": 90.6, "equal_val_patterns": 5251, "freq_senders": 35}, "utxo": {"BTC": {"peeling": 5018, "round_outputs": 5466, "high_risk_rate": 0.4013}, "DOGE": {"peeling": 383, "round_outputs": 9334, "high_risk_rate": 0.0141}, "BCH": {"peeling": 5022, "round_outputs": 1903, "high_risk_rate": 0.1733}, "DASH": {"peeling": 4907, "round_outputs": 2796, "high_risk_rate": 0.3532}}, "total_peeling": 15330},
"rd5_velocity": {"BCH": {"velocity": 53.27, "bh_ratio": 0.4695, "health": 44.15}, "BTC": {"velocity": 2.49, "bh_ratio": 0.0, "health": 30.0}, "DASH": {"velocity": 23.40, "bh_ratio": 0.5836, "health": 47.54}, "DOGE": {"velocity": 30977.50, "bh_ratio": 0.0, "health": 70.0}, "ETC": {"velocity": 378.52, "bh_ratio": 0.4746, "health": 44.73}},
"rd6_mev": {"pred": {"mae": 3.5607, "rmse": 6.7611, "r2": 0.2686, "features": {"ma10": 0.493, "ma30": 0.4696, "std10": 0.0189, "l1": 0.0085, "vr": 0.0055, "min": 0.0022, "l3": 0.0014, "l5": 0.0007, "hour": 0.0003}}, "mev": {"candidates_z3": 4, "front_run": 3, "mev_rate_pct": 0.0401}},
"rd7_arbitrage": {"coint": {"BTC-BCH": {"adf": -9.3522, "coint": true}, "BTC-DOGE": {"adf": -9.3398, "coint": true}, "ETC-DASH": {"adf": -9.5276, "coint": true}, "BTC-ETC": {"adf": -9.3351, "coint": true}, "DOGE-DASH": {"adf": -8.4527, "coint": true}}, "signals": [{"pair": "BTC-BCH", "count": 518, "avg_div": 4.3728, "max_div": 12.1035}, {"pair": "BTC-DOGE", "count": 339, "avg_div": 5.4999, "max_div": 15.0246}, {"pair": "ETC-DASH", "count": 758, "avg_div": 4.3304, "max_div": 10.2677}], "total_signals": 1615, "coint_pairs": 5},
"rd8_privacy": {"etc": {"unique": 4232, "reused": 2351, "reuse_rate": 0.5555, "max_reuse": 2487, "entropy": 8.6111, "max_entropy": 12.0471, "norm_entropy": 0.7148}, "utxo": {"BTC": {"risk_score": 0.6272}, "DOGE": {"risk_score": 0.6356}, "BCH": {"risk_score": 0.4591}, "DASH": {"risk_score": 0.4417}}}
}
''')
# -----------------------------------------------------------
# VISUALIZATION BUILDERS
# -----------------------------------------------------------
def build_overview():
    """Build the overview summary dashboard.

    Draws a 2x3 grid of bar charts from ``PRELOADED``: fee CV, whale volume
    share, Gini coefficient, velocity (log axis), UTXO high-risk rate and
    privacy risk, one bar per chain.

    Returns:
        plotly.graph_objects.Figure: the assembled subplot figure.
    """
    # ETC results are stored under "etc", the others under "utxo"; building the
    # labels this way keeps them aligned with the value lists assembled below.
    chains = ["ETC", *UTXO_CHAINS]
    colors = [COLORS[c] for c in chains]

    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=("Fee CV", "Whale Vol %", "Gini Coefficient",
                        "Velocity (log)", "AML Risk Rate %", "Privacy Risk"),
        specs=[[{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "bar"}, {"type": "bar"}]],
        horizontal_spacing=0.08, vertical_spacing=0.15,
    )

    def add_bars(values, label_fmt, row, col):
        """Add one per-chain bar trace with formatted value labels."""
        fig.add_trace(
            go.Bar(x=chains, y=values, marker_color=colors,
                   text=[label_fmt.format(v) for v in values],
                   textposition="outside", showlegend=False),
            row=row, col=col,
        )

    rd1 = PRELOADED["rd1_fee"]
    add_bars([rd1.get(c, {}).get("cv", 0) for c in chains], "{:.1f}", 1, 1)

    rd2 = PRELOADED["rd2_whale"]
    add_bars([rd2["etc"]["whale_vol_pct"]]
             + [rd2["utxo"][c]["whale_vol_pct"] for c in UTXO_CHAINS], "{:.0f}%", 1, 2)
    add_bars([rd2["etc"]["gini"]]
             + [rd2["utxo"][c]["gini"] for c in UTXO_CHAINS], "{:.3f}", 1, 3)

    rd5 = PRELOADED["rd5_velocity"]
    add_bars([rd5[c]["velocity"] for c in chains], "{:.1f}", 2, 1)
    fig.update_yaxes(type="log", row=2, col=1)

    rd4 = PRELOADED["rd4_aml"]
    # No high-risk rate is stored for ETC, so its bar is shown as 0.
    add_bars([0] + [rd4["utxo"][c]["high_risk_rate"] * 100 for c in UTXO_CHAINS],
             "{:.1f}", 2, 2)

    rd8 = PRELOADED["rd8_privacy"]
    # ETC risk is taken as the complement of its normalized address entropy.
    add_bars([1 - rd8["etc"]["norm_entropy"]]
             + [rd8["utxo"][c]["risk_score"] for c in UTXO_CHAINS], "{:.3f}", 2, 3)

    fig.update_layout(
        height=600,
        title_text="Cross-Chain Intelligence Overview — 50K Real Transactions",
        template="plotly_white", margin=dict(t=80),
    )
    return fig
def build_rd1_fee():
    """Build the RD1 fee-market figure: CV, skewness and kurtosis per chain.

    Values come from ``PRELOADED["rd1_fee"]``; the Levene statistic stored
    under ``"levene_etc_btc"`` is shown in the title.

    Returns:
        plotly.graph_objects.Figure: a 1x3 bar-chart figure.
    """
    rd1 = PRELOADED["rd1_fee"]
    chains = ["ETC", "BTC", "DOGE", "BCH", "DASH"]
    colors = [COLORS[c] for c in chains]
    # (metric key, label format spec) for each panel, left to right.
    panels = (("cv", ".1f"), ("skewness", ".1f"), ("kurtosis", ",.0f"))

    fig = make_subplots(rows=1, cols=3,
                        subplot_titles=("CV (σ/μ)", "Skewness", "Kurtosis"),
                        horizontal_spacing=0.08)
    for col, (metric, spec) in enumerate(panels, start=1):
        vals = [rd1[c][metric] for c in chains]
        fig.add_trace(
            go.Bar(x=chains, y=vals, marker_color=colors,
                   text=[format(v, spec) for v in vals], textposition="outside",
                   showlegend=False),
            row=1, col=col,
        )

    lev = rd1["levene_etc_btc"]
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=f"RD1: Fee Market Efficiency — Levene W={lev['stat']:.1f}, p<0.001",
    )
    return fig
def build_rd2_whale():
    """Build the RD2 whale-concentration figure.

    Left panel: whale volume share per chain; right panel: Gini coefficient
    per chain. ETC values are read from ``rd2["etc"]``, the rest from
    ``rd2["utxo"]``.

    Returns:
        plotly.graph_objects.Figure: a 1x2 bar-chart figure.
    """
    rd2 = PRELOADED["rd2_whale"]
    chains = ["ETC", *UTXO_CHAINS]  # same order as the value lists below
    colors = [COLORS[c] for c in chains]

    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=("Whale Volume %", "Gini Coefficient"),
                        horizontal_spacing=0.1)

    wvols = [rd2["etc"]["whale_vol_pct"]] + [rd2["utxo"][c]["whale_vol_pct"] for c in UTXO_CHAINS]
    fig.add_trace(
        go.Bar(x=chains, y=wvols, marker_color=colors,
               text=[f"{v:.1f}%" for v in wvols], textposition="outside",
               showlegend=False),
        row=1, col=1,
    )

    ginis = [rd2["etc"]["gini"]] + [rd2["utxo"][c]["gini"] for c in UTXO_CHAINS]
    fig.add_trace(
        go.Bar(x=chains, y=ginis, marker_color=colors,
               text=[f"{v:.4f}" for v in ginis], textposition="outside",
               showlegend=False),
        row=1, col=2,
    )

    fig.update_layout(
        height=400, template="plotly_white",
        title_text="RD2: Whale Concentration — Top 1% controls 53-99% of volume",
    )
    return fig
def build_rd3_reliability():
    """Build the RD3 reliability figure: stored feature importances as horizontal bars.

    The title reports the failed/total counts and the AUC stored in
    ``PRELOADED["rd3_reliability"]``.

    Returns:
        plotly.graph_objects.Figure: a single horizontal bar chart.
    """
    rd3 = PRELOADED["rd3_reliability"]
    importances = rd3["features"]
    names = list(importances)
    vals = list(importances.values())

    fig = go.Figure(
        go.Bar(y=names, x=vals, orientation="h", marker_color="#627EEA",
               text=[f"{v:.3f}" for v in vals], textposition="outside")
    )
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD3: Reliability — {rd3['failed']}/{rd3['total']} failures "
                    f"(AUC={rd3['auc']:.3f})"),
        xaxis_title="Feature Importance",
    )
    return fig
def build_rd4_aml():
    """Build the RD4 AML figure for the UTXO chains.

    Left panel: peeling-chain counts; right panel: high-risk rate in percent.
    The ETC round-transaction percentage and frequent-sender count appear only
    in the title.

    Returns:
        plotly.graph_objects.Figure: a 1x2 bar-chart figure.
    """
    rd4 = PRELOADED["rd4_aml"]
    utxo = UTXO_CHAINS
    colors = [COLORS[c] for c in utxo]
    peeling = [rd4["utxo"][c]["peeling"] for c in utxo]
    risk = [rd4["utxo"][c]["high_risk_rate"] * 100 for c in utxo]  # fraction -> percent

    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=("Peeling Chains", "High-Risk Rate %"),
                        horizontal_spacing=0.12)
    fig.add_trace(
        go.Bar(x=utxo, y=peeling, marker_color=colors,
               text=peeling, textposition="outside", showlegend=False),
        row=1, col=1,
    )
    fig.add_trace(
        go.Bar(x=utxo, y=risk, marker_color=colors,
               text=[f"{v:.1f}%" for v in risk], textposition="outside",
               showlegend=False),
        row=1, col=2,
    )

    etc = rd4["etc"]
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD4: AML Detection — {rd4['total_peeling']:,} peeling chains | "
                    f"ETC: {etc['round_pct']}% round, {etc['freq_senders']} freq senders"),
    )
    return fig
def build_rd5_velocity():
    """Build the RD5 velocity figure.

    Left panel: velocity per chain on a log axis; right panel: health index
    per chain. Both are read from ``PRELOADED["rd5_velocity"]``.

    Returns:
        plotly.graph_objects.Figure: a 1x2 bar-chart figure.
    """
    rd5 = PRELOADED["rd5_velocity"]
    chains = CHAIN_ORDER
    colors = [COLORS[c] for c in chains]
    vels = [rd5[c]["velocity"] for c in chains]
    health = [rd5[c]["health"] for c in chains]

    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=("Velocity (log scale)", "Health Index"),
                        horizontal_spacing=0.1)
    fig.add_trace(
        go.Bar(x=chains, y=vels, marker_color=colors,
               text=[f"{v:,.1f}" for v in vels], textposition="outside",
               showlegend=False),
        row=1, col=1,
    )
    # Velocities span several orders of magnitude, hence the log axis.
    fig.update_yaxes(type="log", row=1, col=1)
    fig.add_trace(
        go.Bar(x=chains, y=health, marker_color=colors,
               text=[f"{v:.1f}" for v in health], textposition="outside",
               showlegend=False),
        row=1, col=2,
    )
    fig.update_layout(
        height=400, template="plotly_white",
        title_text="RD5: Payment Velocity — 12,400× gap between DOGE and BTC",
    )
    return fig
def build_rd6_mev():
    """Build the RD6 gas-prediction figure: stored feature importances as horizontal bars.

    The title reports R², MAE and the MEV candidate count from
    ``PRELOADED["rd6_mev"]``.

    Returns:
        plotly.graph_objects.Figure: a single horizontal bar chart.
    """
    rd6 = PRELOADED["rd6_mev"]
    pred = rd6["pred"]
    mev = rd6["mev"]
    names = list(pred["features"])
    vals = list(pred["features"].values())

    fig = go.Figure(
        go.Bar(y=names, x=vals, orientation="h", marker_color="#627EEA",
               text=[f"{v:.4f}" for v in vals], textposition="outside")
    )
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD6: Gas Prediction — R²={pred['r2']:.3f}, MAE={pred['mae']:.2f} Gwei "
                    f"| MEV candidates: {mev['candidates_z3']}"),
        xaxis_title="Feature Importance",
    )
    return fig
def build_rd7_arbitrage():
    """Build the RD7 arbitrage figure.

    Left panel: ADF statistic per tested pair with a dashed line at -2.86
    (labelled "5% critical"); right panel: divergence-signal count per pair.

    Returns:
        plotly.graph_objects.Figure: a 1x2 bar-chart figure.
    """
    rd7 = PRELOADED["rd7_arbitrage"]
    pairs = list(rd7["coint"])
    adfs = [rd7["coint"][p]["adf"] for p in pairs]
    sigs = rd7["signals"]
    counts = [s["count"] for s in sigs]

    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=("ADF Statistics (all < -2.86)", "Divergence Signals"),
                        horizontal_spacing=0.12)
    fig.add_trace(
        go.Bar(x=pairs, y=adfs, marker_color="#2ECC71",
               text=[f"{v:.2f}" for v in adfs], textposition="outside",
               showlegend=False),
        row=1, col=1,
    )
    fig.add_hline(y=-2.86, line_dash="dash", line_color="red",
                  annotation_text="5% critical", row=1, col=1)
    fig.add_trace(
        go.Bar(x=[s["pair"] for s in sigs], y=counts, marker_color="#3498DB",
               text=counts, textposition="outside", showlegend=False),
        row=1, col=2,
    )
    # Denominator is the number of tested pairs rather than a hard-coded 5.
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD7: Cross-Chain Arbitrage — {rd7['coint_pairs']}/{len(pairs)} "
                    f"cointegrated, {rd7['total_signals']:,} signals"),
    )
    return fig
def build_rd8_privacy():
    """Build the RD8 privacy figure.

    Left panel: ETC Shannon entropy, maximum entropy and normalized entropy;
    right panel: privacy risk score per UTXO chain.

    Returns:
        plotly.graph_objects.Figure: a 1x2 bar-chart figure.
    """
    rd8 = PRELOADED["rd8_privacy"]
    etc = rd8["etc"]
    utxo = UTXO_CHAINS
    risks = [rd8["utxo"][c]["risk_score"] for c in utxo]

    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=("ETC Address Entropy", "UTXO Privacy Risk"),
                        horizontal_spacing=0.12)
    fig.add_trace(
        go.Bar(x=["Shannon H", "Max H", "Norm H"],
               y=[etc["entropy"], etc["max_entropy"], etc["norm_entropy"]],
               marker_color=["#627EEA", "#95A5A6", "#E74C3C"],
               text=[f"{etc['entropy']:.2f}", f"{etc['max_entropy']:.2f}",
                     f"{etc['norm_entropy']:.3f}"],
               textposition="outside", showlegend=False),
        row=1, col=1,
    )
    fig.add_trace(
        go.Bar(x=utxo, y=risks, marker_color=[COLORS[c] for c in utxo],
               text=[f"{v:.3f}" for v in risks], textposition="outside",
               showlegend=False),
        row=1, col=2,
    )
    fig.update_layout(
        height=400, template="plotly_white",
        title_text=(f"RD8: Privacy — ETC {etc['reuse_rate']:.1%} address reuse, "
                    f"max reuse {etc['max_reuse']:,}×"),
    )
    return fig
def build_radar():
    """Radar chart comparing all chains across normalized dimensions.

    Each chain gets one closed polygon over six axes, every score scaled so
    that higher is better:

    * Fee Stability: ``1 - cv / max(cv)``
    * Whale Equality: ``1 - gini``
    * Reliability: fixed 1.0 for ETC and 0.9 for the other chains
    * AML Safety: ``1 - high_risk_rate`` (fixed 1.0 for ETC)
    * Velocity: ``log1p(velocity) / log1p(max velocity)``
    * Privacy: ETC normalized entropy, or ``1 - risk_score`` for UTXO chains

    Returns:
        plotly.graph_objects.Figure: the polar figure.
    """
    categories = ["Fee Stability", "Whale Equality", "Reliability",
                  "AML Safety", "Velocity", "Privacy"]
    rd1 = PRELOADED["rd1_fee"]
    rd2 = PRELOADED["rd2_whale"]
    rd4 = PRELOADED["rd4_aml"]
    rd5 = PRELOADED["rd5_velocity"]
    rd8 = PRELOADED["rd8_privacy"]

    # Normalizers are the same for every chain, so compute them once.
    max_cv = max(rd1[c]["cv"] for c in CHAIN_ORDER)
    log_max_vel = np.log1p(max(rd5[c]["velocity"] for c in CHAIN_ORDER))
    closed_theta = categories + [categories[0]]

    fig = go.Figure()
    for chain in CHAIN_ORDER:
        fee_stab = 1 - rd1[chain]["cv"] / max_cv
        if chain == "ETC":
            whale_eq = 1 - rd2["etc"]["gini"]
            aml_safe = 1.0  # No peeling chain metric for ETC
            privacy = rd8["etc"]["norm_entropy"]
            reliability = 1.0  # ETC has receipt_status
        else:
            whale_eq = 1 - rd2["utxo"][chain]["gini"]
            aml_safe = 1 - rd4["utxo"][chain]["high_risk_rate"]
            privacy = 1 - rd8["utxo"][chain]["risk_score"]
            reliability = 0.9
        velocity = np.log1p(rd5[chain]["velocity"]) / log_max_vel

        vals = [fee_stab, whale_eq, reliability, aml_safe, velocity, privacy]
        vals.append(vals[0])  # Close the radar
        fig.add_trace(go.Scatterpolar(
            r=vals, theta=closed_theta,
            fill="toself", name=chain,
            line_color=COLORS[chain], opacity=0.6,
        ))

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
        height=500, template="plotly_white",
        title_text="Cross-Chain Radar — Normalized Scores (higher = better)",
    )
    return fig
# -----------------------------------------------------------
# CUSTOM ANALYSIS ENGINE
# -----------------------------------------------------------
def _find_column(columns, predicate):
    """Return the first column whose lower-cased name satisfies *predicate*, else None."""
    return next((c for c in columns if predicate(str(c).lower())), None)


def _numeric(series):
    """Coerce *series* to numbers and drop missing or unparseable entries."""
    return pd.to_numeric(series, errors="coerce").dropna()


def analyze_custom_csv(file):
    """Analyze an uploaded CSV file and return results + visualization.

    Columns are located by name heuristics (value, fee, timestamp, sender
    address, status). Each section is optional and is skipped when no matching
    column exists.

    Args:
        file: ``None``, a filesystem path (``str`` / ``os.PathLike``), or an
            object exposing the path as ``.name`` (the uploaded-file wrapper).

    Returns:
        tuple: ``(markdown_report, figure)``. ``figure`` is ``None`` when no
        file was given or the CSV could not be read; the first element then
        holds the message to display.
    """
    if file is None:
        return "Please upload a CSV file.", None

    # The upload may arrive as a plain path or as a wrapper object with `.name`.
    path = file if isinstance(file, (str, os.PathLike)) else file.name
    try:
        df = pd.read_csv(path)
    except Exception as e:  # UI boundary: show any read/parse failure to the user
        return f"Error reading CSV: {e}", None

    columns = list(df.columns)
    lowered = [str(c).lower() for c in columns]
    n = len(df)
    report = [
        f"## Dataset: {n:,} rows × {len(columns)} columns",
        f"**Columns:** {', '.join(map(str, columns))}",
    ]

    # Auto-detect chain type: gas / from-style columns indicate an account model.
    is_etc = any("gas" in c or "from" in c for c in lowered)
    report.append(
        f"**Detected type:** {'Account-based (ETC-like)' if is_etc else 'UTXO-based'}"
    )

    fig = make_subplots(rows=2, cols=2,
                        subplot_titles=("Value Distribution", "Fee Distribution",
                                        "Temporal Activity", "Concentration"),
                        horizontal_spacing=0.1, vertical_spacing=0.15)

    # --- Value column: by name first, else the first int/float column ---
    value_markers = ("value", "input_btc", "input_doge", "input_bch", "input_dash")
    val_col = _find_column(columns, lambda name: any(m in name for m in value_markers))
    if val_col is None:
        val_col = next(
            (c for c in columns
             if df[c].dtype.kind in "fiu" and str(c).lower() != "block_number"),
            None,
        )
    if val_col is not None:
        vals = _numeric(df[val_col])
        report.append(f"\n### Value Analysis (`{val_col}`)")
        if vals.empty:
            report.append("- No numeric values found")
        else:
            mean = vals.mean()
            report.extend([
                f"- Mean: {mean:.6f}",
                f"- Median: {vals.median():.6f}",
                f"- Std: {vals.std():.6f}",
                f"- CV: {vals.std() / mean:.4f}" if mean != 0 else "- CV: N/A",
                f"- Skewness: {vals.skew():.4f}",
                f"- Kurtosis: {vals.kurtosis():.4f}",
            ])
            vals_pos = vals[vals > 0]
            if len(vals_pos) > 10:
                # Gini coefficient from the rank-weighted sum of sorted values.
                ordered = np.sort(vals_pos.to_numpy())
                count = len(ordered)
                ranks = np.arange(1, count + 1)
                gini = float((2 * np.sum(ranks * ordered)) / (count * np.sum(ordered))
                             - (count + 1) / count)
                t99 = vals_pos.quantile(0.99)
                whale_vol = vals_pos[vals_pos >= t99].sum() / vals_pos.sum() * 100
                report.append(f"- **Gini coefficient: {gini:.4f}**")
                report.append(f"- **Top 1% volume share: {whale_vol:.1f}%**")
                fig.add_trace(go.Histogram(x=np.log1p(vals_pos), nbinsx=50,
                                           marker_color="#627EEA", name="log(1+value)"),
                              row=1, col=1)

    # --- Fee column ---
    fee_col = _find_column(columns, lambda name: "fee" in name or "gas_price" in name)
    if fee_col is not None:
        fees = _numeric(df[fee_col])
        report.append(f"\n### Fee Analysis (`{fee_col}`)")
        if fees.empty:
            report.append("- No numeric values found")
        else:
            fee_mean = fees.mean()
            report.extend([
                f"- Mean: {fee_mean:.8f}",
                f"- Median: {fees.median():.8f}",
                f"- CV: {fees.std() / fee_mean:.4f}" if fee_mean != 0 else "- CV: N/A",
            ])
            fees_pos = fees[fees > 0]
            if len(fees_pos) > 10:
                fig.add_trace(go.Histogram(x=np.log1p(fees_pos), nbinsx=50,
                                           marker_color="#F7931A", name="log(1+fee)"),
                              row=1, col=2)

    # --- Temporal analysis (best effort: a parse failure is reported, not raised) ---
    ts_col = _find_column(columns, lambda name: "timestamp" in name)
    if ts_col is not None:
        try:
            ts = pd.to_datetime(df[ts_col], format="mixed", utc=True)
            hours = ts.dt.hour
            bh_ratio = ((hours >= 9) & (hours <= 17)).mean()
            hour_counts = hours.value_counts().sort_index()
        except Exception as e:
            report.append("\n### Temporal Analysis")
            report.append(f"- Could not parse `{ts_col}` as timestamps: {e}")
        else:
            report.extend([
                "\n### Temporal Analysis",
                f"- Business hours (9-17 UTC): {bh_ratio:.1%}",
                f"- Time span: {ts.min()} to {ts.max()}",
            ])
            fig.add_trace(go.Bar(x=hour_counts.index, y=hour_counts.values,
                                 marker_color="#C2A633", name="Hourly activity"),
                          row=2, col=1)

    # --- Sender address analysis (if ETC-like) ---
    addr_col = _find_column(columns, lambda name: "from" in name and "addr" in name)
    if addr_col is None:
        addr_col = _find_column(columns, lambda name: name.startswith("from"))
    if addr_col is not None:
        addr_counts = df[addr_col].value_counts()
        unique = len(addr_counts)
        report.append(f"\n### Address Analysis (`{addr_col}`)")
        if unique == 0:
            report.append("- No addresses found")
        else:
            reused = (addr_counts > 1).sum()
            report.append(f"- Unique addresses: {unique:,}")
            report.append(f"- Reuse rate: {reused / unique:.1%}")
            probs = addr_counts.values / addr_counts.values.sum()
            H = -np.sum(probs * np.log2(probs + 1e-15))
            # A single address has zero entropy; use 1 to avoid dividing by log2(1) = 0.
            Hmax = np.log2(unique) if unique > 1 else 1
            report.append(
                f"- **Shannon entropy: {H:.2f} / {Hmax:.2f} (norm: {H / Hmax:.3f})**"
            )
            top20 = addr_counts.head(20)
            fig.add_trace(go.Bar(x=[f"Addr{i}" for i in range(len(top20))],
                                 y=top20.values, marker_color="#8DC351",
                                 name="Top addresses"),
                          row=2, col=2)

    # --- Receipt status (if present) ---
    # Prefer a real "*status*" column: other "receipt_*" columns (gas used,
    # contract address, ...) are not success flags.
    status_col = _find_column(columns, lambda name: "status" in name)
    if status_col is None:
        status_col = _find_column(columns, lambda name: "receipt" in name)
    if status_col is not None:
        status = _numeric(df[status_col])
        report.append(f"\n### Reliability (`{status_col}`)")
        if status.empty or not status.isin([0, 1]).all():
            report.append("- Column is not a 0/1 status flag; reliability not computed")
        else:
            report.append(f"- Success rate: {status.mean():.4%}")
            report.append(f"- Failures: {int((status == 0).sum())}")

    fig.update_layout(height=550, template="plotly_white",
                      title_text=f"Custom Analysis: {n:,} transactions",
                      showlegend=False)
    return "\n".join(report), fig
# -----------------------------------------------------------
# GRADIO APP
# -----------------------------------------------------------
# Markdown shown at the top of the app: headline finding per research dimension.
# NOTE(review): the heading emoji was restored from mis-decoded bytes and is a
# best guess; confirm against the deployed Space.
SUMMARY_MD = """
# 🔗 Blockchain Intelligence Dashboard
### 8-Dimension Cross-Chain Analysis of 50,000 Real Transactions

| Dimension | Key Finding |
|-----------|-------------|
| **RD1** Fee Markets | BCH highest CV (15.89), BTC most stable (3.12). Levene p<0.001 |
| **RD2** Whales | DOGE Gini = 0.998. Top 1% controls 53-99% of volume |
| **RD3** Reliability | ETC 99.93% success. Failures unpredictable (AUC=0.499) |
| **RD4** AML | 15,330 peeling chains. BTC risk rate 40.1% |
| **RD5** Velocity | 12,400× gap: DOGE (30,978) vs BTC (2.49) |
| **RD6** Gas/MEV | R²=0.269. Moving averages = 96% importance. Only 4 MEV |
| **RD7** Arbitrage | All 5 pairs cointegrated. 1,615 divergence signals |
| **RD8** Privacy | ETC 55.6% address reuse. Norm entropy 0.715 |

**Dataset:** [Omarrran/50k_Cryptocurrency_Transaction_Dataset_by_HNM](https://huggingface.co/datasets/Omarrran/50k_Cryptocurrency_Transaction_Dataset_by_HNM)

**Chains:** ETC (account) · BTC · DOGE · BCH · DASH (UTXO)
"""
# App layout: summary header, one tab per research dimension (plot + insight),
# a CSV-upload tab wired to analyze_custom_csv, and a footer.
# NOTE(review): emoji in the labels were restored from mis-decoded bytes; where
# the distinguishing byte was lost the glyph is a best guess. Confirm against
# the deployed Space.
with gr.Blocks(title="Blockchain Intelligence", theme=gr.themes.Soft()) as demo:
    gr.Markdown(SUMMARY_MD)
    with gr.Tabs():
        with gr.TabItem("📊 Overview"):
            # Builders are passed as callables, not called here.
            gr.Plot(value=build_overview)
            gr.Plot(value=build_radar)
        with gr.TabItem("💰 RD1: Fee Markets"):
            gr.Plot(value=build_rd1_fee)
            gr.Markdown("""
            **Insight:** All chains exhibit extreme heavy-tailed fee distributions (CV 3.1–15.9).
            BCH's CV of 15.89 reflects sporadic high-fee events on low base volume.
            ETC's kurtosis of 8,513 means extreme outliers dominate — median is 1.0 Gwei but mean is 6.2 Gwei.
            Levene's test (W=51.4, p<0.001) confirms account vs UTXO fee mechanisms produce fundamentally different profiles.
            """)
        with gr.TabItem("🐋 RD2: Whales"):
            gr.Plot(value=build_rd2_whale)
            gr.Markdown("""
            **Insight:** Wealth concentration is universal and extreme. DOGE's Gini of 0.998 means virtually all
            economic activity flows through whale accounts. ETC: mean 75.7 vs median 0.36 (207× ratio).
            KS test shows whales DON'T transact at different times (p=0.635) — surprising for institutional actors.
            Cross-chain correlations are negligible (|r|<0.1) — each chain has independent whale populations.
            """)
        with gr.TabItem("✅ RD3: Reliability"):
            gr.Plot(value=build_rd3_reliability)
            gr.Markdown("""
            **Insight:** Only 7/10,000 ETC transactions failed (0.07%). Random Forest AUC of 0.499 means
            failures are genuinely unpredictable from transaction features — they're essentially random events.
            Gas limit and zero-value indicator dominate importance but provide no actionable signal.
            """)
        with gr.TabItem("🚨 RD4: AML"):
            gr.Plot(value=build_rd4_aml)
            gr.Markdown("""
            **Insight:** BTC's 40.1% high-risk rate reflects documented use in layering operations.
            DOGE has only 383 peeling chains but 93.3% round outputs — that's micro-payment culture, not laundering.
            ETC's 90.6% rapid-sequence rate reflects 13-second block time, not suspicious activity.
            DBSCAN found 9 clusters on DOGE vs 3 on other chains — more diverse transaction patterns.
            """)
        with gr.TabItem("⚡ RD5: Velocity"):
            gr.Plot(value=build_rd5_velocity)
            gr.Markdown("""
            **Insight:** DOGE velocity of 30,978 vs BTC's 2.49 empirically confirms payment token vs store-of-value.
            BTC and DOGE show 0% business-hours activity (automated/non-UTC users).
            DASH has highest business-hours ratio (58.4%) consistent with merchant payment use case.
            """)
        with gr.TabItem("⛽ RD6: Gas & MEV"):
            gr.Plot(value=build_rd6_mev)
            gr.Markdown("""
            **Insight:** R²=0.269 — modest but meaningful. Moving averages (ma10 + ma30) account for 96.3% of
            prediction power, revealing strong mean-reversion behavior in ETC gas prices.
            Only 4 MEV candidates (0.04%) — ETC's minimal DeFi activity precludes meaningful extraction.
            """)
        with gr.TabItem("🔄 RD7: Arbitrage"):
            gr.Plot(value=build_rd7_arbitrage)
            gr.Markdown("""
            **Insight:** All 5 pairs cointegrated despite near-zero contemporaneous correlation (|r|<0.1).
            This reveals shared long-run equilibrium driven by latent factors (market sentiment).
            1,615 divergence signals (16.2% of observations) exceed random-walk expectations.
            BTC-DOGE maximum divergence of 15.02σ reflects the 1,300× nominal value difference.
            """)
        with gr.TabItem("🔒 RD8: Privacy"):
            gr.Plot(value=build_rd8_privacy)
            gr.Markdown("""
            **Insight:** ETC privacy is severely compromised — one address appears 2,487 times.
            55.6% reuse rate and normalized entropy of 0.715 mean 28.5% of address diversity is lost.
            DOGE has highest UTXO risk (0.636) due to 93.3% round outputs + 97.8% single-input transactions.
            DASH achieves lowest risk (0.442) despite limited PrivateSend adoption in this sample.
            """)
        with gr.TabItem("🔬 Analyze Your Data"):
            gr.Markdown("""
            ### Upload a CSV to analyze
            Supports any blockchain transaction CSV. The tool auto-detects columns for:
            values, fees, timestamps, addresses, and receipt status.
            """)
            file_input = gr.File(label="Upload CSV", file_types=[".csv"])
            analyze_btn = gr.Button("🔍 Analyze", variant="primary")
            result_md = gr.Markdown()
            result_plot = gr.Plot()
            # The click handler returns (markdown report, figure) in output order.
            analyze_btn.click(fn=analyze_custom_csv, inputs=[file_input],
                              outputs=[result_md, result_plot])
    gr.Markdown("""
    ---
    *Built from real blockchain data (Nov 2024). Paper: "Comprehensive Cross-Chain Cryptocurrency Analysis:
    Eight Dimensions of Blockchain Intelligence" •
    [Dataset](https://huggingface.co/datasets/Omarrran/50k_Cryptocurrency_Transaction_Dataset_by_HNM)*
    """)
# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()