Spaces:
Build error
Build error
| import pandas as pd | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from datetime import datetime, timedelta | |
| from typing import Dict, List, Optional, Union, Any, Tuple | |
| import io | |
| import base64 | |
| import matplotlib.pyplot as plt | |
| from matplotlib.backends.backend_pdf import PdfPages | |
| from reportlab.lib.pagesizes import letter | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib import colors | |
| from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer | |
| from reportlab.lib.styles import getSampleStyleSheet | |
class Visualizer:
    """
    Generate visualizations and reports for whale transaction data.

    Every public method takes a DataFrame of transactions and leaves it
    untouched: derived values (parsed timestamps, scaled amounts, time
    buckets) are computed on private copies.

    Columns are looked up under two spellings:
        timestamps: 'Timestamp' or 'timeStamp'
        addresses:  'From'/'To' or 'from'/'to'
        amounts:    'Amount', 'tokenAmount' or 'value' (optionally scaled by
                    'tokenDecimal')
    """

    # All-zero address: a transfer *from* it is labelled "buy" and a transfer
    # *to* it is labelled "sell" on the timeline.
    _ZERO_ADDRESS = "0x0000000000000000000000000000000000000000"
    _TIMESTAMP_COLUMNS = ("Timestamp", "timeStamp")
    _ADDRESS_COLUMN_PAIRS = (("From", "To"), ("from", "to"))
    # First value of the synthetic hourly timestamps used when real ones are
    # missing or cannot be parsed.
    _PLACEHOLDER_START = "2025-01-01"
    # Upper-case aliases that recent pandas versions deprecate, mapped to the
    # spellings accepted by both old and new versions.
    _LEGACY_FREQ_ALIASES = {"H": "h", "T": "min", "S": "s"}

    def __init__(self):
        # Marker colour per transaction type on the timeline.
        self.color_map = {
            "buy": "green",
            "sell": "red",
            "transfer": "blue",
            "other": "gray"
        }

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _message_figure(title: str,
                        message: str,
                        x_title: str = "",
                        y_title: str = "",
                        color: Optional[str] = None) -> "go.Figure":
        """Build an empty 400px figure that only shows a text annotation."""
        fig = go.Figure()
        fig.update_layout(
            title=title,
            xaxis_title=x_title,
            yaxis_title=y_title,
            height=400,
            template="plotly_white"
        )
        font = dict(size=14) if color is None else dict(size=14, color=color)
        fig.add_annotation(text=message, showarrow=False, font=font)
        return fig

    @staticmethod
    def _first_present(df: pd.DataFrame, candidates) -> Optional[str]:
        """Return the first candidate column name found in df, or None."""
        return next((name for name in candidates if name in df.columns), None)

    @classmethod
    def _address_columns(cls, df: pd.DataFrame) -> Optional[Tuple[str, str]]:
        """Return the (sender, receiver) column names present in df, or None."""
        for sender_col, receiver_col in cls._ADDRESS_COLUMN_PAIRS:
            if sender_col in df.columns and receiver_col in df.columns:
                return sender_col, receiver_col
        return None

    @classmethod
    def _placeholder_timestamps(cls, count: int) -> pd.DatetimeIndex:
        """Return `count` synthetic timestamps spaced one hour apart."""
        return pd.date_range(
            start=cls._PLACEHOLDER_START,
            periods=count,
            freq=pd.Timedelta(hours=1)
        )

    @staticmethod
    def _parse_timestamps(series: pd.Series) -> pd.Series:
        """
        Convert a timestamp column to datetimes.

        Datetime columns are returned unchanged. Values convertible to float
        are read as epoch seconds. Anything else (e.g. ISO date strings) is
        handed to pandas' general datetime parser.

        Raises:
            ValueError / TypeError / OverflowError if the values cannot be
            interpreted as timestamps.
        """
        if pd.api.types.is_datetime64_any_dtype(series):
            return series
        try:
            seconds = series.astype(float)
        except (TypeError, ValueError):
            # Not numeric at all, so let pandas parse the text.
            return pd.to_datetime(series)
        # Numeric values are always epoch seconds; an out-of-range value is
        # an error rather than something to reinterpret silently.
        return pd.to_datetime(seconds, unit='s')

    def _prepare_timestamps(self, df: pd.DataFrame) -> str:
        """
        Make sure df has a datetime column and return its name.

        df is modified in place, so callers must pass a private copy. When no
        timestamp column exists, or it cannot be parsed, synthetic hourly
        timestamps are used so a chart can still be drawn.
        """
        timestamp_col = self._first_present(df, self._TIMESTAMP_COLUMNS)
        if timestamp_col is None:
            timestamp_col = 'dummy_timestamp'
            df[timestamp_col] = self._placeholder_timestamps(len(df))
            return timestamp_col
        try:
            df[timestamp_col] = self._parse_timestamps(df[timestamp_col])
        except (TypeError, ValueError, OverflowError) as e:
            print(f"Error converting timestamp: {str(e)}")
            df[timestamp_col] = self._placeholder_timestamps(len(df))
        return timestamp_col

    @staticmethod
    def _as_number(series: pd.Series) -> pd.Series:
        """
        Return a numeric version of series.

        Numeric columns are returned unchanged. Other columns (typically
        numbers stored as strings) are converted to float; values that cannot
        be parsed become NaN, which the sums in this class skip.
        """
        if pd.api.types.is_numeric_dtype(series):
            return series
        return pd.to_numeric(series, errors="coerce").astype(float)

    def _resolve_amounts(self,
                         df: pd.DataFrame,
                         adjust_decimals: bool = True) -> Optional[pd.Series]:
        """
        Return the numeric amount of every transaction, or None.

        Lookup order is 'Amount', 'tokenAmount', 'value'. When 'value' is used
        and adjust_decimals is true, it is divided by 10 ** tokenDecimal if a
        'tokenDecimal' column exists.
        """
        for name in ("Amount", "tokenAmount"):
            if name in df.columns:
                return self._as_number(df[name])
        if "value" not in df.columns:
            return None
        amounts = self._as_number(df["value"])
        if adjust_decimals and "tokenDecimal" in df.columns:
            decimals = self._as_number(df["tokenDecimal"]).astype(float)
            # Float arithmetic: an integer 10 ** n overflows int64 for n > 18.
            amounts = amounts.astype(float) / (10.0 ** decimals)
        return amounts

    @classmethod
    def _bucket_timestamps(cls, timestamps: pd.Series, time_window: str) -> pd.Series:
        """
        Map every timestamp to the start of its aggregation window.

        'D' yields plain dates. Any other window is passed to Series.dt.floor,
        which raises ValueError for frequencies that are not fixed-length.
        """
        if time_window == "D":
            return timestamps.dt.date
        freq = cls._LEGACY_FREQ_ALIASES.get(time_window, time_window)
        return timestamps.dt.floor(freq)

    @classmethod
    def _describe_transaction(cls, sender: Any, receiver: Any) -> Tuple[str, str]:
        """Return (transaction type, first hover line) for one transfer."""
        if pd.isna(sender) or sender == cls._ZERO_ADDRESS:
            return "buy", f"Buy: {receiver}"
        if pd.isna(receiver) or receiver == cls._ZERO_ADDRESS:
            return "sell", f"Sell: {sender}"
        return "transfer", f"Transfer: {sender} → {receiver}"

    @classmethod
    def _count_unique_addresses(cls, df: pd.DataFrame) -> str:
        """Return the number of distinct sender/receiver addresses as text."""
        address_cols = cls._address_columns(df)
        if address_cols is None:
            return "N/A"
        sender_col, receiver_col = address_cols
        addresses = pd.concat([df[sender_col], df[receiver_col]], ignore_index=True)
        return str(addresses.nunique())

    @staticmethod
    def _format_number(value: Any) -> str:
        """Format value with two decimals, falling back to str() if not numeric."""
        try:
            return f"{float(value):.2f}"
        except (TypeError, ValueError):
            return str(value)

    # ------------------------------------------------------------------
    # Charts
    # ------------------------------------------------------------------
    def create_transaction_timeline(self, transactions_df: pd.DataFrame) -> "go.Figure":
        """
        Create a timeline visualization of transactions

        Each transaction becomes one marker; markers are grouped into one
        trace per transaction type ("buy", "sell", "transfer") so the legend
        has one entry per type.

        Args:
            transactions_df: DataFrame of transactions (not modified)
        Returns:
            Plotly figure object. An annotated placeholder figure is returned
            when the data is empty or the chart cannot be built.
        """
        if transactions_df.empty:
            return self._message_figure(
                "No Transaction Data Available",
                "No transaction data available for timeline",
                x_title="Date",
                y_title="Action"
            )
        try:
            df = transactions_df.copy()
            timestamp_col = self._prepare_timestamps(df)
            address_cols = self._address_columns(df)
            if address_cols is None:
                return self._message_figure(
                    "Error in Transaction Timeline",
                    "Error generating timeline: From/To columns not found in transactions data",
                    color="red"
                )
            from_col, to_col = address_cols

            # tx_type -> (x values, hover texts), in order of first appearance
            points: Dict[str, Tuple[list, list]] = {}
            for record in df.to_dict("records"):
                tx_type, hover_text = self._describe_transaction(
                    record[from_col], record[to_col]
                )
                # Add amount to hover text if available
                if 'Amount' in record:
                    hover_text += f"<br>Amount: {record['Amount']}"
                elif 'value' in record:
                    hover_text += f"<br>Value: {record['value']}"
                # Add token info if available
                if 'tokenSymbol' in record:
                    hover_text += f"<br>Token: {record['tokenSymbol']}"
                xs, texts = points.setdefault(tx_type, ([], []))
                xs.append(record[timestamp_col])
                texts.append(hover_text)

            fig = go.Figure()
            for tx_type, (xs, texts) in points.items():
                fig.add_trace(go.Scatter(
                    x=xs,
                    y=[tx_type] * len(xs),
                    mode='markers',
                    marker=dict(
                        size=12,
                        color=self.color_map.get(tx_type, "gray"),
                        line=dict(width=1, color='black')
                    ),
                    name=tx_type,
                    text=texts,
                    hoverinfo='text'
                ))
            fig.update_layout(
                title='Whale Transaction Timeline',
                xaxis_title='Time',
                yaxis_title='Transaction Type',
                height=400,
                template='plotly_white',
                showlegend=True,
                hovermode='closest'
            )
            return fig
        except Exception as e:
            # Charts are best-effort: show the failure instead of raising.
            print(f"Error creating transaction timeline: {str(e)}")
            return self._message_figure(
                "Error in Transaction Timeline",
                f"Error generating timeline: {str(e)}",
                color="red"
            )

    def create_volume_chart(self, transactions_df: pd.DataFrame, time_window: str = 'D') -> "go.Figure":
        """
        Create a volume chart aggregated by time window

        Bars show the summed amount per window; a line on a secondary axis
        shows the number of transactions per window. If no amount column is
        found, every transaction counts as 1.0.

        Args:
            transactions_df: DataFrame of transactions (not modified)
            time_window: Time window for aggregation (e.g., 'D' for day, 'H' for hour).
                Windows other than 'D' must be fixed-length frequencies
                accepted by pandas' Series.dt.floor.
        Returns:
            Plotly figure object. An annotated placeholder figure is returned
            when the data is empty or the chart cannot be built.
        """
        if transactions_df.empty:
            return self._message_figure(
                "No Transaction Data Available",
                "No transactions found for volume analysis",
                x_title="Date",
                y_title="Volume"
            )
        try:
            df = transactions_df.copy()
            timestamp_col = self._prepare_timestamps(df)
            amounts = self._resolve_amounts(df)
            if amounts is None:
                # No amount information: fall back to one unit per transaction.
                amounts = pd.Series(1.0, index=df.index)
            buckets = self._bucket_timestamps(df[timestamp_col], time_window)

            # Plain arrays avoid any index alignment between the columns.
            per_window = pd.DataFrame({
                "Date": buckets.to_numpy(),
                "Volume": amounts.to_numpy(),
                "Stamp": df[timestamp_col].to_numpy()
            }).groupby("Date")
            volume_data = pd.DataFrame({
                "Volume": per_window["Volume"].sum(),
                "Count": per_window["Stamp"].count()
            }).reset_index()

            fig = go.Figure()
            fig.add_trace(go.Bar(
                x=volume_data['Date'],
                y=volume_data['Volume'],
                name='Volume',
                marker_color='blue',
                opacity=0.7
            ))
            fig.add_trace(go.Scatter(
                x=volume_data['Date'],
                y=volume_data['Count'],
                name='Transaction Count',
                mode='lines+markers',
                marker=dict(color='red'),
                yaxis='y2'
            ))
            fig.update_layout(
                title="Transaction Volume Over Time",
                xaxis_title="Date",
                yaxis_title="Volume",
                yaxis2=dict(
                    title="Transaction Count",
                    overlaying="y",
                    side="right"
                ),
                height=500,
                template="plotly_white",
                hovermode="x unified",
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="right",
                    x=1
                )
            )
            return fig
        except Exception as e:
            # Charts are best-effort: show the failure instead of raising.
            print(f"Error in create_volume_chart: {str(e)}")
            return self._message_figure(
                "Error in Volume Chart",
                f"Error generating volume chart: {str(e)}",
                color="red"
            )

    def plot_volume_by_day(self, transactions_df: pd.DataFrame) -> "go.Figure":
        """
        Create a volume chart aggregated by day

        Thin wrapper around create_volume_chart with time_window='D'.

        Args:
            transactions_df: DataFrame of transactions
        Returns:
            Plotly figure object
        """
        return self.create_volume_chart(transactions_df, time_window='D')

    def plot_transaction_flow(self, transactions_df: pd.DataFrame) -> "go.Figure":
        """
        Create a network flow visualization of transactions between wallets

        Amounts are summed per (sender, receiver) pair and the 20 largest
        flows are drawn as a Sankey diagram.

        Args:
            transactions_df: DataFrame of transactions (not modified)
        Returns:
            Plotly figure object. An annotated placeholder figure is returned
            when the data is empty, required columns are missing, or the
            chart cannot be built.
        """
        if transactions_df.empty:
            return self._message_figure(
                "No Transaction Flow Data Available",
                "No transactions found for flow analysis"
            )
        try:
            address_cols = self._address_columns(transactions_df)
            if address_cols is None:
                return self._message_figure(
                    "Transaction Flow Error",
                    "From/To columns not found in transactions data",
                    color="red"
                )
            from_col, to_col = address_cols

            amounts = self._resolve_amounts(transactions_df)
            if amounts is None:
                return self._message_figure(
                    "Transaction Flow Error",
                    "Amount column not found in transactions data",
                    color="red"
                )

            # Aggregate flows between wallets on a fresh frame so the
            # caller's data is never touched.
            flows = pd.DataFrame({
                from_col: transactions_df[from_col].to_numpy(),
                to_col: transactions_df[to_col].to_numpy(),
                "Value": amounts.to_numpy()
            })
            flow_df = (
                flows.groupby([from_col, to_col])["Value"]
                .agg(["sum", "count"])
                .reset_index()
            )
            flow_df.columns = [from_col, to_col, 'Value', 'Count']
            # Limit to top 20 flows to keep visualization readable
            top_flows = flow_df.sort_values('Value', ascending=False).head(20)

            # Sankey nodes are referenced by position, so map each address
            # to an index first.
            all_addresses = pd.unique(top_flows[[from_col, to_col]].values.ravel('K'))
            address_to_idx = {addr: i for i, addr in enumerate(all_addresses)}
            sources = [address_to_idx[addr] for addr in top_flows[from_col]]
            targets = [address_to_idx[addr] for addr in top_flows[to_col]]
            values = top_flows['Value'].tolist()

            hover_text = [
                f"From: {src}<br>To: {tgt}<br>Value: {val:.2f}<br>Count: {cnt}"
                for src, tgt, val, cnt in zip(
                    top_flows[from_col], top_flows[to_col],
                    top_flows['Value'], top_flows['Count']
                )
            ]
            # Shorten long addresses for node labels; str() keeps non-string
            # identifiers from breaking the slicing.
            node_labels = []
            for addr in all_addresses:
                label = str(addr)
                if len(label) > 12:
                    label = f"{label[:6]}...{label[-4:]}"
                node_labels.append(label)

            fig = go.Figure(data=[go.Sankey(
                node=dict(
                    pad=15,
                    thickness=20,
                    line=dict(color="black", width=0.5),
                    label=node_labels,
                    color="blue"
                ),
                link=dict(
                    source=sources,
                    target=targets,
                    value=values,
                    label=hover_text,
                    hovertemplate='%{label}<extra></extra>'
                )
            )])
            fig.update_layout(
                title="Whale Transaction Flow",
                font_size=12,
                height=600,
                template="plotly_white"
            )
            return fig
        except Exception as e:
            # Charts are best-effort: show the failure instead of raising.
            print(f"Error in plot_transaction_flow: {str(e)}")
            return self._message_figure(
                "Error in Transaction Flow",
                f"Error generating transaction flow: {str(e)}",
                color="red"
            )

    # ------------------------------------------------------------------
    # Reports
    # ------------------------------------------------------------------
    def generate_pdf_report(self,
                            transactions_df: pd.DataFrame,
                            patterns: Optional[List[Dict[str, Any]]] = None,
                            price_impact: Optional[Dict[str, Any]] = None,
                            alerts: Optional[List[Dict[str, Any]]] = None,
                            title: str = "Whale Analysis Report",
                            start_date: Optional[datetime] = None,
                            end_date: Optional[datetime] = None) -> bytes:
        """
        Generate a PDF report of whale activity

        All text placed in paragraphs is XML-escaped, because ReportLab
        parses paragraph text as markup.

        Args:
            transactions_df: DataFrame of transactions (not modified)
            patterns: List of pattern dictionaries; the keys 'name',
                'description', 'risk_profile' and 'confidence' are used
            price_impact: Dictionary of price impact analysis; the keys
                'avg_impact', 'max_impact' and 'insights' are used
            alerts: List of alert dictionaries; the keys 'level' and
                'message' are used
            title: Report title
            start_date: Start date for report period
            end_date: End date for report period
        Returns:
            PDF report as bytes
        """
        # Local import: only this method needs it.
        from xml.sax.saxutils import escape

        styles = getSampleStyleSheet()

        def paragraph(lines, style_name: str) -> Paragraph:
            # ReportLab collapses "\n" like any other whitespace; explicit
            # <br/> tags are required for line breaks.
            markup = "<br/>".join(escape(str(line)) for line in lines)
            return Paragraph(markup, styles[style_name])

        buffer = io.BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=letter)
        elements = [paragraph([title], 'Title')]

        # Add date range
        if start_date and end_date:
            date_range = f"Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"
            elements.append(paragraph([date_range], 'Heading2'))
        elements.append(Spacer(1, 12))

        # Add transaction summary
        if not transactions_df.empty:
            elements.append(paragraph(["Transaction Summary"], 'Heading2'))
            summary_data = [
                ["Total Transactions", str(len(transactions_df))],
                ["Unique Addresses", self._count_unique_addresses(transactions_df)]
            ]
            # Add token breakdown if available
            if 'tokenSymbol' in transactions_df.columns:
                token_counts = transactions_df['tokenSymbol'].value_counts()
                # value_counts() is empty when every symbol is missing.
                if not token_counts.empty:
                    summary_data.append([
                        "Most Common Token",
                        f"{token_counts.index[0]} ({token_counts.iloc[0]} txns)"
                    ])
            summary_table = Table(summary_data)
            summary_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey),
                ('GRID', (0, 0), (-1, -1), 1, colors.black),
                ('PADDING', (0, 0), (-1, -1), 6),
            ]))
            elements.append(summary_table)
            elements.append(Spacer(1, 12))

        # Add pattern analysis
        if patterns:
            elements.append(paragraph(["Trading Patterns Detected"], 'Heading2'))
            for number, pattern in enumerate(patterns, start=1):
                lines = [
                    f"Pattern {number}: {pattern.get('name', 'Unnamed')}",
                    f"Description: {pattern.get('description', 'No description')}"
                ]
                if 'risk_profile' in pattern:
                    lines.append(f"Risk Profile: {pattern['risk_profile']}")
                if 'confidence' in pattern:
                    lines.append(f"Confidence: {self._format_number(pattern['confidence'])}")
                elements.append(paragraph(lines, 'Normal'))
                elements.append(Spacer(1, 6))
            elements.append(Spacer(1, 12))

        # Add price impact analysis
        if price_impact:
            elements.append(paragraph(["Price Impact Analysis"], 'Heading2'))
            lines = []
            if 'avg_impact' in price_impact:
                lines.append(f"Average Impact: {self._format_number(price_impact['avg_impact'])}%")
            if 'max_impact' in price_impact:
                lines.append(f"Maximum Impact: {self._format_number(price_impact['max_impact'])}%")
            if 'insights' in price_impact:
                lines.append(f"Insights: {price_impact['insights']}")
            elements.append(paragraph(lines, 'Normal'))
            elements.append(Spacer(1, 12))

        # Add alerts
        if alerts:
            elements.append(paragraph(["Alerts"], 'Heading2'))
            for alert in alerts:
                alert_text = f"{alert.get('level', 'Info')}: {alert.get('message', 'No details')}"
                elements.append(paragraph([alert_text], 'Normal'))
                elements.append(Spacer(1, 6))

        # Build the PDF
        doc.build(elements)
        return buffer.getvalue()

    def generate_csv_report(self,
                            transactions_df: pd.DataFrame,
                            report_type: str = "Transaction Summary") -> str:
        """
        Generate a CSV report of transaction data

        Args:
            transactions_df: DataFrame of transactions (not modified)
            report_type: Type of report to generate:
                "Transaction Summary" - the transactions as-is
                "Daily Volume" - one row per calendar day with the columns
                    Date, Volume (sum of amounts, not decimal-adjusted) and
                    Transactions (number of rows that day)
        Returns:
            CSV data as string, or a plain-text message when the report
            cannot be produced (empty data, unknown report type, missing or
            unparseable columns)
        """
        if transactions_df.empty:
            return "No data available for report"
        if report_type == "Transaction Summary":
            return transactions_df.to_csv(index=False)
        if report_type != "Daily Volume":
            return "Unknown report type"

        timestamp_col = self._first_present(transactions_df, self._TIMESTAMP_COLUMNS)
        if timestamp_col is None:
            return "Timestamp column not found"
        try:
            timestamps = self._parse_timestamps(transactions_df[timestamp_col])
        except (TypeError, ValueError, OverflowError):
            return "Error processing timestamp data"

        amounts = self._resolve_amounts(transactions_df, adjust_decimals=False)
        if amounts is None:
            return "Amount column not found"

        # Aggregate by day on a fresh frame; rows are counted directly, so no
        # 'hash' (or any other) column is required.
        per_day = pd.DataFrame({
            "Date": timestamps.dt.date.to_numpy(),
            "amount": amounts.to_numpy()
        }).groupby("Date")["amount"]
        daily_volume = pd.DataFrame({
            "Volume": per_day.sum(),
            "Transactions": per_day.size()
        }).reset_index()
        return daily_volume.to_csv(index=False)

    def generate_png_chart(self,
                           fig: "go.Figure",
                           width: int = 1200,
                           height: int = 800) -> bytes:
        """
        Convert a Plotly figure to PNG image data

        Delegates to fig.to_image, which needs a Plotly static-image export
        backend to be installed.

        Args:
            fig: Plotly figure object
            width: Image width in pixels
            height: Image height in pixels
        Returns:
            PNG image as bytes
        """
        return fig.to_image(format="png", width=width, height=height)