# Source: oracle/models/model.py
# (uploaded to the Hugging Face Hub via huggingface_hub; revision d195287)
# model.py (REFACTORED AND FIXED)
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel, LlamaConfig
from typing import List, Dict, Any, Optional, Tuple
import os
import json
# --- NOW, we import all the encoders ---
from models.helper_encoders import ContextualTimeEncoder
from models.token_encoder import TokenEncoder
from models.wallet_encoder import WalletEncoder
from models.graph_updater import GraphUpdater
from models.ohlc_embedder import OHLCEmbedder
from models.quant_ohlc_embedder import QuantOHLCEmbedder
from models.HoldersEncoder import HolderDistributionEncoder # NEW
from models.SocialEncoders import SocialEncoder # NEW
import models.vocabulary as vocab # For vocab sizes
from data.context_targets import MOVEMENT_CLASS_NAMES
class Oracle(nn.Module):
"""
"""
def __init__(self,
             token_encoder: TokenEncoder,
             wallet_encoder: WalletEncoder,
             graph_updater: GraphUpdater,
             ohlc_embedder: OHLCEmbedder,  # NEW
             quant_ohlc_embedder: QuantOHLCEmbedder,
             time_encoder: ContextualTimeEncoder,
             num_event_types: int,
             multi_modal_dim: int,
             event_pad_id: int,
             event_type_to_id: Dict[str, int],
             model_config_name: str = "llama3-12l-768d-gqa4-8k-random",
             quantiles: Optional[List[float]] = None,
             horizons_seconds: Optional[List[int]] = None,
             dtype: torch.dtype = torch.bfloat16):
    """
    Assemble the full Oracle pipeline.

    Args:
        token_encoder, wallet_encoder, graph_updater, ohlc_embedder,
        quant_ohlc_embedder, time_encoder: pre-built sub-encoders
            (stored by reference, not copied).
        num_event_types: size of the event-type embedding vocabulary.
        multi_modal_dim: width of the pre-computed text/image embeddings.
        event_pad_id: padding id inside the event-type vocabulary.
        event_type_to_id: event-name -> id map used by the per-event
            masking helpers (e.g. _get_trade_specific_embeddings).
        model_config_name: informational tag stored in oracle_config.json.
        quantiles: predicted quantile levels; defaults to [0.1, 0.5, 0.9].
        horizons_seconds: prediction horizons; defaults to
            [30, 60, 120, 240, 420].
        dtype: parameter dtype for the whole model.
    """
    super().__init__()
    # FIXED: mutable default arguments (lists) are shared across calls;
    # defaults are materialised here instead.
    if quantiles is None:
        quantiles = [0.1, 0.5, 0.9]
    if horizons_seconds is None:
        horizons_seconds = [30, 60, 120, 240, 420]
    device = "cuda" if torch.cuda.is_available() else "cpu"
    self.device = torch.device(device)
    # FIXED: self.dtype was assigned twice (before and after the output
    # bookkeeping); once is enough.
    self.dtype = dtype
    self.multi_modal_dim = multi_modal_dim
    self.num_event_types = num_event_types
    self.event_pad_id = event_pad_id
    self.model_config_name = model_config_name
    self.quantiles = quantiles
    self.horizons_seconds = horizons_seconds
    # Flattened horizon x quantile output grid.
    self.num_outputs = len(quantiles) * len(horizons_seconds)
    self.num_movement_classes = len(MOVEMENT_CLASS_NAMES)
    # --- 2. Backbone: Llama-style decoder, RANDOM INIT (no pretrained weights) ---
    # This gives you RoPE + modern decoder blocks and lets HF use optimized attention
    # implementations (SDPA / FlashAttention) without us implementing a transformer.
    #
    # Size target: ~80-120M params, suitable for 8k-ish seq caps with your data regime.
    attn_impl = os.getenv("HF_ATTN_IMPL", "sdpa")  # "sdpa" (safe) or "flash_attention_2" (if installed)
    llama_cfg = LlamaConfig(
        # Model size
        hidden_size=768,
        intermediate_size=3072,
        num_hidden_layers=12,
        num_attention_heads=12,
        # GQA-style KV heads (Llama 3-style efficiency knob)
        num_key_value_heads=4,
        # Long context (must be >= your effective max sequence length)
        max_position_embeddings=8192,
        # Llama 3 uses a large theta; harmless for random init and helps longer contexts.
        rope_theta=500000.0,
        rms_norm_eps=1e-5,
        # Unused when providing inputs_embeds, but required by config
        vocab_size=32000,
    )
    self.d_model = llama_cfg.hidden_size
    # Older transformers versions may not support attn_implementation in from_config.
    # Also, flash_attention_2 requires optional deps; fall back to SDPA if unavailable.
    try:
        self.model = AutoModel.from_config(llama_cfg, attn_implementation=attn_impl)
    except TypeError:
        self.model = AutoModel.from_config(llama_cfg)
    except Exception:
        if attn_impl != "sdpa":
            self.model = AutoModel.from_config(llama_cfg, attn_implementation="sdpa")
        else:
            raise
    # Disable KV cache during training (saves memory; not used for full-seq training).
    if hasattr(self.model, "config"):
        self.model.config.use_cache = False
    # NOTE(review): only the backbone is moved to self.device here; the
    # custom heads/embeddings below are moved to dtype only (self.to(dtype)
    # at the end) — presumably the caller moves the whole module to the
    # device afterwards. Confirm against the training entry point.
    self.model.to(self.device, dtype=self.dtype)
    # Quantile prediction head (maps pooled hidden state -> flattened horizon/quantile grid)
    self.quantile_head = nn.Sequential(
        nn.Linear(self.d_model, self.d_model),
        nn.GELU(),
        nn.Linear(self.d_model, self.num_outputs)
    )
    # Scalar "quality" score head.
    self.quality_head = nn.Sequential(
        nn.Linear(self.d_model, self.d_model),
        nn.GELU(),
        nn.Linear(self.d_model, 1)
    )
    # Per-horizon movement classification head.
    self.movement_head = nn.Sequential(
        nn.Linear(self.d_model, self.d_model),
        nn.GELU(),
        nn.Linear(self.d_model, len(self.horizons_seconds) * self.num_movement_classes)
    )
    self.event_type_to_id = event_type_to_id
    # --- 1. Store All Encoders ---
    # Define Token Roles before using them
    self.token_roles = {'main': 0, 'quote': 1, 'trending': 2}  # Add trending for future use
    self.main_token_role_id = self.token_roles['main']
    self.quote_token_role_id = self.token_roles['quote']
    self.trending_token_role_id = self.token_roles['trending']
    self.token_encoder = token_encoder
    self.wallet_encoder = wallet_encoder
    self.graph_updater = graph_updater
    self.ohlc_embedder = ohlc_embedder
    self.quant_ohlc_embedder = quant_ohlc_embedder
    self.time_encoder = time_encoder  # Store time_encoder
    self.social_encoder = SocialEncoder(d_model=self.d_model, dtype=self.dtype)  # Now self.d_model is defined
    # --- 4. Define Sequence Feature Embeddings ---
    self.event_type_embedding = nn.Embedding(num_event_types, self.d_model, padding_idx=event_pad_id)
    # --- NEW: Token Role Embeddings ---
    self.token_role_embedding = nn.Embedding(len(self.token_roles), self.d_model)
    # --- 5. Define Entity Padding (Learnable) ---
    self.pad_wallet_emb = nn.Parameter(torch.zeros(1, self.wallet_encoder.d_model))
    self.pad_token_emb = nn.Parameter(torch.zeros(1, self.token_encoder.output_dim))
    self.pad_ohlc_emb = nn.Parameter(torch.zeros(1, self.quant_ohlc_embedder.output_dim))
    self.pad_precomputed_emb = nn.Parameter(torch.zeros(1, self.multi_modal_dim))  # NEW: For text/images
    # --- NEW: Instantiate HolderDistributionEncoder internally ---
    self.holder_dist_encoder = HolderDistributionEncoder(
        wallet_embedding_dim=self.wallet_encoder.d_model,
        output_dim=self.d_model,
        dtype=self.dtype  # Pass the correct dtype
    )
    self.pad_holder_snapshot_emb = nn.Parameter(torch.zeros(1, self.d_model))  # Output of holder_dist_encoder is d_model
    # --- 6. Define Projection MLPs ---
    self.time_proj = nn.Linear(self.time_encoder.projection.out_features, self.d_model)
    self.rel_ts_proj = nn.Linear(1, self.d_model)
    self.rel_ts_norm = nn.LayerNorm(1)
    self.wallet_proj = nn.Linear(self.wallet_encoder.d_model, self.d_model)
    self.token_proj = nn.Linear(self.token_encoder.output_dim, self.d_model)
    self.ohlc_proj = nn.Linear(self.quant_ohlc_embedder.output_dim, self.d_model)
    self.chart_interval_fusion_embedding = nn.Embedding(vocab.NUM_OHLC_INTERVALS, 32, padding_idx=0)
    # Fuses raw-price chart embedding + quant-feature embedding + interval id.
    fusion_input_dim = self.ohlc_embedder.output_dim + self.quant_ohlc_embedder.output_dim + 32
    self.chart_fusion = nn.Sequential(
        nn.Linear(fusion_input_dim, self.quant_ohlc_embedder.output_dim),
        nn.GELU(),
        nn.LayerNorm(self.quant_ohlc_embedder.output_dim),
        nn.Linear(self.quant_ohlc_embedder.output_dim, self.quant_ohlc_embedder.output_dim),
        nn.LayerNorm(self.quant_ohlc_embedder.output_dim),
    )
    # self.holder_snapshot_proj is no longer needed as HolderDistributionEncoder outputs directly to d_model
    # --- NEW: Layers for Transfer Numerical Features ---
    self.transfer_num_norm = nn.LayerNorm(4)  # Normalize the 4 features
    self.transfer_num_proj = nn.Linear(4, self.d_model)  # Project to d_model
    # --- NEW: Layers for Trade Numerical Features ---
    # --- FIXED: Size reduced from 10 to 8 ---
    self.trade_num_norm = nn.LayerNorm(8)
    self.trade_num_proj = nn.Linear(8, self.d_model)
    # --- NEW: Embedding for categorical dex_platform_id ---
    self.dex_platform_embedding = nn.Embedding(vocab.NUM_DEX_PLATFORMS, self.d_model)
    # --- NEW: Embedding for categorical trade_direction ---
    self.trade_direction_embedding = nn.Embedding(2, self.d_model)  # 0 for buy, 1 for sell
    # --- FIXED: Embedding for categorical mev_protection is now binary ---
    self.mev_protection_embedding = nn.Embedding(2, self.d_model)  # 0 for false, 1 for true
    # --- NEW: Embedding for categorical is_bundle ---
    self.is_bundle_embedding = nn.Embedding(2, self.d_model)  # 0 for false, 1 for true
    # --- NEW: Separate Layers for Deployer Trade Numerical Features ---
    # --- FIXED: Size reduced from 10 to 8 ---
    self.deployer_trade_num_norm = nn.LayerNorm(8)
    self.deployer_trade_num_proj = nn.Linear(8, self.d_model)
    # --- NEW: Separate Layers for Smart Wallet Trade Numerical Features ---
    # --- FIXED: Size reduced from 10 to 8 ---
    self.smart_wallet_trade_num_norm = nn.LayerNorm(8)
    self.smart_wallet_trade_num_proj = nn.Linear(8, self.d_model)
    # --- NEW: Layers for PoolCreated Numerical Features ---
    self.pool_created_num_norm = nn.LayerNorm(2)
    self.pool_created_num_proj = nn.Linear(2, self.d_model)
    # --- NEW: Layers for LiquidityChange Numerical Features ---
    self.liquidity_change_num_norm = nn.LayerNorm(1)
    self.liquidity_change_num_proj = nn.Linear(1, self.d_model)
    # --- NEW: Embedding for categorical change_type_id (add/remove) ---
    self.liquidity_change_type_embedding = nn.Embedding(2, self.d_model)
    # --- NEW: Layers for FeeCollected Numerical Features ---
    self.fee_collected_num_norm = nn.LayerNorm(1)  # sol_amount only
    self.fee_collected_num_proj = nn.Linear(1, self.d_model)
    # --- NEW: Layers for TokenBurn Numerical Features ---
    self.token_burn_num_norm = nn.LayerNorm(2)  # amount_pct, amount_tokens
    self.token_burn_num_proj = nn.Linear(2, self.d_model)
    # --- NEW: Layers for SupplyLock Numerical Features ---
    self.supply_lock_num_norm = nn.LayerNorm(2)  # amount_pct, lock_duration
    self.supply_lock_num_proj = nn.Linear(2, self.d_model)
    # --- NEW: Layers for OnChain_Snapshot Numerical Features ---
    self.onchain_snapshot_num_norm = nn.LayerNorm(14)
    self.onchain_snapshot_num_proj = nn.Linear(14, self.d_model)
    # --- NEW: Layers for TrendingToken Numerical Features (rank only) ---
    self.trending_token_num_norm = nn.LayerNorm(1)
    self.trending_token_num_proj = nn.Linear(1, self.d_model)
    # --- NEW: Embeddings for categorical IDs ---
    self.trending_list_source_embedding = nn.Embedding(vocab.NUM_TRENDING_LIST_SOURCES, self.d_model)
    self.trending_timeframe_embedding = nn.Embedding(vocab.NUM_TRENDING_LIST_TIMEFRAMES, self.d_model)
    # --- NEW: Layers for BoostedToken Numerical Features ---
    self.boosted_token_num_norm = nn.LayerNorm(2)  # total_boost_amount, rank
    self.boosted_token_num_proj = nn.Linear(2, self.d_model)
    # --- NEW: Layers for DexBoost_Paid Numerical Features ---
    self.dexboost_paid_num_norm = nn.LayerNorm(2)  # amount, total_amount_on_token
    self.dexboost_paid_num_proj = nn.Linear(2, self.d_model)
    # --- NEW: Layers for DexProfile_Updated Features ---
    self.dexprofile_updated_flags_proj = nn.Linear(4, self.d_model)  # Project the 4 boolean flags
    # --- NEW: Projection for all pre-computed embeddings (text/images) ---
    self.precomputed_proj = nn.Linear(self.multi_modal_dim, self.d_model)
    # --- NEW: Embedding for Protocol IDs (used in Migrated event) ---
    self.protocol_embedding = nn.Embedding(vocab.NUM_PROTOCOLS, self.d_model)
    # --- NEW: Embeddings for TrackerEncoder Events ---
    # Note: NUM_CALL_CHANNELS might need to be large and managed as vocab grows.
    self.alpha_group_embedding = nn.Embedding(vocab.NUM_ALPHA_GROUPS, self.d_model)
    self.call_channel_embedding = nn.Embedding(vocab.NUM_CALL_CHANNELS, self.d_model)
    self.cex_listing_embedding = nn.Embedding(vocab.NUM_EXCHANGES, self.d_model)
    # --- NEW: Layers for GlobalTrendingEncoder Events ---
    self.global_trending_num_norm = nn.LayerNorm(1)  # rank
    self.global_trending_num_proj = nn.Linear(1, self.d_model)
    # --- NEW: Layers for ChainSnapshot Events ---
    self.chainsnapshot_num_norm = nn.LayerNorm(2)  # native_token_price_usd, gas_fee
    self.chainsnapshot_num_proj = nn.Linear(2, self.d_model)
    # --- NEW: Layers for Lighthouse_Snapshot Events ---
    self.lighthousesnapshot_num_norm = nn.LayerNorm(5)
    self.lighthousesnapshot_num_proj = nn.Linear(5, self.d_model)
    # --- NEW: Embedding for the lighthouse timeframe ID ---
    self.lighthouse_timeframe_embedding = nn.Embedding(vocab.NUM_LIGHTHOUSE_TIMEFRAMES, self.d_model)
    # --- Embeddings for Special Context Tokens ---
    # Must match vocabulary event names (see models/vocabulary.py).
    self.special_context_tokens = {'MIDDLE': 0, 'RECENT': 1}
    self.special_context_embedding = nn.Embedding(len(self.special_context_tokens), self.d_model)
    # --- 8. Move all new modules to correct dtype ---
    self.to(dtype)
    print("Oracle model (full pipeline) initialized.")
def save_pretrained(self, save_directory: str):
    """
    Save the model in a Hugging Face-compatible layout.

    Writes into ``save_directory``:
      1. the backbone's own HF artifacts (config.json + backbone weights),
      2. ``pytorch_model.bin`` holding the FULL Oracle state dict
         (backbone + every custom encoder/head),
      3. ``oracle_config.json`` with the metadata ``from_pretrained`` needs.
    """
    # FIXED: exist_ok=True replaces the exists()/makedirs() pair, which was
    # racy and would raise if the directory appeared between the two calls.
    os.makedirs(save_directory, exist_ok=True)
    # 1. Save the inner transformer model using its own save_pretrained.
    # This gives us the standard HF config.json and backbone weights.
    # NOTE(review): older transformers versions write the backbone weights
    # as 'pytorch_model.bin', which step 2 below would overwrite; modern
    # versions write 'model.safetensors', so the files do not collide.
    self.model.save_pretrained(save_directory)
    # 2. Save the whole Oracle state dict under 'pytorch_model.bin' — this
    # exact file name is what from_pretrained() loads. (FIXED: the old
    # comment claimed 'oracle_model.bin', which was never what the code wrote.)
    torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))
    # 3. Save Oracle-specific metadata for reconstruction.
    oracle_config = {
        "num_event_types": self.num_event_types,
        "multi_modal_dim": self.multi_modal_dim,
        "event_pad_id": self.event_pad_id,
        "model_config_name": self.model_config_name,
        "quantiles": self.quantiles,
        "horizons_seconds": self.horizons_seconds,
        "dtype": str(self.dtype),
        "event_type_to_id": self.event_type_to_id
    }
    with open(os.path.join(save_directory, "oracle_config.json"), "w") as f:
        json.dump(oracle_config, f, indent=2)
    print(f"✅ Oracle model saved to {save_directory}")
@classmethod
def from_pretrained(cls, load_directory: str,
                    token_encoder, wallet_encoder, graph_updater, ohlc_embedder, quant_ohlc_embedder, time_encoder):
    """
    Load the Oracle model from a directory written by ``save_pretrained``.

    Note: you must still provide the initialized sub-encoders; only the
    weights and the Oracle metadata are restored from disk.

    Returns:
        A fully-constructed ``Oracle`` with the saved state dict loaded.
    """
    config_path = os.path.join(load_directory, "oracle_config.json")
    with open(config_path, "r") as f:
        config = json.load(f)
    # --- FIXED: parse the dtype string precisely. The old substring checks
    # tested "float16" before "bfloat16"-awareness existed, and since
    # "float16" is a substring of "torch.bfloat16", every bf16 checkpoint
    # was silently loaded as fp16. Check the most specific name first.
    dtype_str = config["dtype"]
    if "bfloat16" in dtype_str:
        dtype = torch.bfloat16
    elif "float32" in dtype_str:
        dtype = torch.float32
    elif "float16" in dtype_str:
        dtype = torch.float16
    else:
        dtype = torch.bfloat16  # Default, matching the constructor.
    # Instantiate the model with the saved hyper-parameters.
    model = cls(
        token_encoder=token_encoder,
        wallet_encoder=wallet_encoder,
        graph_updater=graph_updater,
        ohlc_embedder=ohlc_embedder,
        quant_ohlc_embedder=quant_ohlc_embedder,
        time_encoder=time_encoder,
        num_event_types=config["num_event_types"],
        multi_modal_dim=config["multi_modal_dim"],
        event_pad_id=config["event_pad_id"],
        event_type_to_id=config["event_type_to_id"],
        model_config_name=config["model_config_name"],
        quantiles=config["quantiles"],
        horizons_seconds=config["horizons_seconds"],
        dtype=dtype
    )
    # Load weights. weights_only=True: a state dict needs no arbitrary
    # pickle execution, so refuse it (safer for downloaded checkpoints).
    weight_path = os.path.join(load_directory, "pytorch_model.bin")
    state_dict = torch.load(weight_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    print(f"✅ Oracle model loaded from {load_directory}")
    return model
def _normalize_and_project(self,
                           features: torch.Tensor,
                           norm_layer: nn.LayerNorm,
                           proj_layer: nn.Linear,
                           log_indices: Optional[List[int]] = None) -> torch.Tensor:
    """
    Selectively apply signed log scaling, then normalize and project.

    Args:
        features: numeric feature tensor; the LAST dimension is the feature
            axis. (FIXED: the old code hard-coded ``[:, :, idx]`` and so
            silently required rank-3 input even though validity was checked
            against ``shape[-1]``; ellipsis indexing now accepts any rank.)
        norm_layer: LayerNorm applied over the feature axis.
        proj_layer: Linear projection applied after normalization.
        log_indices: feature indices to scale with sign(x)*log1p(|x|)
            (magnitude compression that keeps the sign); out-of-range
            indices are ignored.

    Returns:
        Projected tensor in ``proj_layer``'s dtype; NaN/inf scrubbed both
        before and after normalization.
    """
    processed_features = torch.nan_to_num(
        features.to(torch.float32),
        nan=0.0,
        posinf=1e6,
        neginf=-1e6
    )
    # Apply log scaling only to specified (and in-range) indices.
    if log_indices:
        valid_indices = [i for i in log_indices if i < processed_features.shape[-1]]
        if valid_indices:
            log_features = processed_features[..., valid_indices]
            processed_features[..., valid_indices] = (
                torch.sign(log_features) * torch.log1p(torch.abs(log_features))
            )
    # Normalize and project the entire feature set, casting to each layer's
    # own dtype so mixed-precision setups do not break.
    norm_dtype = norm_layer.weight.dtype
    proj_dtype = proj_layer.weight.dtype
    normed_features = norm_layer(processed_features.to(norm_dtype))
    normed_features = torch.nan_to_num(normed_features, nan=0.0, posinf=0.0, neginf=0.0)
    return proj_layer(normed_features.to(proj_dtype))
def _run_snapshot_encoders(self,
                           batch: Dict[str, Any],
                           final_wallet_embeddings_raw: torch.Tensor,
                           wallet_addr_to_batch_idx: Dict[str, int]) -> Dict[str, torch.Tensor]:
    """
    Turn raw HolderSnapshot data into embeddings, end-to-end.

    For every snapshot in the batch, each holder's wallet address is mapped
    (via ``wallet_addr_to_batch_idx``, where 0 means padding/unknown) to its
    graph-updated wallet embedding, paired with its holding percentage, and
    fed to ``self.holder_dist_encoder``.
    """
    snapshot_embeds: List[torch.Tensor] = []
    for holder_list in batch['holder_snapshot_raw_data']:
        known_holders = []
        for entry in holder_list:
            batch_idx = wallet_addr_to_batch_idx.get(entry['wallet'], 0)
            if batch_idx <= 0:
                # Unknown wallet (padding index) — skip it.
                continue
            known_holders.append({
                # Indices in the map are 1-based; shift to 0-based row.
                'wallet_embedding': final_wallet_embeddings_raw[batch_idx - 1],
                'pct': entry['holding_pct'],
            })
        snapshot_embeds.append(self.holder_dist_encoder(known_holders))
    if snapshot_embeds:
        stacked = torch.cat(snapshot_embeds, dim=0)
    else:
        stacked = torch.empty(0, self.d_model, device=self.device, dtype=self.dtype)
    return {"holder_snapshot": stacked}
def _run_dynamic_encoders(self, batch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
    """
    Runs all dynamic encoders and returns a dictionary of raw, unprojected embeddings.

    Stages (each skipped when its input is empty):
      1a. TokenEncoder over token metadata plus pre-computed
          name/symbol/image embedding lookups.
      1b. WalletEncoder over wallet profiles (can consult token embeddings
          from 1a via an address-keyed lookup).
      1c. OHLC embedders: raw price charts + quantitative features, fused
          per chart segment together with an interval-id embedding.
      1d. GraphUpdater message passing over wallet/token link structure.

    Returns:
        {'wallet', 'token', 'ohlc'} -> raw embedding tensors with the
        index-0 padding row stripped; any may have 0 rows.
    """
    device = self.device
    # --- NEW: Get pre-computed embedding indices ---
    token_encoder_inputs = batch['token_encoder_inputs']
    wallet_encoder_inputs = batch['wallet_encoder_inputs']
    # The pre-computed embedding pool for the whole batch.
    # NaN/inf are scrubbed so one bad row cannot poison downstream layers.
    embedding_pool = torch.nan_to_num(
        batch['embedding_pool'].to(device, self.dtype),
        nan=0.0,
        posinf=0.0,
        neginf=0.0
    )
    ohlc_price_tensors = torch.nan_to_num(
        batch['ohlc_price_tensors'].to(device, self.dtype),
        nan=0.0,
        posinf=0.0,
        neginf=0.0
    )
    ohlc_interval_ids = batch['ohlc_interval_ids'].to(device)
    quant_ohlc_feature_tensors = torch.nan_to_num(
        batch['quant_ohlc_feature_tensors'].to(device, self.dtype),
        nan=0.0,
        posinf=0.0,
        neginf=0.0
    )
    quant_ohlc_feature_mask = batch['quant_ohlc_feature_mask'].to(device)
    quant_ohlc_feature_version_ids = batch['quant_ohlc_feature_version_ids'].to(device)
    graph_updater_links = batch['graph_updater_links']
    # 1a. Encode Tokens
    # --- FIXED: Check for a key that still exists ---
    if token_encoder_inputs['name_embed_indices'].numel() > 0:
        # --- NEW: Gather pre-computed embeddings and pass to encoder ---
        # --- CRITICAL FIX: Remove keys that are not part of the TokenEncoder's signature ---
        encoder_args = token_encoder_inputs.copy()
        encoder_args.pop('_addresses_for_lookup', None)  # This key is for the WalletEncoder
        encoder_args.pop('name_embed_indices', None)
        encoder_args.pop('symbol_embed_indices', None)
        encoder_args.pop('image_embed_indices', None)
        # --- SAFETY: Create a padded view of the embedding pool and map missing indices (-1) to pad ---
        if embedding_pool.numel() > 0:
            # Prepend an all-zero pad row at index 0.
            pad_row = torch.zeros(1, embedding_pool.size(1), device=device, dtype=embedding_pool.dtype)
            pool_padded = torch.cat([pad_row, embedding_pool], dim=0)
            def pad_and_lookup(idx_tensor: torch.Tensor) -> torch.Tensor:
                # Map valid indices >=0 to +1 (shift), invalid (<0) to 0 (pad)
                shifted = torch.where(idx_tensor >= 0, idx_tensor + 1, torch.zeros_like(idx_tensor))
                return F.embedding(shifted, pool_padded)
            name_embeds = pad_and_lookup(token_encoder_inputs['name_embed_indices'])
            symbol_embeds = pad_and_lookup(token_encoder_inputs['symbol_embed_indices'])
            image_embeds = pad_and_lookup(token_encoder_inputs['image_embed_indices'])
        else:
            # Empty pool: provide zeros with correct shapes
            n = token_encoder_inputs['name_embed_indices'].shape[0]
            d = self.multi_modal_dim
            zeros = torch.zeros(n, d, device=device, dtype=self.dtype)
            name_embeds = zeros
            symbol_embeds = zeros
            image_embeds = zeros
        batch_token_embeddings_unupd = self.token_encoder(
            name_embeds=name_embeds,
            symbol_embeds=symbol_embeds,
            image_embeds=image_embeds,
            # Pass all other keys like protocol_ids, is_vanity_flags, etc.
            **encoder_args
        )
    else:
        # No tokens in this batch: empty placeholder with the right width.
        batch_token_embeddings_unupd = torch.empty(0, self.token_encoder.output_dim, device=device, dtype=self.dtype)
    # 1b. Encode Wallets
    if wallet_encoder_inputs['profile_rows']:
        # Address -> token embedding map so the wallet encoder can attend
        # over the tokens encoded in 1a.
        temp_token_lookup = {
            addr: batch_token_embeddings_unupd[i]
            for i, addr in enumerate(batch['token_encoder_inputs']['_addresses_for_lookup'])  # Use helper key
        }
        initial_wallet_embeddings = self.wallet_encoder(
            **wallet_encoder_inputs,
            token_vibe_lookup=temp_token_lookup,
            embedding_pool=embedding_pool
        )
    else:
        initial_wallet_embeddings = torch.empty(0, self.wallet_encoder.d_model, device=device, dtype=self.dtype)
    # 1c. Encode OHLC (raw price charts and quant features independently).
    if ohlc_price_tensors.shape[0] > 0:
        raw_chart_embeddings = self.ohlc_embedder(ohlc_price_tensors, ohlc_interval_ids)
    else:
        raw_chart_embeddings = torch.empty(0, self.ohlc_embedder.output_dim, device=device, dtype=self.dtype)
    if quant_ohlc_feature_tensors.shape[0] > 0:
        quant_chart_embeddings = self.quant_ohlc_embedder(
            quant_ohlc_feature_tensors,
            quant_ohlc_feature_mask,
            quant_ohlc_feature_version_ids,
        )
    else:
        quant_chart_embeddings = torch.empty(0, self.quant_ohlc_embedder.output_dim, device=device, dtype=self.dtype)
    # Fuse both chart views; if only one side is present the other is
    # zero-filled so chart_fusion always sees both inputs.
    num_chart_segments = max(raw_chart_embeddings.shape[0], quant_chart_embeddings.shape[0])
    if num_chart_segments > 0:
        if raw_chart_embeddings.shape[0] == 0:
            raw_chart_embeddings = torch.zeros(
                num_chart_segments,
                self.ohlc_embedder.output_dim,
                device=device,
                dtype=self.dtype,
            )
        if quant_chart_embeddings.shape[0] == 0:
            quant_chart_embeddings = torch.zeros(
                num_chart_segments,
                self.quant_ohlc_embedder.output_dim,
                device=device,
                dtype=self.dtype,
            )
        interval_embeds = self.chart_interval_fusion_embedding(ohlc_interval_ids[:num_chart_segments]).to(self.dtype)
        batch_ohlc_embeddings_raw = self.chart_fusion(
            torch.cat([raw_chart_embeddings, quant_chart_embeddings, interval_embeds], dim=-1)
        )
    else:
        batch_ohlc_embeddings_raw = torch.empty(0, self.quant_ohlc_embedder.output_dim, device=device, dtype=self.dtype)
    # 1d. Run Graph Updater.
    # A learnable pad row is prepended at index 0 so link indices can use
    # 1-based entity positions; it is stripped again before returning.
    pad_wallet_raw = self.pad_wallet_emb.to(self.dtype)
    pad_token_raw = self.pad_token_emb.to(self.dtype)
    padded_wallet_tensor = torch.cat([pad_wallet_raw, initial_wallet_embeddings], dim=0)
    padded_token_tensor = torch.cat([pad_token_raw, batch_token_embeddings_unupd], dim=0)
    x_dict_initial = {}
    if padded_wallet_tensor.shape[0] > 1: x_dict_initial['wallet'] = padded_wallet_tensor
    if padded_token_tensor.shape[0] > 1: x_dict_initial['token'] = padded_token_tensor
    if x_dict_initial and graph_updater_links:
        final_entity_embeddings_dict = self.graph_updater(x_dict_initial, graph_updater_links)
        # Fall back to the pre-update tensors for any entity type the
        # graph updater did not return.
        final_padded_wallet_embs = final_entity_embeddings_dict.get('wallet', padded_wallet_tensor)
        final_padded_token_embs = final_entity_embeddings_dict.get('token', padded_token_tensor)
    else:
        final_padded_wallet_embs = padded_wallet_tensor
        final_padded_token_embs = padded_token_tensor
    # Strip padding before returning
    final_wallet_embeddings_raw = final_padded_wallet_embs[1:]
    final_token_embeddings_raw = final_padded_token_embs[1:]
    return {
        "wallet": final_wallet_embeddings_raw,
        "token": final_token_embeddings_raw,
        "ohlc": batch_ohlc_embeddings_raw
    }
def _project_and_gather_embeddings(self, raw_embeds: Dict[str, torch.Tensor], batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """
    Projects raw embeddings to d_model and gathers them into sequence-aligned tensors.

    ``raw_embeds`` comes from _run_dynamic_encoders / _run_snapshot_encoders.
    Every lookup table built here gets a (projected) learnable padding row at
    index 0, so the index tensors in ``batch`` (e.g. ``wallet_indices``) use
    0 for "no entity" and 1-based positions otherwise.

    Returns:
        Per-role gathered embedding tensors aligned to the event sequence,
        plus 'precomputed' — the full projected text/image lookup table
        (NOT gathered; callers index it themselves).
    """
    # Project raw embeddings to d_model
    final_wallet_proj = self.wallet_proj(raw_embeds['wallet'])
    final_token_proj = self.token_proj(raw_embeds['token'])
    final_ohlc_proj = self.ohlc_proj(raw_embeds['ohlc'])
    # Project padding embeddings to d_model (through the SAME projections,
    # so pad rows live in the same space as real entities).
    pad_wallet = self.wallet_proj(self.pad_wallet_emb.to(self.dtype))
    pad_token = self.token_proj(self.pad_token_emb.to(self.dtype))
    pad_ohlc = self.ohlc_proj(self.pad_ohlc_emb.to(self.dtype))
    pad_holder_snapshot = self.pad_holder_snapshot_emb.to(self.dtype)  # Already d_model
    # --- NEW: Project pre-computed embeddings and create lookup ---
    precomputed_pool = torch.nan_to_num(
        batch['embedding_pool'].to(self.device, self.dtype),
        nan=0.0,
        posinf=0.0,
        neginf=0.0
    )
    final_precomputed_proj = self.precomputed_proj(precomputed_pool)
    pad_precomputed = self.precomputed_proj(self.pad_precomputed_emb.to(self.dtype))
    final_precomputed_lookup = torch.cat([pad_precomputed, final_precomputed_proj], dim=0)
    # Create final lookup tables with padding at index 0
    final_wallet_lookup = torch.cat([pad_wallet, final_wallet_proj], dim=0)
    final_token_lookup = torch.cat([pad_token, final_token_proj], dim=0)
    final_ohlc_lookup = torch.cat([pad_ohlc, final_ohlc_proj], dim=0)
    # --- NEW: Add Role Embeddings ---
    # Additive role markers distinguishing how a token appears in an event
    # (main vs quote vs trending); the main role gets no additive term below.
    main_role_emb = self.token_role_embedding(torch.tensor(self.main_token_role_id, device=self.device))
    quote_role_emb = self.token_role_embedding(torch.tensor(self.quote_token_role_id, device=self.device))
    trending_role_emb = self.token_role_embedding(torch.tensor(self.trending_token_role_id, device=self.device))
    # Gather base embeddings (F.embedding = row gather by index).
    gathered_main_token_embs = F.embedding(batch['token_indices'], final_token_lookup)
    gathered_quote_token_embs = F.embedding(batch['quote_token_indices'], final_token_lookup)
    gathered_trending_token_embs = F.embedding(batch['trending_token_indices'], final_token_lookup)
    gathered_boosted_token_embs = F.embedding(batch['boosted_token_indices'], final_token_lookup)
    # --- NEW: Handle HolderSnapshot ---
    final_holder_snapshot_lookup = torch.cat([pad_holder_snapshot, raw_embeds['holder_snapshot']], dim=0)
    # Gather embeddings for each event in the sequence
    return {
        "wallet": F.embedding(batch['wallet_indices'], final_wallet_lookup),
        "token": gathered_main_token_embs,  # This is the baseline, no role needed
        "ohlc": F.embedding(batch['ohlc_indices'], final_ohlc_lookup),
        "original_author": F.embedding(batch['original_author_indices'], final_wallet_lookup),  # NEW
        "dest_wallet": F.embedding(batch['dest_wallet_indices'], final_wallet_lookup),  # Also gather dest wallet
        "quote_token": gathered_quote_token_embs + quote_role_emb,
        "trending_token": gathered_trending_token_embs + trending_role_emb,
        "boosted_token": gathered_boosted_token_embs + trending_role_emb,  # Same role as trending
        "holder_snapshot": F.embedding(batch['holder_snapshot_indices'], final_holder_snapshot_lookup),  # NEW
        "precomputed": final_precomputed_lookup  # NEW: Pass the full lookup table
    }
def _get_transfer_specific_embeddings(self, batch: Dict[str, torch.Tensor], gathered_embeds: Dict[str, torch.Tensor]) -> torch.Tensor:
    """
    Builds the Transfer/LargeTransfer-specific embedding term.

    Non-transfer sequence positions are zeroed out by the event-type mask.
    """
    # token_amount (0) and priority_fee (3) get signed-log scaling;
    # the two percentage features (1, 2) stay linear.
    numeric_term = self._normalize_and_project(
        batch['transfer_numerical_features'],
        self.transfer_num_norm,
        self.transfer_num_proj,
        log_indices=[0, 3],
    )
    transfer_ids = torch.tensor(
        [self.event_type_to_id.get('Transfer', -1),
         self.event_type_to_id.get('LargeTransfer', -1)],
        device=self.device,
    )
    is_transfer = torch.isin(batch['event_type_ids'], transfer_ids).unsqueeze(-1)
    # Destination wallet embedding + numeric features, masked elsewhere.
    return (gathered_embeds['dest_wallet'] + numeric_term) * is_transfer
def _get_trade_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
    """
    Builds the Trade/LargeTrade-specific embedding term.

    Positions whose event type is neither Trade nor LargeTrade are zeroed.
    """
    # sol_amount (0), priority_fee (1) and total_usd (7) get signed-log
    # scaling; pcts, slippage, price impact and success flags stay linear.
    numeric_term = self._normalize_and_project(
        batch['trade_numerical_features'],
        self.trade_num_norm,
        self.trade_num_proj,
        log_indices=[0, 1, 7],
    )
    # Sum of the categorical embeddings: DEX platform, trade direction,
    # MEV-protection flag and bundle flag.
    categorical_term = (
        self.dex_platform_embedding(batch['trade_dex_ids'])
        + self.trade_direction_embedding(batch['trade_direction_ids'])
        + self.mev_protection_embedding(batch['trade_mev_protection_ids'])
        + self.is_bundle_embedding(batch['trade_is_bundle_ids'])
    )
    # One shared layer handles both generic and large trades.
    trade_ids = torch.tensor(
        [self.event_type_to_id.get(name, -1) for name in ('Trade', 'LargeTrade')],
        device=self.device,
    )
    is_trade = torch.isin(batch['event_type_ids'], trade_ids).unsqueeze(-1)
    return (numeric_term + categorical_term) * is_trade
def _get_deployer_trade_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
    """
    Builds the Deployer_Trade-specific embedding term.

    Mirrors _get_trade_specific_embeddings but routes the numeric features
    through the dedicated deployer-trade layers; the categorical ID tensors
    (dex, direction, MEV, bundle) are shared across trade event kinds.
    """
    # sol_amount (0), priority_fee (1) and total_usd (7) get signed-log scaling.
    numeric_term = self._normalize_and_project(
        batch['deployer_trade_numerical_features'],
        self.deployer_trade_num_norm,
        self.deployer_trade_num_proj,
        log_indices=[0, 1, 7],
    )
    categorical_term = (
        self.dex_platform_embedding(batch['trade_dex_ids'])
        + self.trade_direction_embedding(batch['trade_direction_ids'])
        + self.mev_protection_embedding(batch['trade_mev_protection_ids'])
        + self.is_bundle_embedding(batch['trade_is_bundle_ids'])
    )
    is_deployer_trade = (
        batch['event_type_ids'] == self.event_type_to_id.get('Deployer_Trade', -1)
    ).unsqueeze(-1)
    return (numeric_term + categorical_term) * is_deployer_trade
def _get_smart_wallet_trade_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
    """
    Builds the SmartWallet_Trade-specific embedding term.

    Mirrors _get_trade_specific_embeddings but routes the numeric features
    through the dedicated smart-wallet layers; the categorical ID tensors
    (dex, direction, MEV, bundle) are shared across trade event kinds.
    """
    # sol_amount (0), priority_fee (1) and total_usd (7) get signed-log scaling.
    numeric_term = self._normalize_and_project(
        batch['smart_wallet_trade_numerical_features'],
        self.smart_wallet_trade_num_norm,
        self.smart_wallet_trade_num_proj,
        log_indices=[0, 1, 7],
    )
    categorical_term = (
        self.dex_platform_embedding(batch['trade_dex_ids'])
        + self.trade_direction_embedding(batch['trade_direction_ids'])
        + self.mev_protection_embedding(batch['trade_mev_protection_ids'])
        + self.is_bundle_embedding(batch['trade_is_bundle_ids'])
    )
    is_smart_wallet_trade = (
        batch['event_type_ids'] == self.event_type_to_id.get('SmartWallet_Trade', -1)
    ).unsqueeze(-1)
    return (numeric_term + categorical_term) * is_smart_wallet_trade
def _get_pool_created_specific_embeddings(self, batch: Dict[str, torch.Tensor], gathered_embeds: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for PoolCreated events.
"""
device = self.device
pool_created_numerical_features = batch['pool_created_numerical_features']
pool_created_protocol_ids = batch['pool_created_protocol_ids'] # NEW
event_type_ids = batch['event_type_ids']
# --- FIXED: Selectively log-scale features ---
# Log scale: base_amount (idx 0), quote_amount (idx 1)
# Linear scale: pcts (idx 2, 3)
projected_features = self._normalize_and_project(
pool_created_numerical_features, self.pool_created_num_norm, self.pool_created_num_proj, log_indices=[0, 1]
)
# --- NEW: Get embedding for the categorical protocol_id ---
protocol_id_embeds = self.protocol_embedding(pool_created_protocol_ids)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('PoolCreated', -1)).unsqueeze(-1)
# Combine Quote Token embedding with projected numericals
return (gathered_embeds['quote_token'] + projected_features + protocol_id_embeds) * mask
def _get_liquidity_change_specific_embeddings(self, batch: Dict[str, torch.Tensor], gathered_embeds: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for LiquidityChange events.
"""
device = self.device
liquidity_change_numerical_features = batch['liquidity_change_numerical_features']
liquidity_change_type_ids = batch['liquidity_change_type_ids'] # NEW
event_type_ids = batch['event_type_ids']
# --- FIXED: Selectively log-scale features ---
# Log scale: quote_amount (idx 0)
projected_features = self._normalize_and_project(
liquidity_change_numerical_features, self.liquidity_change_num_norm, self.liquidity_change_num_proj, log_indices=[0]
)
# --- NEW: Get embedding for the categorical change_type_id ---
change_type_embeds = self.liquidity_change_type_embedding(liquidity_change_type_ids)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('LiquidityChange', -1)).unsqueeze(-1)
# Combine Quote Token embedding with projected numericals
return (gathered_embeds['quote_token'] + projected_features + change_type_embeds) * mask
def _get_fee_collected_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for FeeCollected events.
"""
device = self.device
fee_collected_numerical_features = batch['fee_collected_numerical_features']
event_type_ids = batch['event_type_ids']
# --- FIXED: Single amount, log-scale ---
projected_features = self._normalize_and_project(
fee_collected_numerical_features, self.fee_collected_num_norm, self.fee_collected_num_proj, log_indices=[0]
)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('FeeCollected', -1)).unsqueeze(-1)
return projected_features * mask
def _get_token_burn_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for TokenBurn events.
"""
device = self.device
token_burn_numerical_features = batch['token_burn_numerical_features']
event_type_ids = batch['event_type_ids']
# --- FIXED: Selectively log-scale features ---
# Log scale: amount_tokens_burned (idx 1)
# Linear scale: amount_pct_of_total_supply (idx 0)
projected_features = self._normalize_and_project(
token_burn_numerical_features, self.token_burn_num_norm, self.token_burn_num_proj, log_indices=[1]
)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('TokenBurn', -1)).unsqueeze(-1)
return projected_features * mask
def _get_supply_lock_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for SupplyLock events.
"""
device = self.device
supply_lock_numerical_features = batch['supply_lock_numerical_features']
event_type_ids = batch['event_type_ids']
# --- FIXED: Selectively log-scale features ---
# Log scale: lock_duration (idx 1)
# Linear scale: amount_pct_of_total_supply (idx 0)
projected_features = self._normalize_and_project(
supply_lock_numerical_features, self.supply_lock_num_norm, self.supply_lock_num_proj, log_indices=[1]
)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('SupplyLock', -1)).unsqueeze(-1)
return projected_features * mask
def _get_onchain_snapshot_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for OnChain_Snapshot events.
"""
device = self.device
onchain_snapshot_numerical_features = batch['onchain_snapshot_numerical_features']
event_type_ids = batch['event_type_ids']
# --- FIXED: Selectively log-scale features ---
# Log scale: counts, market_cap, liquidity, volume, fees (almost all)
# Linear scale: growth_rate, holder_pcts (indices 3, 4, 5, 6, 7)
projected_features = self._normalize_and_project(
onchain_snapshot_numerical_features, self.onchain_snapshot_num_norm, self.onchain_snapshot_num_proj, log_indices=[0, 1, 2, 8, 9, 10, 11, 12, 13]
)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('OnChain_Snapshot', -1)).unsqueeze(-1)
return projected_features * mask
def _get_trending_token_specific_embeddings(self, batch: Dict[str, torch.Tensor], gathered_embeds: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for TrendingToken events.
"""
device = self.device
trending_token_numerical_features = batch['trending_token_numerical_features']
trending_token_source_ids = batch['trending_token_source_ids'] # NEW
trending_token_timeframe_ids = batch['trending_token_timeframe_ids'] # NEW
event_type_ids = batch['event_type_ids']
# --- FIXED: Rank is already inverted (0-1), so treat as linear ---
projected_features = self._normalize_and_project(
trending_token_numerical_features, self.trending_token_num_norm, self.trending_token_num_proj, log_indices=None
)
# --- NEW: Get embeddings for categorical IDs ---
source_embeds = self.trending_list_source_embedding(trending_token_source_ids)
timeframe_embeds = self.trending_timeframe_embedding(trending_token_timeframe_ids)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('TrendingToken', -1)).unsqueeze(-1)
# Combine Trending Token embedding with its projected numericals
return (gathered_embeds['trending_token'] + projected_features + source_embeds + timeframe_embeds) * mask
def _get_boosted_token_specific_embeddings(self, batch: Dict[str, torch.Tensor], gathered_embeds: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for BoostedToken events.
"""
device = self.device
boosted_token_numerical_features = batch['boosted_token_numerical_features']
event_type_ids = batch['event_type_ids']
# --- FIXED: Selectively log-scale features ---
# Log scale: total_boost_amount (idx 0)
# Linear scale: inverted rank (idx 1)
projected_features = self._normalize_and_project(
boosted_token_numerical_features, self.boosted_token_num_norm, self.boosted_token_num_proj, log_indices=[0]
)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('BoostedToken', -1)).unsqueeze(-1)
# Combine Boosted Token embedding with its projected numericals
return (gathered_embeds['boosted_token'] + projected_features) * mask
def _get_dexboost_paid_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Calculates the special embeddings for DexBoost_Paid events.
"""
device = self.device
dexboost_paid_numerical_features = batch['dexboost_paid_numerical_features']
event_type_ids = batch['event_type_ids']
# --- FIXED: All features are amounts, so log-scale all ---
projected_features = self._normalize_and_project(
dexboost_paid_numerical_features, self.dexboost_paid_num_norm, self.dexboost_paid_num_proj, log_indices=[0, 1]
)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('DexBoost_Paid', -1)).unsqueeze(-1)
return projected_features * mask
def _get_alphagroup_call_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Handles AlphaGroup_Call events by looking up the group_id embedding.
"""
device = self.device
group_ids = batch['alpha_group_ids']
event_type_ids = batch['event_type_ids']
# Look up the embedding for the group ID
group_embeds = self.alpha_group_embedding(group_ids)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('AlphaGroup_Call', -1)).unsqueeze(-1)
return group_embeds * mask
def _get_channel_call_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Handles Channel_Call events by looking up the channel_id embedding.
"""
device = self.device
channel_ids = batch['channel_ids']
event_type_ids = batch['event_type_ids']
channel_embeds = self.call_channel_embedding(channel_ids)
mask = (event_type_ids == self.event_type_to_id.get('Channel_Call', -1)).unsqueeze(-1)
return channel_embeds * mask
def _get_cexlisting_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Handles CexListing events by looking up the exchange_id embedding.
"""
device = self.device
exchange_ids = batch['exchange_ids']
event_type_ids = batch['event_type_ids']
exchange_embeds = self.cex_listing_embedding(exchange_ids)
mask = (event_type_ids == self.event_type_to_id.get('CexListing', -1)).unsqueeze(-1)
return exchange_embeds * mask
def _get_chainsnapshot_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Handles ChainSnapshot events.
"""
device = self.device
numerical_features = batch['chainsnapshot_numerical_features']
event_type_ids = batch['event_type_ids']
# --- FIXED: All features are amounts/prices, so log-scale all ---
projected_features = self._normalize_and_project(
numerical_features, self.chainsnapshot_num_norm, self.chainsnapshot_num_proj, log_indices=[0, 1]
)
mask = (event_type_ids == self.event_type_to_id.get('ChainSnapshot', -1)).unsqueeze(-1)
return projected_features * mask
def _get_lighthousesnapshot_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Handles Lighthouse_Snapshot events.
"""
device = self.device
numerical_features = batch['lighthousesnapshot_numerical_features']
protocol_ids = batch['lighthousesnapshot_protocol_ids'] # NEW
timeframe_ids = batch['lighthousesnapshot_timeframe_ids'] # NEW
event_type_ids = batch['event_type_ids']
# --- FIXED: All features are counts/volumes, so log-scale all ---
projected_features = self._normalize_and_project(
numerical_features, self.lighthousesnapshot_num_norm, self.lighthousesnapshot_num_proj, log_indices=[0, 1, 2, 3, 4]
)
# --- NEW: Get embeddings for categorical IDs ---
# Re-use the main protocol embedding layer
protocol_embeds = self.protocol_embedding(protocol_ids)
timeframe_embeds = self.lighthouse_timeframe_embedding(timeframe_ids)
mask = (event_type_ids == self.event_type_to_id.get('Lighthouse_Snapshot', -1)).unsqueeze(-1)
return (projected_features + protocol_embeds + timeframe_embeds) * mask
def _get_migrated_specific_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Handles Migrated events by looking up the protocol_id embedding.
"""
device = self.device
protocol_ids = batch['migrated_protocol_ids']
event_type_ids = batch['event_type_ids']
# Look up the embedding for the protocol ID
protocol_embeds = self.protocol_embedding(protocol_ids)
# Create mask for the event
mask = (event_type_ids == self.event_type_to_id.get('Migrated', -1)).unsqueeze(-1)
return protocol_embeds * mask
def _get_special_context_embeddings(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Handles special context tokens like 'MIDDLE' and 'RECENT' by adding their unique learnable embeddings.
"""
device = self.device
event_type_ids = batch['event_type_ids']
B, L = event_type_ids.shape
middle_id = self.event_type_to_id.get('MIDDLE', -1)
recent_id = self.event_type_to_id.get('RECENT', -1)
middle_mask = (event_type_ids == middle_id)
recent_mask = (event_type_ids == recent_id)
middle_emb = self.special_context_embedding(torch.tensor(self.special_context_tokens['MIDDLE'], device=device))
recent_emb = self.special_context_embedding(torch.tensor(self.special_context_tokens['RECENT'], device=device))
# Add the embeddings at the correct locations
return middle_mask.unsqueeze(-1) * middle_emb + recent_mask.unsqueeze(-1) * recent_emb
def _pool_hidden_states(self,
hidden_states: torch.Tensor,
attention_mask: torch.Tensor) -> torch.Tensor:
"""
Pools variable-length hidden states into a single embedding per sequence by
selecting the last non-masked token for each batch element.
"""
if hidden_states.size(0) == 0:
return torch.empty(0, self.d_model, device=hidden_states.device, dtype=hidden_states.dtype)
seq_lengths = attention_mask.long().sum(dim=1)
last_indices = torch.clamp(seq_lengths - 1, min=0)
batch_indices = torch.arange(hidden_states.size(0), device=hidden_states.device)
return hidden_states[batch_indices, last_indices]
def forward(self, batch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
    """
    Encodes a batch of multi-modal event sequences and returns prediction logits.

    Pipeline: run the dynamic/snapshot entity encoders, gather their projected
    embeddings per sequence position, sum all per-event-type embedding
    components into `inputs_embeds`, run the decoder backbone, pool the last
    real token of each sequence, and apply the quantile / quality / movement
    heads.

    Args:
        batch: Collated batch dict; see the `_get_*_specific_embeddings`
            helpers for the keys each component consumes.

    Returns:
        Dict with 'quantile_logits', 'quality_logits', 'movement_logits',
        'pooled_states', 'hidden_states' and 'attention_mask'.
    """
    device = self.device
    # Unpack core sequence tensors
    # (NOTE: the previous dead assignment of batch['relative_ts'] to self.dtype
    # was removed; relative time is processed in float32 below for stability.)
    event_type_ids = batch['event_type_ids'].to(device)
    timestamps_float = batch['timestamps_float'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    B, L = event_type_ids.shape
    if B == 0 or L == 0:
        # Degenerate batch: return empty tensors with the correct trailing dims.
        print("Warning: Received empty batch in Oracle forward.")
        empty_hidden = torch.empty(0, L, self.d_model, device=device, dtype=self.dtype)
        empty_mask = torch.empty(0, L, device=device, dtype=torch.long)
        empty_quantiles = torch.empty(0, self.num_outputs, device=device, dtype=self.dtype)
        empty_quality = torch.empty(0, device=device, dtype=self.dtype)
        empty_movement = torch.empty(0, len(self.horizons_seconds), self.num_movement_classes, device=device, dtype=self.dtype)
        return {
            'quantile_logits': empty_quantiles,
            'quality_logits': empty_quality,
            'movement_logits': empty_movement,
            'pooled_states': torch.empty(0, self.d_model, device=device, dtype=self.dtype),
            'hidden_states': empty_hidden,
            'attention_mask': empty_mask
        }
    # === 1. Run Dynamic Encoders (produces graph-updated entity embeddings) ===
    dynamic_raw_embeds = self._run_dynamic_encoders(batch)
    # === 2. Run Snapshot Encoders (uses dynamic_raw_embeds) ===
    wallet_addr_to_batch_idx = batch['wallet_addr_to_batch_idx']
    snapshot_raw_embeds = self._run_snapshot_encoders(batch, dynamic_raw_embeds['wallet'], wallet_addr_to_batch_idx)
    # === 3. Project Raw Embeddings and Gather for Sequence ===
    raw_embeds = {**dynamic_raw_embeds, **snapshot_raw_embeds}
    gathered_embeds = self._project_and_gather_embeddings(raw_embeds, batch)
    # === 4. Assemble Final `inputs_embeds` ===
    event_embeds = self.event_type_embedding(event_type_ids)
    ts_embeds = self.time_proj(self.time_encoder(timestamps_float))
    # Stabilize relative time: minutes scale + signed log1p + LayerNorm before projection
    relative_ts_fp32 = batch['relative_ts'].to(device, torch.float32)
    rel_ts_minutes = relative_ts_fp32 / 60.0
    rel_ts_processed = torch.sign(rel_ts_minutes) * torch.log1p(torch.abs(rel_ts_minutes))
    # Match LayerNorm parameter dtype, then match Linear parameter dtype
    norm_dtype = self.rel_ts_norm.weight.dtype
    proj_dtype = self.rel_ts_proj.weight.dtype
    rel_ts_normed = self.rel_ts_norm(rel_ts_processed.to(norm_dtype))
    rel_ts_embeds = self.rel_ts_proj(rel_ts_normed.to(proj_dtype))
    # Per-event-type additive components; each helper masks itself to its own
    # event type, so summing them is safe.
    transfer_specific_embeds = self._get_transfer_specific_embeddings(batch, gathered_embeds)
    trade_specific_embeds = self._get_trade_specific_embeddings(batch)
    deployer_trade_specific_embeds = self._get_deployer_trade_specific_embeddings(batch)
    smart_wallet_trade_specific_embeds = self._get_smart_wallet_trade_specific_embeddings(batch)
    pool_created_specific_embeds = self._get_pool_created_specific_embeddings(batch, gathered_embeds)
    liquidity_change_specific_embeds = self._get_liquidity_change_specific_embeddings(batch, gathered_embeds)
    fee_collected_specific_embeds = self._get_fee_collected_specific_embeddings(batch)
    token_burn_specific_embeds = self._get_token_burn_specific_embeddings(batch)
    supply_lock_specific_embeds = self._get_supply_lock_specific_embeddings(batch)
    onchain_snapshot_specific_embeds = self._get_onchain_snapshot_specific_embeddings(batch)
    trending_token_specific_embeds = self._get_trending_token_specific_embeddings(batch, gathered_embeds)
    boosted_token_specific_embeds = self._get_boosted_token_specific_embeddings(batch, gathered_embeds)
    dexboost_paid_specific_embeds = self._get_dexboost_paid_specific_embeddings(batch)
    # Tracker events
    alphagroup_call_specific_embeds = self._get_alphagroup_call_specific_embeddings(batch)
    channel_call_specific_embeds = self._get_channel_call_specific_embeddings(batch)
    cexlisting_specific_embeds = self._get_cexlisting_specific_embeddings(batch)
    # Chain and Lighthouse snapshots
    chainsnapshot_specific_embeds = self._get_chainsnapshot_specific_embeddings(batch)
    lighthousesnapshot_specific_embeds = self._get_lighthousesnapshot_specific_embeddings(batch)
    migrated_specific_embeds = self._get_migrated_specific_embeddings(batch)
    # DexProfile_Updated flags are projected directly (no dedicated helper).
    dexprofile_updated_flags = batch['dexprofile_updated_flags']
    dexprofile_flags_embeds = self.dexprofile_updated_flags_proj(dexprofile_updated_flags.to(self.dtype))
    # All text-based events (social, dexprofile, global trending) are handled
    # by the SocialEncoder in a single batched call.
    textual_event_embeds = self.social_encoder(
        batch=batch,
        gathered_embeds=gathered_embeds
    )
    # Special context injection tokens (MIDDLE / RECENT markers).
    special_context_embeds = self._get_special_context_embeddings(batch)
    # --- Combine all features ---
    # Sum in float32 for numerical stability, then cast back to model dtype
    components = [
        event_embeds, ts_embeds, rel_ts_embeds,
        gathered_embeds['wallet'], gathered_embeds['token'], gathered_embeds['original_author'], gathered_embeds['ohlc'],
        transfer_specific_embeds, trade_specific_embeds, deployer_trade_specific_embeds, smart_wallet_trade_specific_embeds,
        pool_created_specific_embeds, liquidity_change_specific_embeds, fee_collected_specific_embeds,
        token_burn_specific_embeds, supply_lock_specific_embeds, onchain_snapshot_specific_embeds,
        trending_token_specific_embeds, boosted_token_specific_embeds, dexboost_paid_specific_embeds,
        alphagroup_call_specific_embeds, channel_call_specific_embeds, cexlisting_specific_embeds,
        migrated_specific_embeds, special_context_embeds, gathered_embeds['holder_snapshot'], textual_event_embeds,
        dexprofile_flags_embeds, chainsnapshot_specific_embeds, lighthousesnapshot_specific_embeds
    ]
    inputs_embeds = sum([t.float() for t in components]).to(self.dtype)
    hf_attention_mask = attention_mask.to(device=device, dtype=torch.long)
    outputs = self.model(
        inputs_embeds=inputs_embeds,
        attention_mask=hf_attention_mask,
        return_dict=True
    )
    sequence_hidden = outputs.last_hidden_state
    # Pool the last real token of each sequence for the prediction heads.
    pooled_states = self._pool_hidden_states(sequence_hidden, hf_attention_mask)
    quantile_logits = self.quantile_head(pooled_states)
    quality_logits = self.quality_head(pooled_states).squeeze(-1)
    # Reshape flat movement logits to (batch, horizons, classes).
    movement_logits = self.movement_head(pooled_states).view(
        pooled_states.shape[0],
        len(self.horizons_seconds),
        self.num_movement_classes,
    )
    return {
        'quantile_logits': quantile_logits,
        'quality_logits': quality_logits,
        'movement_logits': movement_logits,
        'pooled_states': pooled_states,
        'hidden_states': sequence_hidden,
        'attention_mask': hf_attention_mask
    }