zirobtc commited on
Commit
7d63a09
·
1 Parent(s): 5800f64

Upload folder using huggingface_hub

Browse files
Files changed (7) hide show
  1. README.md +67 -4
  2. data/data_collator.py +3 -1
  3. data/data_loader.py +194 -50
  4. data/ohlc_stats.npz +1 -1
  5. log.log +2 -2
  6. train.py +27 -21
  7. train.sh +1 -4
README.md CHANGED
@@ -36,7 +36,70 @@ Launch training with updated hyperparameters.
36
  ./train.sh
37
  ```
38
 
39
- ## TODOs
40
- * [ ] **Re-run Caching**: Since horizons changed, the existing cache (if any) is stale. Expected to run `pre_cache.sh`.
41
- * [ ] **Verify Inference**: Ensure `inference.py` handles the 20s latency constraints gracefully (e.g. timestamp checks).
42
- * [ ] **Model Architecture**: Confirm `8192` context length fits in VRAM with current model config (Attention implementation).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ./train.sh
37
  ```
38
 
39
+ ## TODO: Future Enhancements
40
+
41
+ ### Multi-Task Quality Prediction Head
42
+ Add a secondary head (Head B) that predicts **token quality percentiles** alongside price returns:
43
+ - **Fees Percentile** — Predicted future fees relative to class median
44
+ - **Volume Percentile** — Predicted future volume relative to class median
45
+ - **Holders Percentile** — Predicted future holder count relative to class median
46
+
47
+ **Rationale:** The `analyze_distribution.py` script currently uses hard thresholds on future metrics to classify tokens as "Manipulated". This head would let the model **learn to predict** those quality metrics from current features, enabling scam detection at inference time without access to future data.
48
+
49
+ **Approach Options:**
50
+ 1. Single composite quality score (simpler)
51
+ 2. Three separate percentile predictions (more interpretable)
52
+ 3. Three binary classifications (fees_ok, volume_ok, holders_ok)
53
+
54
+ Data Sampling (Context Optimization)
55
+ Replace hardcoded H/B/H limits with a dynamic sampling strategy that maximizes the model's context window usage.
56
+
57
+ The Problem
58
+ Currently, the system triggers H/B/H logic based on a fixed 30k trade count and uses hardcoded limits (10k early, 15k recent). This mismatch with the model's max_seq_len (e.g., 8192) leads to inefficient data usage—either truncating valuable data arbitrarily or feeding too little when more could fit.
59
+
60
+ The Solution: Dynamic Context Filling
61
+ Implementation moves to
62
+ data_loader.py
63
+ (since cache contains full history).
64
+
65
+ Algorithm
66
+ Input: Full sorted list of events (Trades, Chart Segments, etc.) up to T_cutoff.
67
+ Check: if
68
+ len(events) <= max_seq_len
69
+ , use ALL events.
70
+ Split: If
71
+ len(events) > max_seq_len
72
+ :
73
+ Reserve space for special tokens (start/end/pad).
74
+ Calculate Budget: budget = max_seq_len - reserve (e.g., 8100).
75
+ Dynamic Split:
76
+ Head (Early): First budget / 2 events.
77
+ Tail (Recent): Last budget / 2 events.
78
+ Construct: [HEAD] ... [GAP_TOKEN] ... [TAIL].
79
+ Implementation Changes
80
+ [MODIFY]
81
+ data_loader.py
82
+ Remove Constants: Delete HBH_EARLY_EVENT_LIMIT, HBH_RECENT_EVENT_LIMIT.
83
+ Update
84
+ _generate_dataset_item
85
+ :
86
+ Accept max_seq_len.
87
+ Implement the split logic defined above before returning event_sequence.
88
+
89
+
90
+
91
+
92
+ Here it is explained more simply:
93
+
94
+ We check whether the final events exceed the total context we have available.
95
+ Then we filter out all the trade events and check how many non-aggregable events we have — for example a burn, a deployer trade, etc.
96
+ Then we take the context remaining after excluding those IMPORTANT events shown above, and we check how many snapshots will fit (chart segments, holder snapshots, chain stats, etc.).
97
+ Then the budget remaining after snapshots and important non-aggregable events is used to build the H segments (high definition), while in the middle (Blurry) section we keep just the snapshots.
98
+
99
+ This works because roughly 90% of the context is taken up by trades and transfers, so they are the only events that need compressing to free up context.
100
+
101
+ You don't need new tokens because special tokens for this already exist:
102
+ 'MIDDLE',
103
+ 'RECENT'
104
+
105
+ So when you switch to blurry you emit <MIDDLE>, and when you go back to high definition you emit <RECENT>.
data/data_collator.py CHANGED
@@ -710,7 +710,9 @@ class MemecoinCollator:
710
  'textual_event_data': textual_event_data_list, # RENAMED
711
  # Labels
712
  'labels': torch.stack([item['labels'] for item in batch]) if batch and 'labels' in batch[0] else None,
713
- 'labels_mask': torch.stack([item['labels_mask'] for item in batch]) if batch and 'labels_mask' in batch[0] else None
 
 
714
  }
715
 
716
  # Filter out None values (e.g., if no labels provided)
 
710
  'textual_event_data': textual_event_data_list, # RENAMED
711
  # Labels
712
  'labels': torch.stack([item['labels'] for item in batch]) if batch and 'labels' in batch[0] else None,
713
+ 'labels_mask': torch.stack([item['labels_mask'] for item in batch]) if batch and 'labels_mask' in batch[0] else None,
714
+ # Debug info
715
+ 'token_addresses': [item.get('token_address', 'unknown') for item in batch]
716
  }
717
 
718
  # Filter out None values (e.g., if no labels provided)
data/data_loader.py CHANGED
@@ -33,10 +33,25 @@ LARGE_TRANSFER_SUPPLY_PCT_THRESHOLD = 0.03 # 3% of supply
33
  SMART_WALLET_PNL_THRESHOLD = 3.0 # 300% PNL
34
  SMART_WALLET_USD_THRESHOLD = 20000.0
35
 
36
- # --- NEW: Hyperparameters for H/B/H Event Fetching ---
37
- EVENT_COUNT_THRESHOLD_FOR_HBH = 30000 # If total events > this, use H/B/H
38
- HBH_EARLY_EVENT_LIMIT = 10000
39
- HBH_RECENT_EVENT_LIMIT = 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # --- NEW: OHLC Sequence Length Constant ---
42
  OHLC_SEQ_LEN = 300 # 4 minutes of chart
@@ -107,7 +122,10 @@ class OracleDataset(Dataset):
107
  t_cutoff_seconds: int = 60,
108
  cache_dir: Optional[Union[str, Path]] = None,
109
  start_date: Optional[datetime.datetime] = None,
110
- min_trade_usd: float = 0.0):
 
 
 
111
 
112
  # --- NEW: Create a persistent requests session for efficiency ---
113
  # Configure robust HTTP session
@@ -261,6 +279,90 @@ class OracleDataset(Dataset):
261
  denom = self.ohlc_price_std if abs(self.ohlc_price_std) > 1e-9 else 1.0
262
  return [(float(v) - self.ohlc_price_mean) / denom for v in values]
263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  def _compute_future_return_labels(self,
265
  anchor_price: Optional[float],
266
  anchor_timestamp: int,
@@ -830,9 +932,11 @@ class OracleDataset(Dataset):
830
  raw_data = torch.load(filepath, map_location='cpu', weights_only=False)
831
  except Exception as e:
832
  raise RuntimeError(f"ERROR: Could not load cached item {filepath}: {e}")
 
 
833
  else:
834
- # Online mode fallback
835
- raw_data = self.__cacheitem__(idx)
836
 
837
  if not raw_data:
838
  raise RuntimeError(f"No raw data loaded for index {idx}")
@@ -882,19 +986,31 @@ class OracleDataset(Dataset):
882
  preferred_horizon = horizons[1] if len(horizons) > 1 else min_label
883
 
884
  mint_ts_value = _timestamp_to_order_value(mint_timestamp)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  trade_ts_values = [
886
  _timestamp_to_order_value(trade.get('timestamp'))
887
- for trade in raw_data.get('trades', [])
888
- if trade.get('timestamp') is not None
889
  ]
890
- if not trade_ts_values:
 
 
891
  return None
892
 
893
- # Cache guarantees min_trades=25, so we proceed assuming valid data.
894
- # But for safety in dynamic sampling:
895
- if not trade_ts_values:
896
- return None
897
-
898
  # Sort trades to find the 24th trade timestamp
899
  sorted_trades_ts = sorted(trade_ts_values)
900
 
@@ -1057,10 +1173,10 @@ class OracleDataset(Dataset):
1057
  max_horizon_seconds=self.max_cache_horizon_seconds,
1058
  include_wallet_data=False,
1059
  include_graph=False,
1060
- min_trades=25,
1061
  full_history=True, # Bypass H/B/H limits
1062
- prune_failed=True, # Drop failed trades
1063
- prune_transfers=True # Drop transfers (captured in snapshots)
1064
  )
1065
  if raw_data is None:
1066
  return None
@@ -1447,9 +1563,12 @@ class OracleDataset(Dataset):
1447
  cached_holders_list=cached_holders_list
1448
  )
1449
 
1450
- # 7. Finalize Sequence
1451
  event_sequence_entries.sort(key=lambda x: x[0])
1452
- event_sequence = [entry[1] for entry in event_sequence_entries]
 
 
 
1453
 
1454
  # 8. Compute Labels using future data
1455
  # Define horizons (e.g., [60, 120, ...])
@@ -1459,7 +1578,31 @@ class OracleDataset(Dataset):
1459
  # Note: future_trades_for_labels contains ALL trades (past & future relative to T_cutoff)
1460
  # We need to find the price at T_cutoff and at T_cutoff + h
1461
 
1462
- all_trades = future_trades_for_labels
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1463
  # Ensure sorted
1464
  all_trades.sort(key=lambda x: _timestamp_to_order_value(x['timestamp']))
1465
 
@@ -1483,40 +1626,41 @@ class OracleDataset(Dataset):
1483
  label_values = []
1484
  mask_values = []
1485
 
1486
- for h in horizons:
1487
- target_ts = cutoff_ts_val + h
1488
-
1489
- if target_ts > last_trade_ts_val:
1490
- # Horizon extends beyond known history
1491
- # We MASK this label. We do NOT guess 0.
1492
- label_values.append(0.0) # Dummy value
1493
- mask_values.append(0.0) # Mask = 0 (Ignore)
1494
- else:
1495
- # Find price at target_ts
1496
- # It is the last trade strictly before or at target_ts
1497
- future_price = current_price # Default to current if no trades found in window? Unlikely if checked range.
1498
-
1499
- # Check trades between current_idx and target
1500
- # Optimization: start search from current_price_idx
1501
- found_future = False
1502
- for j in range(current_price_idx, len(all_trades)):
1503
- t = all_trades[j]
1504
- t_ts = _timestamp_to_order_value(t['timestamp'])
1505
- if t_ts <= target_ts:
1506
- future_price = float(t['price_usd'])
1507
- found_future = True
1508
- else:
1509
- break # Optimization: surpassed target_ts
1510
 
1511
- if current_price > 0:
1512
- ret = (future_price - current_price) / current_price
 
 
 
1513
  else:
1514
- ret = 0.0
 
 
1515
 
1516
- label_values.append(ret)
1517
- mask_values.append(1.0) # Mask = 1 (Valid)
 
 
 
 
 
 
 
 
 
 
1518
 
1519
  return {
 
1520
  'event_sequence': event_sequence,
1521
  'wallets': wallet_data,
1522
  'tokens': all_token_data,
 
33
  SMART_WALLET_PNL_THRESHOLD = 3.0 # 300% PNL
34
  SMART_WALLET_USD_THRESHOLD = 20000.0
35
 
36
+ # --- Event Categorization for Dynamic Sampling ---
37
+ # Events that are rare and should ALWAYS be kept
38
+ CRITICAL_EVENTS = {
39
+ 'Mint', 'Deployer_Trade', 'SmartWallet_Trade', 'LargeTrade', 'LargeTransfer',
40
+ 'TokenBurn', 'SupplyLock', 'PoolCreated', 'LiquidityChange', 'Migrated',
41
+ 'FeeCollected', 'TrendingToken', 'BoostedToken', 'XPost', 'XRetweet',
42
+ 'XReply', 'XQuoteTweet', 'PumpReply', 'DexBoost_Paid', 'DexProfile_Updated',
43
+ 'AlphaGroup_Call', 'Channel_Call', 'CexListing', 'TikTok_Trending_Hashtag',
44
+ 'XTrending_Hashtag'
45
+ }
46
+
47
+ # Periodic snapshots - kept for context continuity
48
+ SNAPSHOT_EVENTS = {
49
+ 'Chart_Segment', 'OnChain_Snapshot', 'HolderSnapshot',
50
+ 'ChainSnapshot', 'Lighthouse_Snapshot'
51
+ }
52
+
53
+ # High-volume events that can be compressed (Head/Tail)
54
+ COMPRESSIBLE_EVENTS = {'Trade', 'Transfer'}
55
 
56
  # --- NEW: OHLC Sequence Length Constant ---
57
  OHLC_SEQ_LEN = 300 # 4 minutes of chart
 
122
  t_cutoff_seconds: int = 60,
123
  cache_dir: Optional[Union[str, Path]] = None,
124
  start_date: Optional[datetime.datetime] = None,
125
+ min_trade_usd: float = 0.0,
126
+ max_seq_len: int = 8192):
127
+
128
+ self.max_seq_len = max_seq_len
129
 
130
  # --- NEW: Create a persistent requests session for efficiency ---
131
  # Configure robust HTTP session
 
279
  denom = self.ohlc_price_std if abs(self.ohlc_price_std) > 1e-9 else 1.0
280
  return [(float(v) - self.ohlc_price_mean) / denom for v in values]
281
 
282
+ def _apply_dynamic_sampling(self, events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
283
+ """
284
+ Applies dynamic context sampling to fit events within max_seq_len.
285
+
286
+ Priority:
287
+ 1. CRITICAL events (always kept)
288
+ 2. SNAPSHOT events (kept for continuity)
289
+ 3. COMPRESSIBLE events (Trade/Transfer) - split into Head/Tail with MIDDLE token
290
+
291
+ Uses existing 'MIDDLE' and 'RECENT' tokens to mark transitions.
292
+ """
293
+ if len(events) <= self.max_seq_len:
294
+ return events
295
+
296
+ # Categorize events by type
297
+ critical_events = [] # (original_idx, event)
298
+ snapshot_events = []
299
+ compressible_events = []
300
+
301
+ for idx, event in enumerate(events):
302
+ event_type = event.get('event_type', '')
303
+ if event_type in CRITICAL_EVENTS:
304
+ critical_events.append((idx, event))
305
+ elif event_type in SNAPSHOT_EVENTS:
306
+ snapshot_events.append((idx, event))
307
+ elif event_type in COMPRESSIBLE_EVENTS:
308
+ compressible_events.append((idx, event))
309
+ else:
310
+ # Unknown event types go to critical (safe default)
311
+ critical_events.append((idx, event))
312
+
313
+ # Calculate budget for compressible events
314
+ # Reserve 2 tokens for MIDDLE and RECENT markers
315
+ reserved_tokens = 2
316
+ fixed_count = len(critical_events) + len(snapshot_events) + reserved_tokens
317
+ budget_for_compressible = max(0, self.max_seq_len - fixed_count)
318
+
319
+ # If no budget for compressible, just return critical + snapshots
320
+ if budget_for_compressible == 0 or len(compressible_events) <= budget_for_compressible:
321
+ # All compressible fit, just return sorted
322
+ all_events = critical_events + snapshot_events + compressible_events
323
+ all_events.sort(key=lambda x: x[0])
324
+ return [e[1] for e in all_events]
325
+
326
+ # Apply Head/Tail split for compressible events
327
+ head_size = budget_for_compressible // 2
328
+ tail_size = budget_for_compressible - head_size
329
+
330
+ head_events = compressible_events[:head_size]
331
+ tail_events = compressible_events[-tail_size:] if tail_size > 0 else []
332
+
333
+ # Find the timestamp boundary for MIDDLE/RECENT markers
334
+ # MIDDLE goes after head, RECENT goes before tail
335
+ middle_marker_idx = head_events[-1][0] if head_events else 0
336
+ recent_marker_idx = tail_events[0][0] if tail_events else len(events)
337
+
338
+ # Create marker events
339
+ middle_marker = {
340
+ 'event_type': 'MIDDLE',
341
+ 'relative_ts': events[middle_marker_idx].get('relative_ts', 0) if middle_marker_idx < len(events) else 0,
342
+ 'is_marker': True
343
+ }
344
+ recent_marker = {
345
+ 'event_type': 'RECENT',
346
+ 'relative_ts': events[recent_marker_idx - 1].get('relative_ts', 0) if recent_marker_idx > 0 and recent_marker_idx <= len(events) else 0,
347
+ 'is_marker': True
348
+ }
349
+
350
+ # Combine all events with markers
351
+ # We need to maintain chronological order
352
+ all_indexed_events = critical_events + snapshot_events + head_events + tail_events
353
+
354
+ # Add markers with synthetic indices
355
+ middle_idx = middle_marker_idx + 0.5 # After last head event
356
+ recent_idx = recent_marker_idx - 0.5 # Before first tail event
357
+
358
+ all_indexed_events.append((middle_idx, middle_marker))
359
+ all_indexed_events.append((recent_idx, recent_marker))
360
+
361
+ # Sort by original index to maintain chronological order
362
+ all_indexed_events.sort(key=lambda x: x[0])
363
+
364
+ return [e[1] for e in all_indexed_events]
365
+
366
  def _compute_future_return_labels(self,
367
  anchor_price: Optional[float],
368
  anchor_timestamp: int,
 
932
  raw_data = torch.load(filepath, map_location='cpu', weights_only=False)
933
  except Exception as e:
934
  raise RuntimeError(f"ERROR: Could not load cached item {filepath}: {e}")
935
+ except Exception as e:
936
+ raise RuntimeError(f"ERROR: Could not load cached item {filepath}: {e}")
937
  else:
938
+ # Strict Offline Mode: No dynamic generation fallback
939
+ raise RuntimeError(f"Offline mode required. No cache directory provided or configured.")
940
 
941
  if not raw_data:
942
  raise RuntimeError(f"No raw data loaded for index {idx}")
 
986
  preferred_horizon = horizons[1] if len(horizons) > 1 else min_label
987
 
988
  mint_ts_value = _timestamp_to_order_value(mint_timestamp)
989
+
990
+ # ============================================================================
991
+ # CRITICAL: Use ONLY successful trades for T_cutoff sampling!
992
+ # ============================================================================
993
+ # Failed trades have invalid price_usd values and should not be used for:
994
+ # 1. Determining the valid T_cutoff range (trades[24] to trades[-1])
995
+ # 2. Computing price returns for labels
996
+ # The T_cutoff range must guarantee at least one successful trade after cutoff.
997
+ # ============================================================================
998
+ successful_trades = [
999
+ trade for trade in raw_data.get('trades', [])
1000
+ if trade.get('success', False)
1001
+ and trade.get('timestamp') is not None
1002
+ and float(trade.get('price_usd', 0) or 0) > 0
1003
+ ]
1004
+
1005
  trade_ts_values = [
1006
  _timestamp_to_order_value(trade.get('timestamp'))
1007
+ for trade in successful_trades
 
1008
  ]
1009
+
1010
+ if not trade_ts_values or len(trade_ts_values) < 25:
1011
+ # Not enough successful trades for valid sampling
1012
  return None
1013
 
 
 
 
 
 
1014
  # Sort trades to find the 24th trade timestamp
1015
  sorted_trades_ts = sorted(trade_ts_values)
1016
 
 
1173
  max_horizon_seconds=self.max_cache_horizon_seconds,
1174
  include_wallet_data=False,
1175
  include_graph=False,
1176
+ min_trades=24, # Enforce min trades for context
1177
  full_history=True, # Bypass H/B/H limits
1178
+ prune_failed=False, # Keep failed trades for realistic simulation
1179
+ prune_transfers=False # Keep transfers for snapshot reconstruction
1180
  )
1181
  if raw_data is None:
1182
  return None
 
1563
  cached_holders_list=cached_holders_list
1564
  )
1565
 
1566
+ # 7. Finalize Sequence with Dynamic Sampling
1567
  event_sequence_entries.sort(key=lambda x: x[0])
1568
+ raw_event_sequence = [entry[1] for entry in event_sequence_entries]
1569
+
1570
+ # Apply dynamic context sampling if needed
1571
+ event_sequence = self._apply_dynamic_sampling(raw_event_sequence)
1572
 
1573
  # 8. Compute Labels using future data
1574
  # Define horizons (e.g., [60, 120, ...])
 
1578
  # Note: future_trades_for_labels contains ALL trades (past & future relative to T_cutoff)
1579
  # We need to find the price at T_cutoff and at T_cutoff + h
1580
 
1581
+ # ============================================================================
1582
+ # CRITICAL: Filter for successful trades with valid prices ONLY!
1583
+ # ============================================================================
1584
+ # Failed trades (success=False) often have price_usd=0 or invalid values.
1585
+ # Using these for label computation causes mathematically impossible returns
1586
+ # like -1.0 (price went to 0) or 0.0 (no price change despite trading).
1587
+ # ALWAYS filter by: success=True AND price_usd > 0
1588
+ # ============================================================================
1589
+ all_trades = [
1590
+ t for t in future_trades_for_labels
1591
+ if t.get('success', False) and float(t.get('price_usd', 0) or 0) > 0
1592
+ ]
1593
+
1594
+ if not all_trades:
1595
+ # No valid trades for label computation
1596
+ return {
1597
+ 'event_sequence': event_sequence,
1598
+ 'wallets': wallet_data,
1599
+ 'tokens': all_token_data,
1600
+ 'graph_links': graph_links,
1601
+ 'embedding_pooler': pooler,
1602
+ 'labels': torch.zeros(len(self.horizons_seconds), dtype=torch.float32),
1603
+ 'labels_mask': torch.zeros(len(self.horizons_seconds), dtype=torch.float32)
1604
+ }
1605
+
1606
  # Ensure sorted
1607
  all_trades.sort(key=lambda x: _timestamp_to_order_value(x['timestamp']))
1608
 
 
1626
  label_values = []
1627
  mask_values = []
1628
 
1629
+ # Edge case: no trades before cutoff means we have no anchor price
1630
+ if current_price_idx < 0 or current_price <= 0:
1631
+ # No valid anchor price - mask all labels
1632
+ for h in horizons:
1633
+ label_values.append(0.0)
1634
+ mask_values.append(0.0)
1635
+ else:
1636
+ for h in horizons:
1637
+ target_ts = cutoff_ts_val + h
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1638
 
1639
+ if target_ts > last_trade_ts_val:
1640
+ # Horizon extends beyond known history
1641
+ # We MASK this label. We do NOT guess 0.
1642
+ label_values.append(0.0) # Dummy value
1643
+ mask_values.append(0.0) # Mask = 0 (Ignore)
1644
  else:
1645
+ # Find price at target_ts
1646
+ # Start searching AFTER current_price_idx to find the NEXT trade
1647
+ future_price = current_price
1648
 
1649
+ # Search from current_price_idx + 1 to find trades in the horizon window
1650
+ for j in range(current_price_idx + 1, len(all_trades)):
1651
+ t = all_trades[j]
1652
+ t_ts = _timestamp_to_order_value(t['timestamp'])
1653
+ if t_ts <= target_ts:
1654
+ future_price = float(t['price_usd'])
1655
+ else:
1656
+ break # Surpassed target_ts
1657
+
1658
+ ret = (future_price - current_price) / current_price
1659
+ label_values.append(ret)
1660
+ mask_values.append(1.0) # Mask = 1 (Valid)
1661
 
1662
  return {
1663
+ 'token_address': token_address, # For debugging
1664
  'event_sequence': event_sequence,
1665
  'wallets': wallet_data,
1666
  'tokens': all_token_data,
data/ohlc_stats.npz CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:847193fc90f4b0313f515ea38a24fd073be09188cfc4764c5dce3f658d4dc117
3
  size 1660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e84cff0cfabf73d50f94c3f9a5cf9224e89c634db76982a4e3e5428c9df4ea91
3
  size 1660
log.log CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10917f8ad8d8962a8c05a46f2b24dcb1180b23665d0767ea5c65c63d9ec09c92
3
- size 314966
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1c2198c3ed6e249ddf7b7b017b99b2389e4611b8b0649c63d30c40c59e03ac1
3
+ size 76001
train.py CHANGED
@@ -150,10 +150,14 @@ def log_debug_batch_context(batch: Dict[str, Any], logger: logging.Logger, step:
150
  events.append(name)
151
 
152
  logger.info(f"\n--- [Step {step}] Batch Input Preview (Sample 0) ---")
153
- # Show a slice of events (e.g. last 50)
154
- tail_len = 50
155
- context_str = ", ".join(events[-tail_len:])
156
- logger.info(f"Event Stream (Last {tail_len} of {len(events)}): [{context_str}]")
 
 
 
 
157
 
158
  # Show Labels
159
  # Assuming flattened labels [H*Q]
@@ -190,11 +194,7 @@ def parse_args() -> argparse.Namespace:
190
  parser.add_argument("--num_workers", type=int, default=0)
191
  parser.add_argument("--pin_memory", dest="pin_memory", action="store_true", default=False)
192
  parser.add_argument("--no-pin_memory", dest="pin_memory", action="store_false")
193
- parser.add_argument("--clickhouse_host", type=str, default="localhost")
194
- parser.add_argument("--clickhouse_port", type=int, default=9000)
195
- parser.add_argument("--neo4j_uri", type=str, default="bolt://localhost:7687")
196
- parser.add_argument("--neo4j_user", type=str, default=None)
197
- parser.add_argument("--neo4j_password", type=str, default=None)
198
  return parser.parse_args()
199
 
200
 
@@ -394,21 +394,27 @@ def main() -> None:
394
  dirs = [d for d in checkpoint_dir.iterdir() if d.is_dir()]
395
  if dirs:
396
  # Sort by modification time or name to find latest
 
397
  dirs.sort(key=lambda x: x.stat().st_mtime)
398
  latest_checkpoint = dirs[-1]
399
- logger.info(f"Found checkpoint: {latest_checkpoint}. Resuming training...")
400
- accelerator.load_state(str(latest_checkpoint))
401
-
402
- # Try to infer epoch/step from folder name or saved state if custom tracking
403
- # Accelerate restores DataLoader state, so we mainly need to know where we are for logging
404
- # Assuming standard naming or just relying on DataLoader restore.
405
- # Simple approach: Just trust Accelerate/DataLoader to skip.
406
- # If you need precise epoch/step recovery for logging display:
407
- # You could save a metadata.json inside the checkpoint folder.
408
-
409
- logger.info("Checkpoint loaded. DataLoader state restored.")
 
 
 
 
 
410
  else:
411
- logger.info("No checkpoint found. Starting fresh.")
412
 
413
  # --- 7. Training Loop ---
414
  total_steps = 0
 
150
  events.append(name)
151
 
152
  logger.info(f"\n--- [Step {step}] Batch Input Preview (Sample 0) ---")
153
+
154
+ # Log token address for manual verification
155
+ token_addresses = batch.get('token_addresses', [])
156
+ if token_addresses:
157
+ logger.info(f"Token Address: {token_addresses[0]}")
158
+
159
+ context_str = ", ".join(events)
160
+ logger.info(f"Event Stream ({len(events)}): [{context_str}]")
161
 
162
  # Show Labels
163
  # Assuming flattened labels [H*Q]
 
194
  parser.add_argument("--num_workers", type=int, default=0)
195
  parser.add_argument("--pin_memory", dest="pin_memory", action="store_true", default=False)
196
  parser.add_argument("--no-pin_memory", dest="pin_memory", action="store_false")
197
+ parser.add_argument("--resume_from_checkpoint", type=str, default=None, help="Path to checkpoint or 'latest'")
 
 
 
 
198
  return parser.parse_args()
199
 
200
 
 
394
  dirs = [d for d in checkpoint_dir.iterdir() if d.is_dir()]
395
  if dirs:
396
  # Sort by modification time or name to find latest
397
+ # Sort by modification time or name to find latest
398
  dirs.sort(key=lambda x: x.stat().st_mtime)
399
  latest_checkpoint = dirs[-1]
400
+
401
+ if args.resume_from_checkpoint:
402
+ if args.resume_from_checkpoint == "latest":
403
+ if latest_checkpoint:
404
+ logger.info(f"Resuming from latest checkpoint: {latest_checkpoint}")
405
+ accelerator.load_state(str(latest_checkpoint))
406
+ else:
407
+ logger.warning("Resume requested but no checkpoint found in dir. Starting fresh.")
408
+ else:
409
+ # Specific path
410
+ custom_ckpt = Path(args.resume_from_checkpoint)
411
+ if custom_ckpt.exists():
412
+ logger.info(f"Resuming from specific checkpoint: {custom_ckpt}")
413
+ accelerator.load_state(str(custom_ckpt))
414
+ else:
415
+ raise FileNotFoundError(f"Checkpoint not found at {custom_ckpt}")
416
  else:
417
+ logger.info("No resume flag provided. Starting fresh.")
418
 
419
  # --- 7. Training Loop ---
420
  total_steps = 0
train.sh CHANGED
@@ -15,7 +15,4 @@ accelerate launch train.py \
15
  --horizons_seconds 60 180 300 600 1800 3600 7200 \
16
  --quantiles 0.1 0.5 0.9 \
17
  --ohlc_stats_path ./data/ohlc_stats.npz \
18
- --num_workers 4 \
19
- --clickhouse_host localhost \
20
- --clickhouse_port 9000 \
21
- --neo4j_uri bolt://localhost:7687
 
15
  --horizons_seconds 60 180 300 600 1800 3600 7200 \
16
  --quantiles 0.1 0.5 0.9 \
17
  --ohlc_stats_path ./data/ohlc_stats.npz \
18
+ --num_workers 4