WCNegentropy committed
Commit c39227f · verified · 1 Parent(s): 6070daa

Remove markov_spline_training.py - cleanup for OS launch

Files changed (1)
  1. markov_spline_training.py +0 -438
markov_spline_training.py DELETED
@@ -1,438 +0,0 @@
- #!/usr/bin/env python3
- """
- MarkovSpline-Enhanced BitTransformerLM Training
-
- Integrates MarkovSpline data smoothing directly into BitTransformerLM training pipeline
- for improved data preprocessing and gradient optimization.
- """
-
- import os
- import sys
- import json
- import time
- import torch
- import torch.nn as nn
- import torch.optim as optim
- import numpy as np
- from pathlib import Path
- from typing import Dict, List, Tuple, Optional, Any
- from torch.utils.data import DataLoader, Dataset
-
- # Add MarkovSpline to path
- sys.path.insert(0, '/data/MarkovSpline')
- from bitpipe_integration import MarkovSplineBitPipeModule, create_markov_spline_bitpipe_module
-
- # BitTransformerLM imports
- from bit_transformer.model import BitTransformerLM
- from bit_transformer.telemetry import TelemetrySynthesizer
-
- # Simple trainer base class
- class BitwiseTrainer:
-     """Simple base trainer for BitTransformerLM."""
-
-     def __init__(self, model, learning_rate=1e-3, max_grad_norm=1.0):
-         self.model = model
-         self.device = next(model.parameters()).device
-         self.optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
-         self.criterion = nn.CrossEntropyLoss()
-         self.max_grad_norm = max_grad_norm
-
-     def train_step(self, batch):
-         """Simple training step."""
-         self.optimizer.zero_grad()
-
-         outputs = self.model(batch['input_bits'])
-         # BitTransformerLM returns (logits, telemetry)
-         if isinstance(outputs, tuple):
-             logits, telemetry = outputs
-         else:
-             logits = outputs
-
-         loss = self.criterion(logits.reshape(-1, logits.size(-1)), batch['target_bits'].reshape(-1))
-
-         loss.backward()
-
-         if self.max_grad_norm > 0:
-             torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
-
-         self.optimizer.step()
-
-         return {'loss': loss.item()}
-
-
- class MarkovSplineEnhancedDataset(Dataset):
-     """Dataset wrapper that applies MarkovSpline preprocessing."""
-
-     def __init__(self,
-                  base_dataset: Dataset,
-                  markov_module: MarkovSplineBitPipeModule,
-                  smoothing_strength: float = 0.1,
-                  enable_smoothing: bool = True):
-
-         self.base_dataset = base_dataset
-         self.markov_module = markov_module
-         self.smoothing_strength = smoothing_strength
-         self.enable_smoothing = enable_smoothing
-
-         # Initialize data preprocessor
-         if enable_smoothing:
-             self.markov_module.initialize_application('data_preprocessor',
-                                                       smoothing_strength=smoothing_strength,
-                                                       preserve_features=True)
-
-     def __len__(self):
-         return len(self.base_dataset)
-
-     def __getitem__(self, idx):
-         # Get original data
-         data = self.base_dataset[idx]
-
-         if not self.enable_smoothing:
-             return data
-
-         # Apply MarkovSpline preprocessing to bit sequences
-         if isinstance(data, dict) and 'input_bits' in data:
-             try:
-                 # Smooth input bits
-                 result = self.markov_module.process_data(
-                     [data['input_bits']],
-                     'preprocess_training',
-                     binary_data=True
-                 )
-
-                 if result['success'] and result['processed_sequences']:
-                     data['input_bits'] = result['processed_sequences'][0]
-                     data['smoothing_applied'] = True
-                 else:
-                     data['smoothing_applied'] = False
-
-             except Exception as e:
-                 print(f"Warning: MarkovSpline preprocessing failed for sample {idx}: {e}")
-                 data['smoothing_applied'] = False
-
-         return data
-
-
- class MarkovSplineEnhancedTrainer(BitwiseTrainer):
-     """Enhanced BitTransformerLM trainer with MarkovSpline integration."""
-
-     def __init__(self,
-                  model: BitTransformerLM,
-                  markov_config: Optional[Dict] = None,
-                  gradient_smoothing: bool = True,
-                  data_smoothing: bool = True,
-                  smoothing_strength: float = 0.1,
-                  **kwargs):
-
-         super().__init__(model, **kwargs)
-
-         # Initialize MarkovSpline module
-         self.markov_module = create_markov_spline_bitpipe_module(markov_config)
-         self.gradient_smoothing = gradient_smoothing
-         self.data_smoothing = data_smoothing
-         self.smoothing_strength = smoothing_strength
-
-         # Initialize gradient smoother if enabled
-         if gradient_smoothing:
-             self.markov_module.initialize_application('gradient_smoother',
-                                                       learning_rate=kwargs.get('learning_rate', 0.001),
-                                                       smoothing_strength=smoothing_strength,
-                                                       momentum_states=10)
-
-         # Tracking
-         self.smoothing_metrics = {}
-         self.gradient_smooth_history = []
-
-         print(f"🌊 MarkovSpline Enhanced Trainer initialized")
-         print(f"   - Gradient smoothing: {'✅' if gradient_smoothing else '❌'}")
-         print(f"   - Data smoothing: {'✅' if data_smoothing else '❌'}")
-         print(f"   - Smoothing strength: {smoothing_strength}")
-
-     def create_enhanced_dataloader(self,
-                                    dataset: Dataset,
-                                    batch_size: int = 8,
-                                    **kwargs) -> DataLoader:
-         """Create dataloader with MarkovSpline preprocessing."""
-
-         enhanced_dataset = MarkovSplineEnhancedDataset(
-             dataset,
-             self.markov_module,
-             self.smoothing_strength,
-             self.data_smoothing
-         )
-
-         return DataLoader(enhanced_dataset, batch_size=batch_size, **kwargs)
-
-     def apply_gradient_smoothing(self,
-                                  parameters: Dict[str, torch.Tensor],
-                                  gradients: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
-         """Apply MarkovSpline gradient smoothing."""
-
-         if not self.gradient_smoothing:
-             return parameters
-
-         try:
-             # Process through MarkovSpline gradient smoother
-             result = self.markov_module.process_data(
-                 {
-                     'parameters': parameters,
-                     'gradients': gradients
-                 },
-                 'smooth_gradients'
-             )
-
-             if result['success']:
-                 self.gradient_smooth_history.append(result['optimization_metrics'])
-                 return result['smoothed_parameters']
-             else:
-                 print(f"Warning: Gradient smoothing failed: {result.get('error', 'Unknown')}")
-                 return parameters
-
-         except Exception as e:
-             print(f"Warning: Gradient smoothing error: {e}")
-             return parameters
-
-     def train_step(self, batch: Dict[str, torch.Tensor]) -> Dict[str, float]:
-         """Enhanced training step with MarkovSpline integration."""
-
-         # Standard forward pass
-         self.optimizer.zero_grad()
-
-         # Forward pass
-         outputs = self.model(batch['input_bits'])
-         # BitTransformerLM returns (logits, telemetry)
-         if isinstance(outputs, tuple):
-             logits, telemetry = outputs
-         else:
-             logits = outputs
-
-         loss = self.criterion(logits.reshape(-1, logits.size(-1)), batch['target_bits'].reshape(-1))
-
-         # Backward pass
-         loss.backward()
-
-         # Extract parameters and gradients for smoothing
-         if self.gradient_smoothing:
-             parameters = {}
-             gradients = {}
-
-             for name, param in self.model.named_parameters():
-                 if param.grad is not None:
-                     parameters[name] = param.data.clone()
-                     gradients[name] = param.grad.data.clone()
-
-             # Apply MarkovSpline gradient smoothing
-             smoothed_params = self.apply_gradient_smoothing(parameters, gradients)
-
-             # Update model parameters with smoothed values
-             for name, param in self.model.named_parameters():
-                 if name in smoothed_params:
-                     param.data = smoothed_params[name]
-
-         # Standard optimizer step
-         if self.max_grad_norm > 0:
-             torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
-
-         self.optimizer.step()
-
-         # Collect metrics
-         metrics = {
-             'loss': loss.item(),
-             'smoothing_applied': batch.get('smoothing_applied', torch.tensor(False)).float().mean().item()
-         }
-
-         if 'smoothing_applied' in batch:  # batch is a dict, so test key membership (hasattr() is always False here)
-             metrics['data_smoothing_rate'] = batch['smoothing_applied'].float().mean().item()
-
-         return metrics
-
-     def train_epoch(self,
-                     train_loader: DataLoader,
-                     epoch: int) -> Dict[str, float]:
-         """Train one epoch with MarkovSpline enhancements."""
-
-         self.model.train()
-         epoch_metrics = {
-             'loss': 0.0,
-             'smoothing_applied': 0.0,
-             'data_smoothing_rate': 0.0,
-             'gradient_smoothing_success': 0.0
-         }
-
-         num_batches = 0
-
-         for batch_idx, batch in enumerate(train_loader):
-             # Move batch to device
-             for key in batch:
-                 if isinstance(batch[key], torch.Tensor):
-                     batch[key] = batch[key].to(self.device)
-
-             # Training step with MarkovSpline integration
-             step_metrics = self.train_step(batch)
-
-             # Accumulate metrics
-             for key, value in step_metrics.items():
-                 if key in epoch_metrics:
-                     epoch_metrics[key] += value
-
-             num_batches += 1
-
-             # Log progress
-             if batch_idx % 10 == 0:
-                 print(f"  Batch {batch_idx:3d}: Loss={step_metrics['loss']:.4f}")
-
-         # Average metrics
-         for key in epoch_metrics:
-             epoch_metrics[key] /= num_batches
-
-         return epoch_metrics
-
-     def get_markov_spline_metrics(self) -> Dict[str, Any]:
-         """Get comprehensive MarkovSpline performance metrics."""
-
-         metrics = self.markov_module.get_performance_metrics()
-
-         # Add training-specific metrics
-         metrics['training_integration'] = {
-             'gradient_smoothing_enabled': self.gradient_smoothing,
-             'data_smoothing_enabled': self.data_smoothing,
-             'smoothing_strength': self.smoothing_strength,
-             'gradient_smooth_operations': len(self.gradient_smooth_history)
-         }
-
-         if self.gradient_smooth_history:
-             recent_gradient_metrics = self.gradient_smooth_history[-10:]  # Last 10 operations
-             metrics['recent_gradient_smoothing'] = {
-                 'average_metrics': {
-                     key: np.mean([m.get(key, 0) for m in recent_gradient_metrics])
-                     for key in recent_gradient_metrics[0].keys()
-                 } if recent_gradient_metrics else {}
-             }
-
-         return metrics
-
-     def save_enhanced_checkpoint(self,
-                                  checkpoint_path: str,
-                                  epoch: int,
-                                  metrics: Dict[str, float]):
-         """Save checkpoint with MarkovSpline state."""
-
-         # Standard checkpoint data
-         checkpoint = {
-             'epoch': epoch,
-             'model_state_dict': self.model.state_dict(),
-             'optimizer_state_dict': self.optimizer.state_dict(),
-             'metrics': metrics,
-             'config': self.model.get_config()
-         }
-
-         # Add MarkovSpline metrics
-         checkpoint['markov_spline_metrics'] = self.get_markov_spline_metrics()
-         checkpoint['markov_spline_config'] = {
-             'gradient_smoothing': self.gradient_smoothing,
-             'data_smoothing': self.data_smoothing,
-             'smoothing_strength': self.smoothing_strength
-         }
-
-         # Save MarkovSpline module state
-         markov_state_path = Path(checkpoint_path).parent / 'markov_spline_state'
-         self.markov_module.save_module_state(markov_state_path)
-
-         torch.save(checkpoint, checkpoint_path)
-         print(f"✅ Enhanced checkpoint saved: {checkpoint_path}")
-
-
- def create_markov_enhanced_training_config(base_config: Dict) -> Dict:
-     """Create training configuration with MarkovSpline enhancements."""
-
-     enhanced_config = base_config.copy()
-
-     # MarkovSpline specific settings
-     enhanced_config.update({
-         'markov_spline': {
-             'enabled': True,
-             'gradient_smoothing': True,
-             'data_smoothing': True,
-             'smoothing_strength': 0.1,
-             'num_states': 10,
-             'spline_type': 'cubic',
-             'adaptive_smoothing': True
-         },
-         'data_preprocessing': {
-             'smooth_training_data': True,
-             'preserve_features': True,
-             'preprocessing_strength': 0.15
-         },
-         'gradient_optimization': {
-             'smooth_gradients': True,
-             'momentum_states': 10,
-             'learning_rate_smoothing': 0.2
-         }
-     })
-
-     return enhanced_config
-
-
- def run_markov_enhanced_training(config_file: str = None):
-     """Run BitTransformerLM training with MarkovSpline enhancements."""
-
-     # Load configuration
-     if config_file and os.path.exists(config_file):
-         with open(config_file, 'r') as f:
-             config = json.load(f)
-     else:
-         # Default enhanced configuration
-         config = create_markov_enhanced_training_config({
-             'model': {
-                 'd_model': 128,
-                 'nhead': 8,
-                 'num_layers': 4,
-                 'dim_feedforward': 512,
-                 'max_seq_len': 512
-             },
-             'training': {
-                 'batch_size': 8,
-                 'learning_rate': 1e-4,
-                 'epochs': 10,
-                 'max_grad_norm': 1.0
-             }
-         })
-
-     print("🌊 Starting MarkovSpline-Enhanced BitTransformerLM Training")
-     print(f"📋 Configuration: {json.dumps(config, indent=2)}")
-
-     # Initialize model
-     model_config = config['model']
-     model = BitTransformerLM(**model_config)
-
-     # Initialize enhanced trainer
-     trainer = MarkovSplineEnhancedTrainer(
-         model=model,
-         markov_config=config.get('markov_spline'),
-         gradient_smoothing=config['markov_spline']['gradient_smoothing'],
-         data_smoothing=config['markov_spline']['data_smoothing'],
-         smoothing_strength=config['markov_spline']['smoothing_strength'],
415
- **config['training']
416
- )
417
-
418
- print("🚀 Enhanced training pipeline initialized successfully!")
419
- return trainer, config
420
-
421
-
422
- if __name__ == '__main__':
423
- import argparse
424
-
425
- parser = argparse.ArgumentParser(description='MarkovSpline-Enhanced BitTransformerLM Training')
426
- parser.add_argument('--config', '-c', help='Configuration file path')
427
- parser.add_argument('--output-dir', '-o', default='./markov_enhanced_checkpoints',
428
- help='Output directory for checkpoints')
429
-
430
- args = parser.parse_args()
431
-
432
- # Create output directory
433
- os.makedirs(args.output_dir, exist_ok=True)
434
-
435
- # Run enhanced training
436
- trainer, config = run_markov_enhanced_training(args.config)
437
-
438
- print(f"📊 MarkovSpline metrics: {trainer.get_markov_spline_metrics()}")