JayLacoma committed on
Commit
b28248f
·
verified ·
1 Parent(s): 92e5c7c

Update feature_engineering.py

Browse files
Files changed (1) hide show
  1. feature_engineering.py +43 -113
feature_engineering.py CHANGED
@@ -14,6 +14,14 @@ import warnings
14
  warnings.filterwarnings('ignore')
15
 
16
 
 
 
 
 
 
 
 
 
17
  class IntegratedTheoryFeatures:
18
  """
19
  Transforms raw market data into theory-driven features combining:
@@ -24,6 +32,12 @@ class IntegratedTheoryFeatures:
24
  """
25
 
26
  def __init__(self, df):
 
 
 
 
 
 
27
  self.df = df.copy()
28
  self.features = pd.DataFrame(index=df.index)
29
 
@@ -37,9 +51,11 @@ class IntegratedTheoryFeatures:
37
  self.df[f'{col}_ret{window}'] = self.df[col].pct_change(window)
38
  # Volatility
39
  self.df[f'{col}_vol{window}'] = self.df[col].pct_change().rolling(window).std()
40
- # Momentum (rate of change acceleration)
41
- self.df[f'{col}_mom{window}'] = self.df[col].pct_change(window) - self.df[col].pct_change(window).shift(window)
42
-
 
 
43
  return self
44
 
45
  def dalio_forces(self):
@@ -57,7 +73,7 @@ class IntegratedTheoryFeatures:
57
  hy_spread * 0.3
58
  )
59
 
60
- # Force 2: Internal Conflict (inequality-driven)
61
  consumer_weakness = (self.df.get('Consumer_Discretionary', 0) /
62
  self.df.get('Consumer_Staples', 1)).pct_change(63) * -1
63
  unemployment_stress = self.df.get('UNRATE', pd.Series(0)).diff() * 2
@@ -102,7 +118,7 @@ class IntegratedTheoryFeatures:
102
  ai_momentum * 0.3
103
  )
104
 
105
- # Master Composite (normalized)
106
  dalio_components = [
107
  self.features['dalio_debt_cycle'] * 0.35,
108
  self.features['dalio_internal_conflict'] * 0.25,
@@ -113,18 +129,15 @@ class IntegratedTheoryFeatures:
113
 
114
  self.features['dalio_composite'] = pd.concat(dalio_components, axis=1).sum(axis=1)
115
  self.features['dalio_composite_norm'] = self._normalize(self.features['dalio_composite'])
116
-
117
  return self
118
 
119
  def stevenson_inequality(self):
120
  """Gary Stevenson's Inequality Amplification Metrics"""
121
  print("Building Stevenson's inequality indicators...")
122
 
123
- # Wealth Flow (money flowing to asset owners vs middle class)
124
  asset_rich = (self.df.get('Gold', 0) +
125
  self.df.get('Real_Estate', 0) +
126
  self.df.get('Growth_Stocks', 0)) / 3
127
-
128
  middle_class = (self.df.get('Consumer_Staples', 0) +
129
  self.df.get('Regional_Banks', 0) +
130
  self.df.get('Small_Cap_Value', 0)) / 3
@@ -133,25 +146,20 @@ class IntegratedTheoryFeatures:
133
  asset_rich.pct_change(63) - middle_class.pct_change(63)
134
  )
135
 
136
- # Consumption Gap (luxury vs mass market)
137
  luxury = self.df.get('Retail_Luxury', pd.Series(0)).pct_change(21)
138
  mass = (self.df.get('Restaurants', 0) + self.df.get('Retail', 0)) / 2
139
  mass = mass.pct_change(21)
140
-
141
  self.features['inequality_consumption_gap'] = luxury - mass
142
 
143
- # Credit Access Gap
144
  quality_credit = (self.df.get('Investment_Grade_Spread', 0) +
145
  self.df.get('Preferred_Stock', 0)) / 2
146
  junk_credit = (self.df.get('HYG', 0) +
147
  self.df.get('JNK', 0) +
148
  self.df.get('Emerging_Market_Debt', 0)) / 3
149
-
150
  self.features['inequality_credit_access'] = (
151
  quality_credit.pct_change(63) - junk_credit.pct_change(63)
152
  )
153
 
154
- # Master Inequality Score
155
  self.features['stevenson_inequality'] = (
156
  self.features['inequality_wealth_flow'] * 0.4 +
157
  self.features['inequality_consumption_gap'] * 0.3 +
@@ -159,11 +167,8 @@ class IntegratedTheoryFeatures:
159
  )
160
  self.features['stevenson_inequality_norm'] = self._normalize(self.features['stevenson_inequality'])
161
 
162
- # Inequality Transmission (how stimulus flows to rich)
163
- # High when asset prices rise faster than wages
164
  asset_inflation = (self.df.get('Gold', 0) + self.df.get('Real_Estate', 0)).pct_change(21)
165
- wage_proxy = self.df.get('Staffing', pd.Series(0)).pct_change(21) # Labor market proxy
166
-
167
  self.features['inequality_transmission'] = asset_inflation - wage_proxy
168
 
169
  return self
@@ -172,32 +177,24 @@ class IntegratedTheoryFeatures:
172
  """Peter Thiel's Monopoly vs Competition Indicators"""
173
  print("Building Thiel's monopoly indicators...")
174
 
175
- # Cash Moat (tech vs credit-dependent sectors)
176
  tech_strength = self.df.get('Technology', 0)
177
  finance_strength = self.df.get('Financials', 1)
178
-
179
  self.features['monopoly_cash_moat'] = (
180
  tech_strength.pct_change(63) - finance_strength.pct_change(63)
181
  )
182
 
183
- # Network Effects (winner-take-all platforms)
184
  network_sectors = (self.df.get('Cloud_Computing', 0) * 0.4 +
185
  self.df.get('Communication_Services', 0) * 0.3 +
186
  self.df.get('Fintech', 0) * 0.3)
187
-
188
  self.features['monopoly_network_effects'] = network_sectors.pct_change(63)
189
 
190
- # Defensibility (stability = moat strength)
191
  tech_volatility = self.df.get('Technology', pd.Series(1)).pct_change().rolling(63).std()
192
  chip_strength = self.df.get('Semiconductors', pd.Series(0)).pct_change(63)
193
-
194
- # Inverse volatility (lower vol = stronger moat)
195
  self.features['monopoly_defensibility'] = (
196
- (1 / (tech_volatility + 0.001)) * 0.01 + # Normalize
197
  chip_strength * 0.5
198
  )
199
 
200
- # Master Monopoly Score
201
  self.features['thiel_monopoly'] = (
202
  self.features['monopoly_cash_moat'] * 0.35 +
203
  self.features['monopoly_network_effects'] * 0.35 +
@@ -205,18 +202,14 @@ class IntegratedTheoryFeatures:
205
  )
206
  self.features['thiel_monopoly_norm'] = self._normalize(self.features['thiel_monopoly'])
207
 
208
- # Monopoly Immunity Test (tech ignoring rate moves)
209
  tech_return = self.df.get('Technology', pd.Series(0)).pct_change(21)
210
- rate_change = self.df.get('DGS10', pd.Series(0)).diff() * -1 # Inverse (cuts = positive)
211
-
212
  self.features['monopoly_immunity'] = tech_return / (rate_change.abs() + 0.001)
213
 
214
- # Tech Concentration (narrow leadership = bubble risk)
215
  specialized = (self.df.get('Semiconductors', 0) +
216
  self.df.get('Cloud_Computing', 0) +
217
  self.df.get('Robotics_AI', 0)) / 3
218
  broad_tech = self.df.get('Technology', 1)
219
-
220
  self.features['tech_concentration'] = specialized / broad_tech
221
 
222
  return self
@@ -225,49 +218,38 @@ class IntegratedTheoryFeatures:
225
  """Jeffrey Gundlach's Debt Reckoning and Paradigm Shift Signals"""
226
  print("Building Gundlach's reckoning indicators...")
227
 
228
- # Yield Anomaly (yields rising post-cuts = fiscal dominance)
229
  fed_proxy = self.df.get('DGS3MO', pd.Series(0))
230
  long_yield = self.df.get('DGS10', pd.Series(0))
231
-
232
- # Detect cuts (3mo falling) and measure 10Y response
233
  fed_cutting = fed_proxy.diff() < -0.05
234
  yield_rising = long_yield.diff() > 0
235
-
236
  self.features['gundlach_yield_anomaly'] = (
237
  (fed_cutting & yield_rising).astype(float) +
238
- (long_yield - fed_proxy) # Curve steepening
239
  )
240
 
241
- # Flight-to-Quality Shift (gold vs Treasuries)
242
  gold_return = self.df.get('Gold', pd.Series(0)).pct_change(21)
243
  treasury_return = self.df.get('US_Treasuries_Long', pd.Series(1)).pct_change(21)
244
-
245
  self.features['gundlach_flight_shift'] = gold_return / (treasury_return + 0.001)
246
 
247
- # Capital Reversal (dollar weakness + EM outperformance)
248
  dollar_weak = self.df.get('DXY', pd.Series(0)).pct_change(21) * -1
249
  em_outperform = (self.df.get('Emerging_Markets', 0) + self.df.get('Europe', 0)) / 2
250
  em_outperform = em_outperform.pct_change(21)
251
  sp_return = self.df.get('SP500', pd.Series(0)).pct_change(21)
252
-
253
  self.features['gundlach_capital_reversal'] = (
254
  dollar_weak * 0.5 +
255
  (em_outperform - sp_return) * 0.5
256
  )
257
 
258
- # Private Credit Risk (2007 CDO echo)
259
  regional_stress = (self.df.get('Regional_Banks', 0) /
260
  self.df.get('Financials', 1)).pct_change(21)
261
  mortgage_reit_stress = self.df.get('Mortgage_REITs', pd.Series(0)).pct_change(21)
262
  real_estate_vol = self.df.get('Real_Estate', pd.Series(1)).pct_change().rolling(21).std() * 100
263
-
264
  self.features['gundlach_private_credit_risk'] = (
265
- regional_stress * -0.4 + # Decline = stress
266
  mortgage_reit_stress * -0.3 +
267
  real_estate_vol * 0.3
268
  )
269
 
270
- # Master Reckoning Score
271
  self.features['gundlach_reckoning'] = (
272
  self.features['gundlach_yield_anomaly'] * 0.30 +
273
  self.features['gundlach_flight_shift'] * 0.25 +
@@ -275,48 +257,40 @@ class IntegratedTheoryFeatures:
275
  self.features['gundlach_private_credit_risk'] * 0.20
276
  )
277
  self.features['gundlach_reckoning_norm'] = self._normalize(self.features['gundlach_reckoning'])
278
-
279
  return self
280
 
281
  def geopolitical_indicators(self):
282
  """Regional conflict and energy transition signals"""
283
  print("Building geopolitical indicators...")
284
 
285
- # Middle East Risk
286
  oil_volatility = self.df.get('Oil', pd.Series(1)).pct_change().rolling(3).std() * 100
287
  defense_spike = self.df.get('Defense_Stocks', pd.Series(0)).pct_change(5)
288
  gold_haven = self.df.get('Gold_Safe_Haven', pd.Series(0)).pct_change(5)
289
-
290
  self.features['middle_east_risk'] = (
291
  oil_volatility * 0.4 +
292
  defense_spike * 0.3 +
293
  gold_haven * 0.3
294
  )
295
 
296
- # Europe Risk
297
  gas_volatility = self.df.get('NaturalGas', pd.Series(1)).pct_change().rolling(5).std() * 100
298
  europe_decline = self.df.get('Europe', pd.Series(0)).pct_change(21) * -1
299
- swiss_franc_strength = self.df.get('Swiss_Franc', pd.Series(0)).pct_change(21) * -1 # Inverse quote
300
-
301
  self.features['europe_risk'] = (
302
  gas_volatility * 0.5 +
303
  europe_decline * 0.3 +
304
  swiss_franc_strength * 0.2
305
  )
306
 
307
- # Asia Risk
308
  chip_stress = self.df.get('Semiconductors', pd.Series(1)).pct_change().rolling(21).std() * 100
309
  taiwan_korea = (self.df.get('Taiwan', 0) + self.df.get('South_Korea', 0)) / 2
310
  china_diverge = taiwan_korea.pct_change(21) - self.df.get('China', pd.Series(0)).pct_change(21)
311
  rare_earth = self.df.get('Rare_Earth', pd.Series(0)).pct_change(21)
312
-
313
  self.features['asia_risk'] = (
314
  chip_stress * 0.4 +
315
  china_diverge * 0.3 +
316
  rare_earth * 0.3
317
  )
318
 
319
- # Overall Geopolitical Risk
320
  self.features['geopolitical_risk'] = (
321
  self.features['middle_east_risk'] * 0.4 +
322
  self.features['europe_risk'] * 0.3 +
@@ -324,54 +298,44 @@ class IntegratedTheoryFeatures:
324
  )
325
  self.features['geopolitical_risk_norm'] = self._normalize(self.features['geopolitical_risk'])
326
 
327
- # Energy Transition Indicators
328
  uranium_momentum = self.df.get('Uranium', pd.Series(0)).pct_change(63)
329
  clean_momentum = self.df.get('Clean_Energy', pd.Series(0)).pct_change(63)
330
  oil_decline = self.df.get('Oil', pd.Series(0)).pct_change(252) * -1
331
-
332
  self.features['energy_transition'] = (
333
  uranium_momentum * 0.5 +
334
  clean_momentum * 0.3 +
335
  oil_decline * 0.2
336
  )
337
-
338
  return self
339
 
340
  def cross_asset_features(self):
341
  """Advanced cross-asset relationships"""
342
  print("Building cross-asset features...")
343
 
344
- # Flight-to-Quality Ratio
345
  defensive = (self.df.get('Gold', 0) +
346
  self.df.get('Utilities', 0) +
347
  self.df.get('Healthcare', 0)) / 3
348
  risk_on = (self.df.get('Technology', 0) +
349
  self.df.get('Consumer_Discretionary', 0) +
350
  self.df.get('Real_Estate', 0)) / 3
351
-
352
  self.features['flight_ratio'] = defensive / (risk_on + 0.001)
353
 
354
- # Credit Contagion Spread
355
  regional_vs_broad = (self.df.get('Regional_Banks', 0) -
356
  self.df.get('Financials', 0))
357
  mortgage_vs_reit = (self.df.get('Mortgage_REITs', 0) -
358
  self.df.get('REITs', 0))
359
  em_vs_ig = (self.df.get('Emerging_Market_Debt', 0) -
360
  self.df.get('Investment_Grade_Spread', 0))
361
-
362
  self.features['credit_contagion'] = (
363
  regional_vs_broad.pct_change(21) +
364
  mortgage_vs_reit.pct_change(21) +
365
  em_vs_ig.pct_change(21)
366
  ) / 3
367
 
368
- # VIX Amplification
369
  vix = self.df.get('VIX', pd.Series(20))
370
  vix_historical_avg = vix.rolling(252).mean()
371
  geo_max = self.features[['middle_east_risk', 'europe_risk', 'asia_risk']].max(axis=1)
372
-
373
  self.features['geo_amplification'] = geo_max * (vix / vix_historical_avg)
374
-
375
  return self
376
 
377
  def scenario_probabilities(self):
@@ -381,19 +345,18 @@ class IntegratedTheoryFeatures:
381
  # Scenario 1: Credit Collapse
382
  self.features['prob_credit_collapse'] = (
383
  self.features['gundlach_reckoning_norm'] * 0.4 +
384
- self.features['gundlach_private_credit_risk'] / self.features['gundlach_private_credit_risk'].std() * 0.1 * 0.3 +
385
- self.features['dalio_debt_cycle'] / self.features['dalio_debt_cycle'].std() * 0.1 * 0.3
386
  )
387
  self.features['prob_credit_collapse'] = np.clip(self.features['prob_credit_collapse'], 0, 1)
388
 
389
  # Scenario 2: Stagflation
390
  inflation_high = (self.df.get('CPIAUCSL', pd.Series(0)).pct_change(12) * 100 > 2.5).astype(float)
391
  unemployment_rising = (self.df.get('UNRATE', pd.Series(0)).diff() > 0).astype(float)
392
-
393
  self.features['prob_stagflation'] = (
394
  (inflation_high * unemployment_rising) * 0.3 +
395
- self.features['dalio_external_conflict'] / self.features['dalio_external_conflict'].std() * 0.1 * 0.3 +
396
- self.features['gundlach_capital_reversal'] / self.features['gundlach_capital_reversal'].std() * 0.1 * 0.2 +
397
  self.features['stevenson_inequality_norm'] * 0.2
398
  )
399
  self.features['prob_stagflation'] = np.clip(self.features['prob_stagflation'], 0, 1)
@@ -401,17 +364,14 @@ class IntegratedTheoryFeatures:
401
  # Scenario 3: Tech Monopoly Boom
402
  self.features['prob_tech_boom'] = (
403
  self.features['thiel_monopoly_norm'] * 0.4 +
404
- (self.features['dalio_tech_force'] - self.features['dalio_debt_cycle']) /
405
- (self.features['dalio_tech_force'].std() + 0.001) * 0.1 * 0.3 +
406
- self.features['energy_transition'] / (self.features['energy_transition'].std() + 0.001) * 0.1 * 0.2 +
407
  (self.df.get('China_Tech', pd.Series(0)).pct_change(63) <
408
  self.df.get('Technology', pd.Series(0)).pct_change(63)).astype(float) * 0.1
409
  )
410
  self.features['prob_tech_boom'] = np.clip(self.features['prob_tech_boom'], 0, 1)
411
 
412
- # Scenario 4: Controlled Reset (low probability without policy action)
413
- self.features['prob_controlled_reset'] = 0.05 # Baseline, would need policy signals
414
-
415
  return self
416
 
417
  def regime_detection(self):
@@ -419,37 +379,24 @@ class IntegratedTheoryFeatures:
419
  print("Detecting market regimes...")
420
 
421
  def classify_regime(row):
422
- # Crisis conditions
423
- if (row['gundlach_reckoning_norm'] > 0.6 and
424
- row['prob_credit_collapse'] > 0.5):
425
  return 'CRISIS'
426
-
427
- # Tech Monopoly Dominance
428
  elif row['thiel_monopoly_norm'] > 0.7:
429
  return 'TECH_MONOPOLY'
430
-
431
- # Inequality Trap (stagflation)
432
- elif (row['stevenson_inequality_norm'] > 0.6 and
433
- row['prob_stagflation'] > 0.4):
434
  return 'INEQUALITY_TRAP'
435
-
436
- # Geopolitical Shock
437
  elif row['geopolitical_risk_norm'] > 0.7:
438
  return 'GEOPOLITICAL_SHOCK'
439
-
440
- # Default: Transition phase
441
  else:
442
  return 'TRANSITION'
443
 
444
  self.features['regime'] = self.features.apply(classify_regime, axis=1)
445
-
446
  return self
447
 
448
  def dimensionality_reduction(self):
449
  """Apply PCA to reduce feature space"""
450
  print("Applying dimensionality reduction...")
451
 
452
- # Define feature groups for PCA
453
  debt_cols = [c for c in self.features.columns if 'dalio_debt' in c or 'gundlach' in c]
454
  inequality_cols = [c for c in self.features.columns if 'inequality' in c or 'stevenson' in c]
455
  geo_cols = [c for c in self.features.columns if 'risk' in c or 'middle_east' in c or 'europe' in c or 'asia' in c]
@@ -458,47 +405,32 @@ class IntegratedTheoryFeatures:
458
  for name, cols in [('debt', debt_cols), ('inequality', inequality_cols),
459
  ('geo', geo_cols), ('tech', tech_cols)]:
460
  if len(cols) > 0:
461
- # Get data and drop NaNs
462
  data = self.features[cols].dropna()
463
-
464
- if len(data) > 10: # Need sufficient data
465
- # Standardize
466
  scaler = StandardScaler()
467
  data_scaled = scaler.fit_transform(data)
468
-
469
- # PCA
470
  pca = PCA(n_components=min(2, len(cols)))
471
  pcs = pca.fit_transform(data_scaled)
472
-
473
- # Add back
474
  for i in range(pcs.shape[1]):
475
  self.features.loc[data.index, f'{name}_PC{i+1}'] = pcs[:, i]
476
-
477
  return self
478
 
479
  def _calculate_dollar_anomaly(self):
480
- """Detect dollar weakness during stock corrections (40-year anomaly)"""
481
  sp_correction = self.df.get('SP500', pd.Series(0)).pct_change(5) < -0.05
482
  dollar_weakness = self.df.get('DXY', pd.Series(0)).pct_change(5) < 0
483
-
484
  return (sp_correction & dollar_weakness).astype(float)
485
 
486
  def _calculate_asia_tension(self):
487
- """Taiwan-China divergence as tension proxy"""
488
  taiwan = self.df.get('Taiwan', pd.Series(0))
489
  china = self.df.get('China', pd.Series(0))
490
-
491
  return (taiwan.pct_change(21) - china.pct_change(21)).fillna(0)
492
 
493
  def _normalize(self, series, window=252):
494
- """Rolling z-score normalization"""
495
  rolling_mean = series.rolling(window, min_periods=20).mean()
496
  rolling_std = series.rolling(window, min_periods=20).std()
497
-
498
- return ((series - rolling_mean) / (rolling_std + 0.001)).clip(-3, 3) / 3 # Scale to -1, 1
499
 
500
  def build_all_features(self):
501
- """Run complete feature engineering pipeline"""
502
  print("\n" + "="*80)
503
  print("INTEGRATED THEORY FEATURE ENGINEERING")
504
  print("="*80 + "\n")
@@ -534,26 +466,24 @@ class IntegratedTheoryFeatures:
534
 
535
 
536
  def main():
537
- """Main execution function"""
538
  import argparse
539
-
540
  parser = argparse.ArgumentParser(description='Integrated Market Theory Feature Engineering')
541
  parser.add_argument('--input', default='unified_market_data.csv',
542
  help='Input CSV file from geo_macro.py')
543
  parser.add_argument('--output', default='enhanced_market_features.csv',
544
  help='Output CSV file with engineered features')
545
-
546
  args = parser.parse_args()
547
 
548
- # Load data
549
  print(f"Loading data from {args.input}...")
550
  df = pd.read_csv(args.input, index_col=0, parse_dates=True)
551
  print(f"Loaded {len(df)} rows, {len(df.columns)} columns")
552
  print(f"Date range: {df.index.min()} to {df.index.max()}")
553
 
554
- # Build features
555
  engine = IntegratedTheoryFeatures(df)
556
  features = engine.build_all_features()
557
 
558
- # Save
559
- features.to_csv(args.output)
 
 
 
 
14
  warnings.filterwarnings('ignore')
15
 
16
 
17
def safe_zscore(series, window=252, min_obs=30):
    """Return a rolling z-score of ``series``, clipped to [-3, 3].

    The mean and standard deviation are taken over a trailing ``window``
    of observations. Positions where the score is undefined (fewer than
    ``min_obs`` observations, or a NaN quotient) are reported as 0.
    """
    trailing = series.rolling(window, min_periods=min_obs)
    deviation = series - trailing.mean()
    score = deviation / trailing.std()
    return score.fillna(0).clip(-3, 3)
23
+
24
+
25
  class IntegratedTheoryFeatures:
26
  """
27
  Transforms raw market data into theory-driven features combining:
 
32
  """
33
 
34
  def __init__(self, df):
35
+ # Validate critical columns
36
+ required = {'SP500', 'DGS10', 'Gold', 'VIX', 'UNRATE', 'CPIAUCSL'}
37
+ missing = required - set(df.columns)
38
+ if missing:
39
+ raise ValueError(f"Critical data missing: {missing}")
40
+
41
  self.df = df.copy()
42
  self.features = pd.DataFrame(index=df.index)
43
 
 
51
  self.df[f'{col}_ret{window}'] = self.df[col].pct_change(window)
52
  # Volatility
53
  self.df[f'{col}_vol{window}'] = self.df[col].pct_change().rolling(window).std()
54
+ # Momentum
55
+ self.df[f'{col}_mom{window}'] = (
56
+ self.df[col].pct_change(window) -
57
+ self.df[col].pct_change(window).shift(window)
58
+ )
59
  return self
60
 
61
  def dalio_forces(self):
 
73
  hy_spread * 0.3
74
  )
75
 
76
+ # Force 2: Internal Conflict
77
  consumer_weakness = (self.df.get('Consumer_Discretionary', 0) /
78
  self.df.get('Consumer_Staples', 1)).pct_change(63) * -1
79
  unemployment_stress = self.df.get('UNRATE', pd.Series(0)).diff() * 2
 
118
  ai_momentum * 0.3
119
  )
120
 
121
+ # Master Composite
122
  dalio_components = [
123
  self.features['dalio_debt_cycle'] * 0.35,
124
  self.features['dalio_internal_conflict'] * 0.25,
 
129
 
130
  self.features['dalio_composite'] = pd.concat(dalio_components, axis=1).sum(axis=1)
131
  self.features['dalio_composite_norm'] = self._normalize(self.features['dalio_composite'])
 
132
  return self
133
 
134
  def stevenson_inequality(self):
135
  """Gary Stevenson's Inequality Amplification Metrics"""
136
  print("Building Stevenson's inequality indicators...")
137
 
 
138
  asset_rich = (self.df.get('Gold', 0) +
139
  self.df.get('Real_Estate', 0) +
140
  self.df.get('Growth_Stocks', 0)) / 3
 
141
  middle_class = (self.df.get('Consumer_Staples', 0) +
142
  self.df.get('Regional_Banks', 0) +
143
  self.df.get('Small_Cap_Value', 0)) / 3
 
146
  asset_rich.pct_change(63) - middle_class.pct_change(63)
147
  )
148
 
 
149
  luxury = self.df.get('Retail_Luxury', pd.Series(0)).pct_change(21)
150
  mass = (self.df.get('Restaurants', 0) + self.df.get('Retail', 0)) / 2
151
  mass = mass.pct_change(21)
 
152
  self.features['inequality_consumption_gap'] = luxury - mass
153
 
 
154
  quality_credit = (self.df.get('Investment_Grade_Spread', 0) +
155
  self.df.get('Preferred_Stock', 0)) / 2
156
  junk_credit = (self.df.get('HYG', 0) +
157
  self.df.get('JNK', 0) +
158
  self.df.get('Emerging_Market_Debt', 0)) / 3
 
159
  self.features['inequality_credit_access'] = (
160
  quality_credit.pct_change(63) - junk_credit.pct_change(63)
161
  )
162
 
 
163
  self.features['stevenson_inequality'] = (
164
  self.features['inequality_wealth_flow'] * 0.4 +
165
  self.features['inequality_consumption_gap'] * 0.3 +
 
167
  )
168
  self.features['stevenson_inequality_norm'] = self._normalize(self.features['stevenson_inequality'])
169
 
 
 
170
  asset_inflation = (self.df.get('Gold', 0) + self.df.get('Real_Estate', 0)).pct_change(21)
171
+ wage_proxy = self.df.get('Staffing', pd.Series(0)).pct_change(21)
 
172
  self.features['inequality_transmission'] = asset_inflation - wage_proxy
173
 
174
  return self
 
177
  """Peter Thiel's Monopoly vs Competition Indicators"""
178
  print("Building Thiel's monopoly indicators...")
179
 
 
180
  tech_strength = self.df.get('Technology', 0)
181
  finance_strength = self.df.get('Financials', 1)
 
182
  self.features['monopoly_cash_moat'] = (
183
  tech_strength.pct_change(63) - finance_strength.pct_change(63)
184
  )
185
 
 
186
  network_sectors = (self.df.get('Cloud_Computing', 0) * 0.4 +
187
  self.df.get('Communication_Services', 0) * 0.3 +
188
  self.df.get('Fintech', 0) * 0.3)
 
189
  self.features['monopoly_network_effects'] = network_sectors.pct_change(63)
190
 
 
191
  tech_volatility = self.df.get('Technology', pd.Series(1)).pct_change().rolling(63).std()
192
  chip_strength = self.df.get('Semiconductors', pd.Series(0)).pct_change(63)
 
 
193
  self.features['monopoly_defensibility'] = (
194
+ (1 / (tech_volatility + 0.001)) * 0.01 +
195
  chip_strength * 0.5
196
  )
197
 
 
198
  self.features['thiel_monopoly'] = (
199
  self.features['monopoly_cash_moat'] * 0.35 +
200
  self.features['monopoly_network_effects'] * 0.35 +
 
202
  )
203
  self.features['thiel_monopoly_norm'] = self._normalize(self.features['thiel_monopoly'])
204
 
 
205
  tech_return = self.df.get('Technology', pd.Series(0)).pct_change(21)
206
+ rate_change = self.df.get('DGS10', pd.Series(0)).diff() * -1
 
207
  self.features['monopoly_immunity'] = tech_return / (rate_change.abs() + 0.001)
208
 
 
209
  specialized = (self.df.get('Semiconductors', 0) +
210
  self.df.get('Cloud_Computing', 0) +
211
  self.df.get('Robotics_AI', 0)) / 3
212
  broad_tech = self.df.get('Technology', 1)
 
213
  self.features['tech_concentration'] = specialized / broad_tech
214
 
215
  return self
 
218
  """Jeffrey Gundlach's Debt Reckoning and Paradigm Shift Signals"""
219
  print("Building Gundlach's reckoning indicators...")
220
 
 
221
  fed_proxy = self.df.get('DGS3MO', pd.Series(0))
222
  long_yield = self.df.get('DGS10', pd.Series(0))
 
 
223
  fed_cutting = fed_proxy.diff() < -0.05
224
  yield_rising = long_yield.diff() > 0
 
225
  self.features['gundlach_yield_anomaly'] = (
226
  (fed_cutting & yield_rising).astype(float) +
227
+ (long_yield - fed_proxy)
228
  )
229
 
 
230
  gold_return = self.df.get('Gold', pd.Series(0)).pct_change(21)
231
  treasury_return = self.df.get('US_Treasuries_Long', pd.Series(1)).pct_change(21)
 
232
  self.features['gundlach_flight_shift'] = gold_return / (treasury_return + 0.001)
233
 
 
234
  dollar_weak = self.df.get('DXY', pd.Series(0)).pct_change(21) * -1
235
  em_outperform = (self.df.get('Emerging_Markets', 0) + self.df.get('Europe', 0)) / 2
236
  em_outperform = em_outperform.pct_change(21)
237
  sp_return = self.df.get('SP500', pd.Series(0)).pct_change(21)
 
238
  self.features['gundlach_capital_reversal'] = (
239
  dollar_weak * 0.5 +
240
  (em_outperform - sp_return) * 0.5
241
  )
242
 
 
243
  regional_stress = (self.df.get('Regional_Banks', 0) /
244
  self.df.get('Financials', 1)).pct_change(21)
245
  mortgage_reit_stress = self.df.get('Mortgage_REITs', pd.Series(0)).pct_change(21)
246
  real_estate_vol = self.df.get('Real_Estate', pd.Series(1)).pct_change().rolling(21).std() * 100
 
247
  self.features['gundlach_private_credit_risk'] = (
248
+ regional_stress * -0.4 +
249
  mortgage_reit_stress * -0.3 +
250
  real_estate_vol * 0.3
251
  )
252
 
 
253
  self.features['gundlach_reckoning'] = (
254
  self.features['gundlach_yield_anomaly'] * 0.30 +
255
  self.features['gundlach_flight_shift'] * 0.25 +
 
257
  self.features['gundlach_private_credit_risk'] * 0.20
258
  )
259
  self.features['gundlach_reckoning_norm'] = self._normalize(self.features['gundlach_reckoning'])
 
260
  return self
261
 
262
  def geopolitical_indicators(self):
263
  """Regional conflict and energy transition signals"""
264
  print("Building geopolitical indicators...")
265
 
 
266
  oil_volatility = self.df.get('Oil', pd.Series(1)).pct_change().rolling(3).std() * 100
267
  defense_spike = self.df.get('Defense_Stocks', pd.Series(0)).pct_change(5)
268
  gold_haven = self.df.get('Gold_Safe_Haven', pd.Series(0)).pct_change(5)
 
269
  self.features['middle_east_risk'] = (
270
  oil_volatility * 0.4 +
271
  defense_spike * 0.3 +
272
  gold_haven * 0.3
273
  )
274
 
 
275
  gas_volatility = self.df.get('NaturalGas', pd.Series(1)).pct_change().rolling(5).std() * 100
276
  europe_decline = self.df.get('Europe', pd.Series(0)).pct_change(21) * -1
277
+ swiss_franc_strength = self.df.get('Swiss_Franc', pd.Series(0)).pct_change(21) * -1
 
278
  self.features['europe_risk'] = (
279
  gas_volatility * 0.5 +
280
  europe_decline * 0.3 +
281
  swiss_franc_strength * 0.2
282
  )
283
 
 
284
  chip_stress = self.df.get('Semiconductors', pd.Series(1)).pct_change().rolling(21).std() * 100
285
  taiwan_korea = (self.df.get('Taiwan', 0) + self.df.get('South_Korea', 0)) / 2
286
  china_diverge = taiwan_korea.pct_change(21) - self.df.get('China', pd.Series(0)).pct_change(21)
287
  rare_earth = self.df.get('Rare_Earth', pd.Series(0)).pct_change(21)
 
288
  self.features['asia_risk'] = (
289
  chip_stress * 0.4 +
290
  china_diverge * 0.3 +
291
  rare_earth * 0.3
292
  )
293
 
 
294
  self.features['geopolitical_risk'] = (
295
  self.features['middle_east_risk'] * 0.4 +
296
  self.features['europe_risk'] * 0.3 +
 
298
  )
299
  self.features['geopolitical_risk_norm'] = self._normalize(self.features['geopolitical_risk'])
300
 
 
301
  uranium_momentum = self.df.get('Uranium', pd.Series(0)).pct_change(63)
302
  clean_momentum = self.df.get('Clean_Energy', pd.Series(0)).pct_change(63)
303
  oil_decline = self.df.get('Oil', pd.Series(0)).pct_change(252) * -1
 
304
  self.features['energy_transition'] = (
305
  uranium_momentum * 0.5 +
306
  clean_momentum * 0.3 +
307
  oil_decline * 0.2
308
  )
 
309
  return self
310
 
311
  def cross_asset_features(self):
312
  """Advanced cross-asset relationships"""
313
  print("Building cross-asset features...")
314
 
 
315
  defensive = (self.df.get('Gold', 0) +
316
  self.df.get('Utilities', 0) +
317
  self.df.get('Healthcare', 0)) / 3
318
  risk_on = (self.df.get('Technology', 0) +
319
  self.df.get('Consumer_Discretionary', 0) +
320
  self.df.get('Real_Estate', 0)) / 3
 
321
  self.features['flight_ratio'] = defensive / (risk_on + 0.001)
322
 
 
323
  regional_vs_broad = (self.df.get('Regional_Banks', 0) -
324
  self.df.get('Financials', 0))
325
  mortgage_vs_reit = (self.df.get('Mortgage_REITs', 0) -
326
  self.df.get('REITs', 0))
327
  em_vs_ig = (self.df.get('Emerging_Market_Debt', 0) -
328
  self.df.get('Investment_Grade_Spread', 0))
 
329
  self.features['credit_contagion'] = (
330
  regional_vs_broad.pct_change(21) +
331
  mortgage_vs_reit.pct_change(21) +
332
  em_vs_ig.pct_change(21)
333
  ) / 3
334
 
 
335
  vix = self.df.get('VIX', pd.Series(20))
336
  vix_historical_avg = vix.rolling(252).mean()
337
  geo_max = self.features[['middle_east_risk', 'europe_risk', 'asia_risk']].max(axis=1)
 
338
  self.features['geo_amplification'] = geo_max * (vix / vix_historical_avg)
 
339
  return self
340
 
341
  def scenario_probabilities(self):
 
345
  # Scenario 1: Credit Collapse
346
  self.features['prob_credit_collapse'] = (
347
  self.features['gundlach_reckoning_norm'] * 0.4 +
348
+ safe_zscore(self.features['gundlach_private_credit_risk']) * 0.03 +
349
+ safe_zscore(self.features['dalio_debt_cycle']) * 0.03
350
  )
351
  self.features['prob_credit_collapse'] = np.clip(self.features['prob_credit_collapse'], 0, 1)
352
 
353
  # Scenario 2: Stagflation
354
  inflation_high = (self.df.get('CPIAUCSL', pd.Series(0)).pct_change(12) * 100 > 2.5).astype(float)
355
  unemployment_rising = (self.df.get('UNRATE', pd.Series(0)).diff() > 0).astype(float)
 
356
  self.features['prob_stagflation'] = (
357
  (inflation_high * unemployment_rising) * 0.3 +
358
+ safe_zscore(self.features['dalio_external_conflict']) * 0.03 +
359
+ safe_zscore(self.features['gundlach_capital_reversal']) * 0.02 +
360
  self.features['stevenson_inequality_norm'] * 0.2
361
  )
362
  self.features['prob_stagflation'] = np.clip(self.features['prob_stagflation'], 0, 1)
 
364
  # Scenario 3: Tech Monopoly Boom
365
  self.features['prob_tech_boom'] = (
366
  self.features['thiel_monopoly_norm'] * 0.4 +
367
+ safe_zscore(self.features['dalio_tech_force'] - self.features['dalio_debt_cycle']) * 0.03 +
368
+ safe_zscore(self.features['energy_transition']) * 0.02 +
 
369
  (self.df.get('China_Tech', pd.Series(0)).pct_change(63) <
370
  self.df.get('Technology', pd.Series(0)).pct_change(63)).astype(float) * 0.1
371
  )
372
  self.features['prob_tech_boom'] = np.clip(self.features['prob_tech_boom'], 0, 1)
373
 
374
+ self.features['prob_controlled_reset'] = 0.05
 
 
375
  return self
376
 
377
  def regime_detection(self):
 
379
  print("Detecting market regimes...")
380
 
381
  def classify_regime(row):
382
+ if (row['gundlach_reckoning_norm'] > 0.6 and row['prob_credit_collapse'] > 0.5):
 
 
383
  return 'CRISIS'
 
 
384
  elif row['thiel_monopoly_norm'] > 0.7:
385
  return 'TECH_MONOPOLY'
386
+ elif (row['stevenson_inequality_norm'] > 0.6 and row['prob_stagflation'] > 0.4):
 
 
 
387
  return 'INEQUALITY_TRAP'
 
 
388
  elif row['geopolitical_risk_norm'] > 0.7:
389
  return 'GEOPOLITICAL_SHOCK'
 
 
390
  else:
391
  return 'TRANSITION'
392
 
393
  self.features['regime'] = self.features.apply(classify_regime, axis=1)
 
394
  return self
395
 
396
  def dimensionality_reduction(self):
397
  """Apply PCA to reduce feature space"""
398
  print("Applying dimensionality reduction...")
399
 
 
400
  debt_cols = [c for c in self.features.columns if 'dalio_debt' in c or 'gundlach' in c]
401
  inequality_cols = [c for c in self.features.columns if 'inequality' in c or 'stevenson' in c]
402
  geo_cols = [c for c in self.features.columns if 'risk' in c or 'middle_east' in c or 'europe' in c or 'asia' in c]
 
405
  for name, cols in [('debt', debt_cols), ('inequality', inequality_cols),
406
  ('geo', geo_cols), ('tech', tech_cols)]:
407
  if len(cols) > 0:
 
408
  data = self.features[cols].dropna()
409
+ if len(data) > 10:
 
 
410
  scaler = StandardScaler()
411
  data_scaled = scaler.fit_transform(data)
 
 
412
  pca = PCA(n_components=min(2, len(cols)))
413
  pcs = pca.fit_transform(data_scaled)
 
 
414
  for i in range(pcs.shape[1]):
415
  self.features.loc[data.index, f'{name}_PC{i+1}'] = pcs[:, i]
 
416
  return self
417
 
418
  def _calculate_dollar_anomaly(self):
 
419
  sp_correction = self.df.get('SP500', pd.Series(0)).pct_change(5) < -0.05
420
  dollar_weakness = self.df.get('DXY', pd.Series(0)).pct_change(5) < 0
 
421
  return (sp_correction & dollar_weakness).astype(float)
422
 
423
  def _calculate_asia_tension(self):
 
424
  taiwan = self.df.get('Taiwan', pd.Series(0))
425
  china = self.df.get('China', pd.Series(0))
 
426
  return (taiwan.pct_change(21) - china.pct_change(21)).fillna(0)
427
 
428
  def _normalize(self, series, window=252):
 
429
  rolling_mean = series.rolling(window, min_periods=20).mean()
430
  rolling_std = series.rolling(window, min_periods=20).std()
431
+ return ((series - rolling_mean) / (rolling_std + 0.001)).clip(-3, 3) / 3
 
432
 
433
  def build_all_features(self):
 
434
  print("\n" + "="*80)
435
  print("INTEGRATED THEORY FEATURE ENGINEERING")
436
  print("="*80 + "\n")
 
466
 
467
 
468
  def main():
 
469
  import argparse
 
470
  parser = argparse.ArgumentParser(description='Integrated Market Theory Feature Engineering')
471
  parser.add_argument('--input', default='unified_market_data.csv',
472
  help='Input CSV file from geo_macro.py')
473
  parser.add_argument('--output', default='enhanced_market_features.csv',
474
  help='Output CSV file with engineered features')
 
475
  args = parser.parse_args()
476
 
 
477
  print(f"Loading data from {args.input}...")
478
  df = pd.read_csv(args.input, index_col=0, parse_dates=True)
479
  print(f"Loaded {len(df)} rows, {len(df.columns)} columns")
480
  print(f"Date range: {df.index.min()} to {df.index.max()}")
481
 
 
482
  engine = IntegratedTheoryFeatures(df)
483
  features = engine.build_all_features()
484
 
485
+ features.to_csv(args.output) # ✅ FIXED: added missing parenthesis
486
+
487
+
488
+ if __name__ == "__main__":
489
+ main()