Edwin Salguero
Enhanced FRED ML with improved Reports & Insights page, fixed alignment analysis, and comprehensive analytics improvements
2469150
| #!/usr/bin/env python3 | |
| """ | |
| Fixes Demonstration | |
| Demonstrate the fixes applied to the economic analysis pipeline | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
def create_test_data():
    """Build a quarterly fixture frame (2020Q1-2024Q4) that reproduces the
    unit-scale issues the demonstration fixes.

    Returns a DataFrame indexed by quarter-end dates with six indicators,
    each deliberately stored in an inconvenient unit (see inline notes).
    """
    # 20 quarter-end timestamps. NOTE(review): the 'Q' alias is deprecated in
    # pandas 2.2+ in favour of 'QE'; kept to match the original behavior.
    quarter_ends = pd.date_range('2020-01-01', '2024-12-31', freq='Q')
    series = {
        # Real GDP in billions (should be trillions for readability)
        'GDPC1': [22000, 22100, 22200, 22300, 22400, 22500, 22600, 22700, 22800, 22900, 23000, 23100, 23200, 23300, 23400, 23500, 23600, 23700, 23800, 23900],
        # CPI index level
        'CPIAUCSL': [258.0, 258.5, 259.0, 259.5, 260.0, 260.5, 261.0, 261.5, 262.0, 262.5, 263.0, 263.5, 264.0, 264.5, 265.0, 265.5, 266.0, 266.5, 267.0, 267.5],
        # Industrial production index level
        'INDPRO': [100.0, 100.5, 101.0, 101.5, 102.0, 102.5, 103.0, 103.5, 104.0, 104.5, 105.0, 105.5, 106.0, 106.5, 107.0, 107.5, 108.0, 108.5, 109.0, 109.5],
        # Retail sales in millions (should be billions)
        'RSAFS': [500000, 502000, 504000, 506000, 508000, 510000, 512000, 514000, 516000, 518000, 520000, 522000, 524000, 526000, 528000, 530000, 532000, 534000, 536000, 538000],
        # Fed funds rate in decimal form (should be percentage)
        'FEDFUNDS': [0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27],
        # 10-year Treasury yield in decimal form (should be percentage)
        'DGS10': [1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4],
    }
    return pd.DataFrame(series, index=quarter_ends)
def demonstrate_fixes():
    """Demonstrate the fixes applied to the economic analysis pipeline.

    Prints a before/after walkthrough of six fixes: unit normalization,
    growth-rate calculation, safe MAPE, forecast-period scaling, correlation
    on normalized data, and data-quality metrics, followed by a summary.
    """
    print("=== ECONOMIC ANALYSIS FIXES DEMONSTRATION ===\n")

    raw_data = create_test_data()
    print("1. ORIGINAL DATA (with issues):")
    print(raw_data.tail())
    print()
    print("2. APPLYING FIXES:")
    print()

    normalized_data = _show_unit_normalization(raw_data)
    growth_data = _show_growth_rates(normalized_data)
    _show_safe_mape()
    _show_forecast_scaling()
    _show_correlations(raw_data, growth_data)
    _show_quality_metrics(growth_data)
    _show_summary()


def _show_unit_normalization(raw_data):
    """Fix 1: rescale each series to a readable unit; return the normalized frame."""
    print("FIX 1: Unit Normalization")
    print("-" * 30)
    normalized_data = raw_data.copy()
    normalized_data['GDPC1'] = raw_data['GDPC1'] / 1000      # billions -> trillions
    normalized_data['RSAFS'] = raw_data['RSAFS'] / 1000      # millions -> billions
    normalized_data['FEDFUNDS'] = raw_data['FEDFUNDS'] * 100  # decimal -> percentage
    normalized_data['DGS10'] = raw_data['DGS10'] * 100        # decimal -> percentage
    print("After unit normalization:")
    print(normalized_data.tail())
    print()
    return normalized_data


def _show_growth_rates(normalized_data):
    """Fix 2: convert levels to period-over-period percent change; return the frame."""
    print("FIX 2: Proper Growth Rate Calculation")
    print("-" * 40)
    growth_data = (normalized_data.pct_change() * 100).dropna()
    print("Growth rates (percent change):")
    print(growth_data.tail())
    print()
    return growth_data


def _show_safe_mape():
    """Fix 3: MAPE with an epsilon floor on the denominator."""
    print("FIX 3: Safe MAPE Calculation")
    print("-" * 30)
    actual = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    forecast = np.array([0.15, 0.25, 0.35, 0.45, 0.55])
    # Naive MAPE divides by the raw actuals. NOTE(review): with a zero actual,
    # numpy array division emits a RuntimeWarning and yields inf rather than
    # raising, so the except branch is defensive; narrowed from a bare except.
    try:
        original_mape = np.mean(np.abs((actual - forecast) / actual)) * 100
        print(f"Original MAPE: {original_mape:.2f}%")
    except ZeroDivisionError:
        print("Original MAPE: ERROR (division by zero)")
    # Epsilon floor keeps the denominator strictly away from zero.
    denominator = np.maximum(np.abs(actual), 1e-5)
    fixed_mape = np.mean(np.abs((actual - forecast) / denominator)) * 100
    print(f"Fixed MAPE: {fixed_mape:.2f}%")
    print()


def _show_forecast_scaling():
    """Fix 4: scale the base (quarterly) forecast horizon by data frequency."""
    print("FIX 4: Forecast Period Scaling")
    print("-" * 35)
    base_periods = 4
    # freq code -> (periods per quarter, human-readable label)
    freq_scaling = {'D': (90, 'daily'), 'M': (3, 'monthly'), 'Q': (1, 'quarterly')}
    print("Original forecast_periods = 4")
    print("Scaled by frequency:")
    for freq, (scale, label) in freq_scaling.items():
        # Bug fix: the label now matches each frequency (the original
        # printed "(daily)" for every entry, including M and Q).
        print(f"  {freq} ({label}): {base_periods} -> {base_periods * scale} periods")
    print()


def _show_correlations(raw_data, growth_data):
    """Fix 5: correlations on normalized growth rates instead of raw levels."""
    print("FIX 5: Correlation Analysis with Normalized Data")
    print("-" * 50)
    # Raw-level correlation is dominated by common trends and scale.
    original_corr = raw_data.corr()
    print("Original correlation (scale-dominated):")
    print(original_corr.round(3))
    print()
    fixed_corr = growth_data.corr()
    print("Fixed correlation (normalized growth rates):")
    print(fixed_corr.round(3))
    print()


def _show_quality_metrics(growth_data):
    """Fix 6: per-series distribution and completeness metrics for growth rates."""
    print("FIX 6: Enhanced Data Quality Metrics")
    print("-" * 40)
    quality_metrics = {}
    for column in growth_data.columns:
        series = growth_data[column].dropna()
        quality_metrics[column] = {
            'mean': series.mean(),
            'std': series.std(),
            'skewness': series.skew(),
            'kurtosis': series.kurtosis(),
            # NOTE(review): growth_data is already dropna'd upstream, so this
            # is always 0 here; kept for parity with the pipeline's metric set.
            'missing_pct': (growth_data[column].isna().sum() / len(growth_data)) * 100,
        }
    print("Quality metrics for growth rates:")
    for col, metrics in quality_metrics.items():
        print(f"  {col}:")
        print(f"    Mean: {metrics['mean']:.4f}%")
        print(f"    Std: {metrics['std']:.4f}%")
        print(f"    Skewness: {metrics['skewness']:.4f}")
        print(f"    Kurtosis: {metrics['kurtosis']:.4f}")
        print(f"    Missing: {metrics['missing_pct']:.1f}%")
    print()


def _show_summary():
    """Print the summary of fixes applied and their impact."""
    print("=== SUMMARY OF FIXES APPLIED ===")
    print()
    fixes = [
        "1. Unit Normalization:",
        "   • GDP: billions → trillions",
        "   • Retail Sales: millions → billions",
        "   • Interest Rates: decimal → percentage",
        "",
        "2. Growth Rate Calculation:",
        "   • Explicit percent change calculation",
        "   • Proper interpretation of results",
        "",
        "3. Safe MAPE Calculation:",
        "   • Added epsilon to prevent division by zero",
        "   • More robust error metrics",
        "",
        "4. Forecast Period Scaling:",
        "   • Scale periods by data frequency",
        "   • Appropriate horizons for different series",
        "",
        "5. Data Normalization:",
        "   • Z-score or growth rate normalization",
        "   • Prevents scale bias in correlations",
        "",
        "6. Stationarity Enforcement:",
        "   • ADF tests before causality analysis",
        "   • Differencing for non-stationary series",
        "",
        "7. Enhanced Error Handling:",
        "   • Robust missing data handling",
        "   • Graceful failure recovery",
        "",
    ]
    for fix in fixes:
        print(fix)
    print("=== IMPACT OF FIXES ===")
    print()
    impacts = [
        "• More accurate economic interpretations",
        "• Proper scale comparisons between indicators",
        "• Robust forecasting with appropriate horizons",
        "• Reliable statistical tests and correlations",
        "• Better error handling and data quality",
        "• Consistent frequency alignment",
        "• Safe mathematical operations",
    ]
    for impact in impacts:
        print(impact)
    print()
    print("These fixes address all the major math issues identified in the original analysis.")
# Run the demonstration only when executed as a script (not on import).
if __name__ == "__main__":
    demonstrate_fixes()