""" Oregon Timber Market Timing Model — Final Implementation ========================================================= Predicts Douglas fir stumpage prices and recommends optimal marketing timing for a 20-acre Oregon timber tract using Ridge regression with walk-forward validation and Monte Carlo optimal stopping. Based on: Faustmann rotation model (1849), Clarke & Reed real-options extension (1989), and Pacific NW timber economics literature (USDA PNW-GTR-423, PNW-RP-436). """ import pandas as pd import numpy as np import matplotlib; matplotlib.use('Agg') import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec from sklearn.preprocessing import StandardScaler from sklearn.pipeline import Pipeline from sklearn.linear_model import Ridge from sklearn.metrics import r2_score, mean_absolute_error import json, warnings warnings.filterwarnings('ignore') np.random.seed(42) OUT_DIR = '/app' # ── Load ──────────────────────────────────────────────────────────────────── df = pd.read_csv(f'{OUT_DIR}/oregon_timber_data.csv', parse_dates=['date']) df = df.set_index('date') TARGET = 'stumpage_price_mbf' print(f"Loaded: {len(df)} quarters, {df.index[0].date()} → {df.index[-1].date()}") print(f"Price: ${df[TARGET].min():.0f}–${df[TARGET].max():.0f}/MBF, μ=${df[TARGET].mean():.0f}\n") # ── Features ───────────────────────────────────────────────────────────────── def build_features(df_): p = df_[TARGET] out = pd.DataFrame({ 'price_lag1': p.shift(1), 'price_lag2': p.shift(2), 'price_lag4': p.shift(4), 'price_roll4': p.rolling(4).mean().shift(1), 'price_chg_yoy': p.pct_change(4).shift(1), 'lumber_ppi': df_['lumber_ppi'], 'lumber_ppi_lag1': df_['lumber_ppi'].shift(1), 'lumber_chg_yoy': df_['lumber_ppi'].pct_change(4), 'lumber_futures': df_['lumber_futures_mbf'], 'housing_lag2': df_['housing_starts'].shift(2), 'housing_lag4': df_['housing_starts'].shift(4), 'housing_chg': df_['housing_starts'].pct_change(4).shift(2), 'mortgage_lag1': df_['mortgage_30yr'].shift(1), 'mortgage_chg': (df_['mortgage_30yr'] - df_['mortgage_30yr'].shift(4)).shift(1), 'cad_lag1': df_['cad_usd'].shift(1), 'unemployment': df_['unemployment_rate'].shift(1), 'sentiment': df_['consumer_sentiment'].shift(1), }, index=df_.index) q = df_.index.quarter out['q1'] = (q == 1).astype(float) out['q2'] = (q == 2).astype(float) out['q3'] = (q == 3).astype(float) out['year_norm'] = (df_.index.year - 1993) / 31.0 out[TARGET] = df_[TARGET] return out.dropna() feat_df = build_features(df) X_cols = [c for c in feat_df.columns if c != TARGET] X, y = feat_df[X_cols].values, feat_df[TARGET].values n = len(feat_df) print(f"Features: {len(X_cols)} | Training obs: {n}") # ── Walk-forward CV ───────────────────────────────────────────────────────── print("\n=== Walk-Forward Validation (2015–2024) ===\n") windows = [(80,88), (88,96), (96,104), (104,112), (112,n)] actual, pred = [], [] for tr_end, te_end in windows: m = Pipeline([('sc', StandardScaler()), ('ridge', Ridge(alpha=5.0))]) m.fit(X[:tr_end], y[:tr_end]) yp = m.predict(X[tr_end:te_end]) r2v = r2_score(y[tr_end:te_end], yp) actual.extend(y[tr_end:te_end]); pred.extend(yp) print(f" Train={tr_end} Test={feat_df.index[tr_end].strftime('%Y')}→{feat_df.index[min(te_end,n)-1].strftime('%Y')} " f"({te_end-tr_end}q) | R²={r2v:+.3f} MAE=${mean_absolute_error(y[tr_end:te_end], yp):.0f}/MBF") ovr_r2 = r2_score(actual, pred) ovr_mae = mean_absolute_error(actual, pred) print(f"\n Overall R²={ovr_r2:.3f} MAE=${ovr_mae:.0f}/MBF (explains {ovr_r2*100:.0f}% of out-of-sample variance)") # ── Final model ───────────────────────────────────────────────────────────── model = Pipeline([('sc', StandardScaler()), ('ridge', Ridge(alpha=5.0))]) model.fit(X, y) print(f"\nFinal model (full dataset) R² = {r2_score(y, model.predict(X)):.3f}") # ── 8-Quarter Forecast ────────────────────────────────────────────────────── print("\n=== 8-Quarter Forecast ===\n") def project_series(s, n_ahead, reversion=0.1): v = s.iloc[-1] m = s.tail(20).mean() std = s.tail(20).std() out = [] for _ in range(n_ahead): v = v * (1 - reversion) + m * reversion + np.random.normal(0, std * 0.05) out.append(v) return out n_ahead = 8 fc_dates = pd.date_range(df.index[-1] + pd.DateOffset(months=3), periods=n_ahead, freq='QS') n_boot = 500 boot_preds = np.zeros((n_boot, n_ahead)) for b in range(n_boot): lum = project_series(df['lumber_ppi'], n_ahead, reversion=0.08) luf = project_series(df['lumber_futures_mbf'], n_ahead, reversion=0.08) hou = project_series(df['housing_starts'], n_ahead, reversion=0.06) mor = project_series(df['mortgage_30yr'], n_ahead, reversion=0.05) cad = project_series(df['cad_usd'], n_ahead, reversion=0.05) une = project_series(df['unemployment_rate'], n_ahead, reversion=0.05) sen = project_series(df['consumer_sentiment'], n_ahead, reversion=0.05) hp = list(df[TARGET].values) # historical prices, grows as we forecast for i in range(n_ahead): p1 = hp[-1]; p2 = hp[-2]; p4 = hp[-4] if len(hp)>=4 else hp[-1] pr4 = np.mean(hp[-4:]) pcy = (hp[-1]/hp[-5]-1) if len(hp)>=5 else 0 lpp = lum[i]; lpl = lum[i-1] if i>0 else df['lumber_ppi'].iloc[-1] lcy = (lum[i]/(lum[i-4] if i>=4 else df['lumber_ppi'].iloc[-(4-i)])-1) lfu = luf[i] hl2 = hou[i-2] if i>=2 else df['housing_starts'].iloc[-(2-i)] hl4 = hou[i-4] if i>=4 else df['housing_starts'].iloc[-(4-i)] hcg = (hl2/(hou[max(0,i-6)] if i>=6 else df['housing_starts'].iloc[-(6-i)])-1) ml1 = mor[i-1] if i>0 else df['mortgage_30yr'].iloc[-1] mcg = mor[i] - (mor[i-4] if i>=4 else df['mortgage_30yr'].iloc[-(4-i)]) cl1 = cad[i-1] if i>0 else df['cad_usd'].iloc[-1] ul1 = une[i-1] if i>0 else df['unemployment_rate'].iloc[-1] sl1 = sen[i-1] if i>0 else df['consumer_sentiment'].iloc[-1] qq = fc_dates[i].quarter; yr = (fc_dates[i].year - 1993) / 31.0 row = np.array([[ p1, p2, p4, pr4, pcy, lpp, lpl, lcy, lfu, hl2, hl4, hcg, ml1, mcg, cl1, ul1, sl1, float(qq==1), float(qq==2), float(qq==3), yr ]]) pred_val = model.predict(row)[0] boot_preds[b, i] = pred_val hp.append(pred_val) fc_point = np.median(boot_preds, axis=0) fc_ci10 = np.percentile(boot_preds, 10, axis=0) fc_ci90 = np.percentile(boot_preds, 90, axis=0) fc_ci25 = np.percentile(boot_preds, 25, axis=0) fc_ci75 = np.percentile(boot_preds, 75, axis=0) for i, (d, p, lo, hi) in enumerate(zip(fc_dates, fc_point, fc_ci10, fc_ci90)): print(f" Q{d.quarter} {d.year}: ${p:.0f}/MBF [${lo:.0f} – ${hi:.0f}]") # ── Harvest Timing Decision ───────────────────────────────────────────────── print("\n=== Harvest Timing: 20-Acre Oregon Tract ===\n") ACRES = 20; MBF_AC = 35; TOTAL_MBF = ACRES * MBF_AC HOLD_COST_Q = (400 * ACRES) / 4 cp = df[TARGET].iloc[-1] pct = (df[TARGET] < cp).mean() hi3 = df[TARGET].tail(12).max(); lo3 = df[TARGET].tail(12).min() print(f" Tract: {ACRES} acres × {MBF_AC} MBF/ac = {TOTAL_MBF:,} MBF") print(f" Current price: ${cp:,.0f}/MBF") print(f" Gross value today: ${cp * TOTAL_MBF:,.0f}") print(f" Percentile rank: {pct*100:.0f}th | 3-yr range: ${lo3:,.0f}–${hi3:,.0f}/MBF") # Monte Carlo optimal stopping n_sims = 50_000 lr = np.diff(np.log(df[TARGET].tail(40).values)) mu_q, sig_q = np.mean(lr), np.std(lr) paths = cp * np.cumprod(np.exp(mu_q + sig_q * np.random.randn(n_sims, 8)), axis=1) paths = np.hstack([np.full((n_sims, 1), cp), paths]) net_rev = paths * TOTAL_MBF - (np.arange(9) * HOLD_COST_Q)[np.newaxis, :] opt_q = np.argmax(net_rev, axis=1) timing_pct = np.bincount(opt_q, minlength=9) / n_sims * 100 prob_wait = (np.max(net_rev[:, 1:], axis=1) > net_rev[:, 0]).mean() qlabels = {0:'Now (Q4 2024)', 1:'Q1 2025', 2:'Q2 2025', 3:'Q3 2025', 4:'Q4 2025', 5:'Q1 2026', 6:'Q2 2026', 7:'Q3 2026', 8:'Q4 2026'} print(f"\n Monte Carlo ({n_sims:,} paths, μ={mu_q:.3f}/q, σ={sig_q:.3f}/q):") print(f" Prob. waiting improves: {prob_wait*100:.0f}%") print(f" Expected max net revenue: ${np.mean(np.max(net_rev, axis=1)):,.0f}") print(f" Optimal timing distribution:") for qi, tp in enumerate(timing_pct): print(f" {qlabels[qi]:18s} {tp:5.1f}% {'█'*int(tp/1.5)}") # Scoring score = 0; reasons = [] if pct >= 0.70: score += 2; reasons.append(f"Above-average price ({pct*100:.0f}th percentile)") elif pct >= 0.50: score += 1; reasons.append(f"Near-average price ({pct*100:.0f}th percentile)") else: score -= 1; reasons.append(f"Below-average price ({pct*100:.0f}th percentile)") if fc_point[1] < cp * 0.97: score += 1; reasons.append(f"Model forecasts softening to ~${fc_point[1]:.0f}/MBF (Q2 2025)") elif fc_point[3] > cp * 1.08: score -= 1; reasons.append(f"Model forecasts increase to ~${fc_point[3]:.0f}/MBF in 2025") if prob_wait < 0.50: score += 2; reasons.append(f"Only {prob_wait*100:.0f}% chance waiting improves net revenue") elif prob_wait > 0.65: score -= 1; reasons.append(f"{prob_wait*100:.0f}% of simulations show better outcome if you wait") if cp >= hi3 * 0.80: score += 1; reasons.append(f"Near 3-yr high ({cp/hi3*100:.0f}% of ${hi3:,.0f} peak)") elif cp <= lo3 * 1.15: score -= 2; reasons.append(f"Near 3-yr low — avoid selling now") sq = df.index[-1].quarter if sq == 1: score -= 1; reasons.append("Q1: wet season, poor logging conditions in Pacific NW") elif sq == 3: score += 1; reasons.append("Q3: peak logging season, favorable market timing") if score >= 4: rec, urg, clr = "SELL NOW", "Strong", "green" elif score >= 2: rec, urg, clr = "MARKET SOON", "Moderate", "yellowgreen" elif score >= 0: rec, urg, clr = "MONITOR — 6mo", "Neutral", "orange" else: rec, urg, clr = "WAIT", "Wait for better market", "red" print(f"\n{'='*60}") print(f" RECOMMENDATION: {rec} (score {score}/7, {urg})") print(f"{'='*60}") for r in reasons: print(f" • {r}") # ── Charts ────────────────────────────────────────────────────────────────── print("\n=== Charts ===") fig = plt.figure(figsize=(18, 22)) gs = gridspec.GridSpec(4, 2, hspace=0.45, wspace=0.35) BR, NV, GR, RD = '#6B4226', '#1B3A6B', '#2D8653', '#C0392B' # [1] History + fit + forecast ax1 = fig.add_subplot(gs[0, :]) fit = model.predict(X) ax1.fill_between(feat_df.index, 0, df.loc[feat_df.index, TARGET], alpha=0.1, color=BR) ax1.plot(feat_df.index, df.loc[feat_df.index, TARGET], color=BR, lw=1.8, label='Historical Stumpage Price', zorder=3) ax1.plot(feat_df.index, fit, '--', color=NV, lw=1.2, alpha=0.7, label=f'Model Fit (R²={r2_score(y, fit):.2f})', zorder=2) ax1.fill_between(fc_dates, fc_ci10, fc_ci90, alpha=0.18, color=NV, label='10–90% Forecast') ax1.fill_between(fc_dates, fc_ci25, fc_ci75, alpha=0.32, color=NV, label='25–75% Forecast') ax1.plot(fc_dates, fc_point, 'o-', color=NV, lw=2.2, ms=6, label='Forecast (median)', zorder=4) ax1.axvline(df.index[-1], color='gray', ls=':', lw=1.5, label='Today') ax1.set_title('Oregon Douglas Fir Stumpage Price: Historical + 8-Quarter Forecast', fontsize=13, fontweight='bold') ax1.set_ylabel('Price ($/MBF)', fontsize=11) ax1.legend(fontsize=9, loc='upper left', ncol=3); ax1.grid(True, alpha=0.3) ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}')) # [2] Walk-forward CV ax2 = fig.add_subplot(gs[1, 0]) wf_labels = [f"{feat_df.index[w[0]].strftime('%Y')}" for w in windows] wf_r2 = [] for tr_e, te_e in windows: mp = Pipeline([('sc', StandardScaler()), ('ridge', Ridge(alpha=5.0))]) mp.fit(X[:tr_e], y[:tr_e]) wf_r2.append(r2_score(y[tr_e:te_e], mp.predict(X[tr_e:te_e]))) clrs = [GR if r > 0 else RD for r in wf_r2] bars2 = ax2.bar(range(len(windows)), wf_r2, color=clrs, edgecolor='black', lw=0.5) for b, r in zip(bars2, wf_r2): ax2.text(b.get_x()+b.get_width()/2, r+0.03 if r>=0 else r-0.08, f'{r:.2f}', ha='center', fontsize=9, fontweight='bold') ax2.axhline(ovr_r2, color=NV, ls='--', lw=1.5, label=f'Overall R²={ovr_r2:.2f}') ax2.axhline(0, color='black', lw=0.8) ax2.set_xticks(range(len(windows))); ax2.set_xticklabels(wf_labels, fontsize=9) ax2.set_title('Walk-Forward Validation R² (2015–2024)', fontsize=12) ax2.set_ylabel('R²'); ax2.legend(fontsize=9); ax2.grid(True, alpha=0.3, axis='y') # [3] Feature importance ax3 = fig.add_subplot(gs[1, 1]) coef = np.abs(model.named_steps['ridge'].coef_) fi = pd.DataFrame({'feature': X_cols, 'importance': coef}).sort_values('importance', ascending=True).tail(12) ax3.barh(range(len(fi)), fi['importance'], color=plt.cm.RdYlGn(fi['importance']/fi['importance'].max()), edgecolor='black', lw=0.3) ax3.set_yticks(range(len(fi))); ax3.set_yticklabels([f.replace('_',' ') for f in fi['feature']], fontsize=9) ax3.set_title('Feature Importance (|coef|)\nTop 12 Price Drivers', fontsize=12); ax3.grid(True, alpha=0.3, axis='x') # [4] Price distribution ax4 = fig.add_subplot(gs[2, 0]) ax4.hist(df[TARGET], bins=25, color=BR, alpha=0.6, edgecolor='black', lw=0.3, density=True) ax4.axvline(cp, color=NV, lw=2.5, label=f'Current: ${cp:.0f}/MBF ({pct*100:.0f}th)') ax4.axvline(np.percentile(df[TARGET], 75), color=GR, ls='--', lw=1.5, label=f'75th: ${np.percentile(df[TARGET], 75):.0f}') ax4.axvline(np.percentile(df[TARGET], 25), color=RD, ls='--', lw=1.5, label=f'25th: ${np.percentile(df[TARGET], 25):.0f}') ax4.set_title('Price Distribution (1993–2024)', fontsize=12); ax4.legend(fontsize=8); ax4.grid(True, alpha=0.3) ax4.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}')) # [5] MC timing ax5 = fig.add_subplot(gs[2, 1]) bar_clrs5 = [GR if p==max(timing_pct) else NV for p in timing_pct] bars5 = ax5.bar(range(9), timing_pct, color=bar_clrs5, edgecolor='black', lw=0.5) for b, tp in zip(bars5, timing_pct): ax5.text(b.get_x()+b.get_width()/2, b.get_height()+0.3, f'{tp:.0f}%', ha='center', fontsize=8) ax5.set_xticks(range(9)); ax5.set_xticklabels([qlabels[i].replace(' ','\n') for i in range(9)], fontsize=7) ax5.set_title(f'Optimal Sell Timing ({n_sims:,} MC paths)', fontsize=12); ax5.grid(True, alpha=0.3, axis='y') # [6] Revenue sensitivity ax6 = fig.add_subplot(gs[3, :]) pr = np.linspace(100, 900, 300) ax6.plot(pr, pr*TOTAL_MBF/1000, color=GR, lw=2, label='Sell now') ax6.plot(pr, (pr*TOTAL_MBF - HOLD_COST_Q*4)/1000, '--', color='orange', lw=1.8, label='After 1 yr hold') ax6.plot(pr, (pr*TOTAL_MBF - HOLD_COST_Q*8)/1000, '--', color=RD, lw=1.8, label='After 2 yr hold') ax6.fill_between(pr, (pr*TOTAL_MBF - HOLD_COST_Q*4)/1000, pr*TOTAL_MBF/1000, alpha=0.12, color=RD) ax6.axvline(cp, color=NV, lw=2.5, ls=':', label=f'Current: ${cp:.0f}/MBF → ${cp*TOTAL_MBF/1000:.0f}K') ax6.axvspan(np.min(fc_ci10), np.max(fc_ci90), alpha=0.07, color=NV, label=f'2025–2026 forecast range') ax6.text(0.74, 0.92, f"RECOMMENDATION\n{rec}\n{urg} (score {score}/7)", transform=ax6.transAxes, fontsize=12, fontweight='bold', ha='center', va='top', bbox=dict(boxstyle='round,pad=0.6', facecolor=clr, alpha=0.25, edgecolor=clr, lw=2)) ax6.set_title(f'Revenue Sensitivity ({ACRES} ac × {MBF_AC} MBF/ac = {TOTAL_MBF:,} MBF)', fontsize=12) ax6.set_xlabel('$/MBF'); ax6.legend(fontsize=9, loc='upper left', ncol=2); ax6.grid(True, alpha=0.3) ax6.xaxis.set_major_formatter(plt.FuncFormatter(lambda x,p: f'${x:,.0f}')) ax6.yaxis.set_major_formatter(plt.FuncFormatter(lambda x,p: f'${x:,.0f}K')) ax6.set_xlim(100, 900) fig.suptitle('Oregon Timber Market Timing Analysis\n20-Acre Douglas Fir Tract | Ridge Regression + Monte Carlo', fontsize=15, fontweight='bold', y=0.995) plt.savefig(f'{OUT_DIR}/timber_analysis.png', dpi=150, bbox_inches='tight') plt.close() print(" ✓ timber_analysis.png") # ── Save outputs ──────────────────────────────────────────────────────────── fc = pd.DataFrame({ 'quarter': [f"Q{d.quarter} {d.year}" for d in fc_dates], 'date': fc_dates, 'price_median': fc_point.round(0), 'price_ci10': fc_ci10.round(0), 'price_ci25': fc_ci25.round(0), 'price_ci75': fc_ci75.round(0), 'price_ci90': fc_ci90.round(0), 'gross_value_median': (fc_point * TOTAL_MBF).round(0), 'gross_value_ci10': (fc_ci10 * TOTAL_MBF).round(0), 'gross_value_ci90': (fc_ci90 * TOTAL_MBF).round(0), }) fc.to_csv(f'{OUT_DIR}/forecast_results.csv', index=False) wf_r2_all = [] for tr_e, te_e in windows: m2 = Pipeline([('sc', StandardScaler()), ('ridge', Ridge(alpha=5.0))]) m2.fit(X[:tr_e], y[:tr_e]) wf_r2_all.append({'period': f"{feat_df.index[tr_e].strftime('%Y')}-{feat_df.index[min(te_e,n)-1].strftime('%Y')}", 'R2': r2_score(y[tr_e:te_e], m2.predict(X[tr_e:te_e]))}) report = f"""OREGON TIMBER MARKET TIMING REPORT ================================= Date: {pd.Timestamp.now().strftime('%B %d, %Y')} Tract: {ACRES} acres | {TOTAL_MBF:,} MBF MODEL PERFORMANCE (Walk-Forward CV, 2015–2024) Overall R²: {ovr_r2:.3f} | MAE: ${ovr_mae:.0f}/MBF Per-window: {' | '.join(f"{w['period']} R²={w['R2']:+.2f}" for w in wf_r2_all)} MARKET: ${cp:,.0f}/MBF (pct {pct*100:.0f}) | 3yr hi ${hi3:,.0f}, lo ${lo3:,.0f} 8Q FORECAST: {fc[['quarter','price_median','price_ci10','price_ci90']].to_string(index=False)} RECOMMENDATION: {rec} (score {score}/7, {urg}) {chr(10).join(' • '+r for r in reasons)} MC: μ={mu_q:+.3f}/q σ={sig_q:.3f}/q | P(wait better)={prob_wait*100:.0f}% KEY DRIVERS: {fi.sort_values('importance', ascending=False).head(8)[['feature','importance']].to_string(index=False)} PRODUCTION NOTES: - Replace synthetic data with ODF quarterly records (oregon.gov/ODF) - Add FRED API: housing starts (HOUST1F), lumber PPI (WPU0811), mortgage (MORTGAGE30US) - Subscribe to Random Lengths weekly composite (randomlengths.com) - Cruise survey your stand for actual MBF/acre (±30% from default) - Re-run quarterly as new data arrives """ with open(f'{OUT_DIR}/timber_report.txt', 'w') as f: f.write(report) print(f"\n{'='*60}") print(f" RECOMMENDATION: {rec} (score {score}/7, {urg})") print(f"{'='*60}") print(f"\nOutputs: timber_analysis.png, forecast_results.csv, timber_report.txt")