import streamlit as st
import pandas as pd
import numpy as np
import pybaseball as pb
import xgboost as xgb

# --- KASPER v1.5 GLOBAL MEMORY ---
# Module-level buffer of historical hitter seasons used to train the XGBoost
# correction layer. The guard keeps an existing buffer from being replaced if
# this code runs again in a namespace that already holds one.
if 'training_buffer' not in globals():
    training_buffer = pd.DataFrame()


def ingest_to_kasper_memory(df):
    """Store established 2021+ seasons from ``df`` in the training buffer.

    Rows with ``Season >= 2021`` and ``AB > 150`` are appended to the
    module-level ``training_buffer``; duplicates on (Name, Season) are
    dropped, keeping the row that was ingested first.

    Args:
        df: DataFrame with ``Season``, ``AB`` and ``Name`` columns.

    Raises:
        KeyError: if ``df`` lacks one of the required columns.
    """
    global training_buffer
    # Filter for relevant years and established samples (>150 AB).
    relevant_data = df[(df['Season'] >= 2021) & (df['AB'] > 150)].copy()
    if relevant_data.empty:
        return
    training_buffer = pd.concat(
        [training_buffer, relevant_data]
    ).drop_duplicates(subset=['Name', 'Season'])


def train_kasper_ensemble():
    """Train the XGBoost correction layer from ``training_buffer``.

    Features are Age, K%, BB%, avg_bat_speed and LaunchAngle (missing values
    filled with 0); the target is the season's recorded ``HR``.

    Returns:
        A fitted ``xgb.XGBRegressor``, or ``None`` when the buffer cannot
        support training: fewer than 200 usable rows, or a feature/target
        column is absent from the buffer.
    """
    features = ['Age', 'K%', 'BB%', 'avg_bat_speed', 'LaunchAngle']
    target = 'HR'

    # Safety floor to prevent training on tiny data.
    if len(training_buffer) < 200:
        return None

    # A missing column (e.g. bat speed when the Statcast merge failed) used to
    # raise KeyError here; treat it as "not enough data to train" instead.
    if any(col not in training_buffer.columns for col in features + [target]):
        return None

    # Rows without a recorded target cannot be used as training labels.
    usable = training_buffer[training_buffer[target].notna()]
    if len(usable) < 200:
        return None

    X = usable[features].fillna(0)
    y = usable[target]

    model = xgb.XGBRegressor(
        n_estimators=150,
        learning_rate=0.08,
        max_depth=3,  # Shallow depth prevents prospect over-fitting
        objective='reg:squarederror',
    )
    model.fit(X, y)
    return model


# --- I. UI STYLES (LOCKED) ---
def apply_sovereign_styles():
    """Inject the app's style markup via ``st.markdown``.

    NOTE(review): the markup string is effectively empty in this copy of the
    file; confirm whether the CSS payload was lost.
    """
    st.markdown(""" """, unsafe_allow_html=True)


# --- II.
# DATA CORE (2021-2025 PROTECTED) ---
@st.cache_data(show_spinner=False)
def load_master_data():
    """Download and assemble the 2021-2025 pitching and batting tables.

    Each season is fetched with ``pb.pitching_stats`` / ``pb.batting_stats``
    (``qual=0``), tagged with a ``Season`` column and concatenated. Pitchers
    get a ``Skill_ERA`` column (SIERA when present, otherwise
    ``FIP * 0.92 + 0.35``). Hitters are enriched, best-effort, with bat-speed
    columns merged on ``Name``.

    Returns:
        tuple: ``(pdf, hdf)`` - the pitching and batting DataFrames. ``pdf``
        always carries ``Stuff+`` and ``Loc+`` columns with gaps filled by
        100.0.

    Raises:
        ValueError: from ``pd.concat`` if every season came back empty.
    """
    years = [2021, 2022, 2023, 2024, 2025]
    p_store, h_store = [], []
    for yr in years:
        p = pb.pitching_stats(yr, qual=0)
        if not p.empty:
            p['Season'] = yr
            p['Skill_ERA'] = p['SIERA'] if 'SIERA' in p.columns else p['FIP'] * 0.92 + 0.35
            p_store.append(p)
        h = pb.batting_stats(yr, qual=0)
        if not h.empty:
            h['Season'] = yr
            h_store.append(h)

    # ignore_index gives each row a unique label so later per-player slices
    # never carry duplicate index values from different seasons.
    pdf = pd.concat(p_store, ignore_index=True)
    hdf = pd.concat(h_store, ignore_index=True)

    try:
        # NOTE(review): confirm this helper exists in the pinned pybaseball
        # version; if it does not, the enrichment is silently skipped.
        savant_bs = pb.statcast_batter_bat_speed(2024)
        savant_bs['Name'] = savant_bs['first_name'] + ' ' + savant_bs['last_name']
        # One row per name so the left merge cannot duplicate season rows.
        bat_cols = savant_bs[['Name', 'avg_bat_speed', 'blast_rate']].drop_duplicates(subset='Name')
        hdf = hdf.merge(bat_cols, on='Name', how='left')
    except Exception:
        # Deliberate best-effort: the hitter engine infers bat speed when the
        # column is missing, so a failed fetch must not abort the load.
        pass

    if 'ExitVelocity' in hdf.columns and 'MaxEV' in hdf.columns:
        hdf['EV90'] = (hdf['ExitVelocity'] * 1.05) + (hdf['MaxEV'] * 0.08)

    # --- 1. KEY MAPPING ---
    # Map various data-source spellings to a unified internal name.
    plus_map = {
        'Stuff+': ['Stuff+', 'stuff_plus', 'stf_plus'],
        'Loc+': ['Location+', 'location_plus', 'loc_plus'],
    }
    for internal_key, aliases in plus_map.items():
        if internal_key not in pdf.columns:
            for alias in aliases:
                if alias in pdf.columns:
                    pdf.rename(columns={alias: internal_key}, inplace=True)
                    break
        # Safety: default to 100 (league average) for missing values, and
        # create the column outright when no alias was found (the original
        # raised KeyError in that case).
        if internal_key in pdf.columns:
            pdf[internal_key] = pdf[internal_key].fillna(100.0)
        else:
            pdf[internal_key] = 100.0

    return pdf, hdf


st.set_page_config(page_title="2026 Kasper Projections", layout="wide")
apply_sovereign_styles()

import time

# --- REPAIRED INGESTION + VISIBLE CREDITS ---
if 'master' not in st.session_state:
    with st.status("⚙️ Kasper Engine: Ingesting Yearly Data...", expanded=True) as status:
        # 1. Permanent Credits Header
        st.markdown("### 👑 Creator Credits")
        st.markdown("""
        **Twitter:** [@GoatedCardinals](https://x.com/goatedcardinals)\n
        **Follow Kasper:** [@KasperMLB](https://x.com/KasperMLB)\n
        **Updates:** Stay Tuned! MLB Futures tool being built.\n
        **Last Updated:** March 02, 2026 (HF Data Push)
        """)
        st.markdown("---")

        # 2. Progress Elements
        prog_bar = st.progress(0)
        status_text = st.empty()  # Placeholder for changing status text
        status_text.write("📡 Handshaking with FanGraphs & Statcast...")
        time.sleep(0.5)
        prog_bar.progress(30)
        status_text.write("📂 Processing 5-Year Skill Weights...")

        # The heavy lift
        st.session_state.master = load_master_data()

        prog_bar.progress(75)
        status_text.write("🚀 Finalizing Kasper v10.6 Calculations...")
        time.sleep(0.5)
        prog_bar.progress(100)
        status.update(label="✅ Kasper Engine Active.", state="complete", expanded=False)

# --- III. SIDEBAR (QUALIFIED SORTING) ---
mode = st.sidebar.radio("⚔️ Active View Mode", ["Pitcher", "Hitter"])

# --- UNPACK DATA FROM SESSION STATE ---
if 'master' in st.session_state:
    df_p, df_h = st.session_state.master
else:
    # Fallback to prevent app crash if session hasn't loaded yet
    st.stop()

# --- REPAIRED SORTING & POSITIONAL PURGE ---
# Only players with 2025 playing time are offered in the selector.
if mode == "Pitcher":
    active_names = df_p[(df_p['Season'] == 2025) & (df_p['IP'] > 0.1)]['Name'].unique()
else:
    active_names = df_h[(df_h['Season'] == 2025) & (df_h['AB'] > 0)]['Name'].unique()
filtered_list = sorted(str(n) for n in active_names if pd.notna(n))

# --- SIDEBAR SELECTBOX ---
selected_name = st.sidebar.selectbox("Active Profile Search", filtered_list)

with st.sidebar.expander("🧪 KASPER TECH MANUAL (v11)", expanded=True):
    tm_tabs = st.tabs(["🧬 Core", "🎯 Steps", "⚖️ Audit"])

    with tm_tabs[0]:  # CORE ARCHITECTURE
        st.markdown("### **The Anchor Philosophy**")
        st.info("**Hitter Anchor:** 5-Year Exp Decay + Statcast Physicality.")
        st.info("**Pitcher Anchor:** SIERA (Skill-Interactive ERA).")
        st.write("""
        Kasper uses **SIERA** over FIP because SIERA accounts for ball-in-play complexity
        and the 'strikeout-walk' interaction. For hitters, we prioritize **Barrel%** and **Bat Speed**
        over historical HR totals to find 'hidden' breakouts.
        """)

    with tm_tabs[1]:  # STEP BREAKDOWN
        step_choice = st.selectbox("Select Engine", ["Hitter v11.0", "Pitcher v10.0"])
        if step_choice == "Hitter v11.0":
            steps = {
                "1. KNN Vectoring": "Clusters hitters by physical similarity (Exit Velo/Launch Angle).",
                "2. TVG 2.0 Anchor": "Weights 2025 at 45% to capture current peak performance.",
                "3. Trajectory Bias": "Maps GB/FB ratios to historic HR/FB efficiency.",
                "4. Random Forest": "Isolates noise from actual skill growth.",
                "5. Aggressive Tag": "Corrects for 'Free Swingers' (O-Swing > 38%) who defy traditional discipline models.",
                "6. Waterfall Lock": "Ensures HR/AVG/OBP remain mathematically correlated.",
                "7. Quad-Power Curve": "Allows elite outliers (Judge/Ohtani) to break the 50-HR ceiling.",
                "8. Monte Carlo": "10k simulations to produce the final 'Most Likely' output.",
                "9. Rookie Translation": "Applies a 10% 'Experience Discount' to prospects with <100 career MLB ABs.",
            }
        else:
            steps = {
                "1. KNN Vectoring": "Clusters pitchers by Stuff+ and velocity consistency.",
                "2. SIERA Weighting": "Calculates skill-based baseline using 5-year weighted SIERA.",
                "3. K/BB Interaction": "Projects K/9 and BB/9 as co-dependent variables.",
                "4. HR/9 Normalization": "Regresses outlier HR/9 seasons toward home-park averages.",
                "5. Fatigue Scaling": "Adjusts for IP volume (Linear decay of stuff over long seasons).",
                "6. Waterfall Lock": "Ensures ERA/WHIP/CSW% align with projected skill.",
                "7. League Context": "Adjusts for current MLB run-scoring environment (1.12x).",
                "8. Monte Carlo": "10k simulations to establish the Stability Factor (sig).",
            }
        for s, desc in steps.items():
            st.markdown(f"**{s}**")
            st.caption(desc)

    with tm_tabs[2]:  # INDUSTRY AUDIT
        st.markdown("### **Consensus Variance**")
        st.table({
            "System": ["ZiPS", "Steamer", "ATC", "THE BAT X", "OOPSY"],
            "Diff (v10.6)": ["-12% HR", "-8% HR", "-5% AVG", "+2% EV", "-4% ERA"],
        })
        st.markdown("""
        **Why we differ:** Kasper is **12% more aggressive** on elite power than ZiPS/Steamer
        because we weight physical tools (Bat Speed) over historical regression.
        """)


# --- IV. ENGINES ---
def get_weighted_value(data, metric):
    """Return the recency-weighted value of ``metric`` for one player.

    Seasons 2025..2021 are weighted 0.45/0.25/0.15/0.10/0.05. For each season
    the first row's value is used; seasons with no row or a NaN value are
    skipped and the remaining weights are renormalised.

    Args:
        data: the player's rows, with a ``Season`` column.
        metric: column name to aggregate.

    Returns:
        The weighted value; the plain column mean when no weighted season
        contributed; ``0.0`` when the column is absent or holds no usable
        number (so callers' ``== 0`` fallbacks fire instead of seeing NaN).
    """
    if metric not in data.columns:
        return 0.0
    weights = {2025: 0.45, 2024: 0.25, 2023: 0.15, 2022: 0.10, 2021: 0.05}
    weighted_sum, total_weight = 0.0, 0.0
    for yr, w in weights.items():
        subset = data[data['Season'] == yr]
        if subset.empty:
            continue
        value = subset[metric].iloc[0]
        if pd.isna(value):
            continue
        weighted_sum += value * w
        total_weight += w
    if total_weight > 0:
        return weighted_sum / total_weight
    fallback = data[metric].mean()
    return 0.0 if pd.isna(fallback) else fallback


def _mc_median(mean, spread, draws=10000):
    """Return the median of ``draws`` normal samples centred on ``mean``."""
    return np.percentile(np.random.normal(mean, spread, draws), 50)


def _first_valid(row, keys, default):
    """Return the first non-NaN ``row[k]`` for ``k`` in ``keys``, else ``default``."""
    for k in keys:
        if k in row and not pd.isna(row[k]):
            return row[k]
    return default


def _filled_column(frame, column, default):
    """Return ``frame[column]`` with NaN filled, or an all-``default`` Series."""
    if column in frame.columns:
        return frame[column].fillna(default)
    return pd.Series(default, index=frame.index, dtype=float)


def _as_fraction(value):
    """Convert a percent-scale reading (> 1) to a fraction; pass fractions through."""
    return value / 100 if value > 1 else value


# LOCKED PITCHER ENGINE v10.0
def run_p_engine_v10(data, target_ip):
    """Project a pitcher's 2026 line for a given innings workload.

    The original body read the module globals ``hist`` and ``workload``; it
    now uses only its own arguments, which carry the same values at the
    existing call site.

    Args:
        data: the pitcher's season rows (must be non-empty; needs ``IP`` and,
            for the weighted baselines, ``Season``).
        target_ip: projected innings pitched.

    Returns:
        tuple: ``(res, p_sig)`` where ``res`` maps stat names to projected
        values and ``p_sig`` is the workload-based stability factor (1.0 when
        ``target_ip`` does not exceed the pitcher's mean IP).

    Raises:
        IndexError: if ``data`` has no rows.
    """
    # Order rows by season so "latest row" and the waterfall weights line up
    # regardless of how the caller assembled the frame.
    seasons = data.sort_values('Season', kind='stable') if 'Season' in data.columns else data
    row = seasons.iloc[-1]

    b_siera = get_weighted_value(data, 'Skill_ERA')
    b_whip, b_k9, b_bb9, b_hr9, b_swstr = [
        get_weighted_value(data, x) for x in ['WHIP', 'K/9', 'BB/9', 'HR/9', 'SwStr%']
    ]
    p_sig = 1.0 + ((max(0, target_ip - data['IP'].mean()) ** 1.1) * 0.0015)

    res = {
        'ERA': _mc_median(b_siera * 0.98, 0.15 * p_sig),
        'WHIP': _mc_median(b_whip * 0.99, 0.04 * p_sig),
        'K/9': _mc_median(b_k9, b_k9 * 0.03),
        'SwStr%': _mc_median(b_swstr, b_swstr * 0.04),
        'BB/9': _mc_median(b_bb9, 0.20),
        'HR/9': _mc_median(b_hr9, 0.15),
    }
    res['CSW%'] = res['SwStr%'] + 0.176

    # --- VOLUMETRIC SCALING ---
    # Rates drift away from the 150 IP baseline in either direction.
    fatigue_scale = 1.0 + (abs(target_ip - 150) * 0.0005)
    res['K/9'] = res['K/9'] * (1.0 + (1.0 / (target_ip + 1)))  # Small sample K/9 boost
    res['BB/9'] = res['BB/9'] * fatigue_scale
    res['ERA'] = res['ERA'] * fatigue_scale
    res['WHIP'] = res['WHIP'] * fatigue_scale
    res['HR/9'] = res['HR/9'] * (1.0 + (target_ip * 0.0002))

    # Counting stats derived from the scaled rates.
    res['SO'] = (res['K/9'] / 9) * target_ip
    res['BB'] = (res['BB/9'] / 9) * target_ip
    res['ER'] = (res['ERA'] / 9) * target_ip

    # --- PHYSICAL PROXY (creates stuff_proxy) ---
    # Baselines: fastball velo, induced vertical break, extension.
    v_baseline, i_baseline, e_baseline = 93.8, 15.6, 6.4
    velo_keys = ['avg_velo', 'release_speed', 'Velocity', 'FBv']
    iv_keys = ['avg_ivb', 'p_v_break', 'induced_vertical_break', 'IVB']
    ex_keys = ['extension', 'release_extension', 'avg_extension']

    # Missing or NaN readings fall back to the baseline (score of 100).
    curr_v = _first_valid(row, velo_keys, v_baseline)
    curr_i = _first_valid(row, iv_keys, i_baseline)
    curr_e = _first_valid(row, ex_keys, e_baseline)

    velo_score = (curr_v / v_baseline) * 100
    ivb_score = (curr_i / i_baseline) * 100
    ext_score = (curr_e / e_baseline) * 100
    stuff_proxy = (velo_score * 0.45) + (ivb_score * 0.35) + (ext_score * 0.20)

    # --- ROOKIE / MISSING DATA PROTECTION ---
    # Gaps are filled with league-average values so the averages below never
    # see NaN; computed on local Series so the caller's frame is untouched.
    k_series = _filled_column(seasons, 'K/9', 8.5)
    era_series = _filled_column(seasons, 'ERA', 4.20)

    # --- WATERFALL WEIGHTED BASELINES ---
    # Weights for the five most recent rows, oldest first; shorter histories
    # use the trailing weights (np.average renormalises them).
    wf_weights = [0.05, 0.08, 0.15, 0.25, 0.47]
    recent_k = k_series.tail(len(wf_weights))
    recent_era = era_series.tail(len(wf_weights))
    tail_weights = wf_weights[-len(recent_k):]
    k_history = np.average(recent_k, weights=tail_weights)
    era_history = np.average(recent_era, weights=tail_weights)

    # --- PITCHING+ PROJECTION ---
    stuff_25 = float(_first_valid(row, ['Stuff+'], 100.0))
    loc_25 = float(_first_valid(row, ['Loc+'], 100.0))
    final_stuff = round((stuff_25 * 0.70) + (stuff_proxy * 0.30), 1)
    final_loc = round((loc_25 * 0.90) + (100 * 0.10), 1)
    final_pitching_plus = round((final_stuff * 0.60) + (final_loc * 0.40), 1)
    f_p_plus_scalar = float(final_pitching_plus)

    # Talent-first blend: 70% Pitching+ expectation / 30% waterfall history.
    k9_talent_expectation = 8.5 + ((f_p_plus_scalar - 100) * 0.22)
    era_talent_expectation = 4.20 - ((f_p_plus_scalar - 100) * 0.06)
    final_k9_proj = (k9_talent_expectation * 0.70) + (k_history * 0.30)
    final_era_proj = (era_talent_expectation * 0.70) + (era_history * 0.30)

    # --- RESULTS MAPPING ---
    res['Stuff+'] = final_stuff
    res['Location+'] = final_loc
    res['Pitching+'] = final_pitching_plus
    res['K/9_Projected'] = round(max(4.0, final_k9_proj), 2)
    res['ERA_Projected'] = round(max(1.50, final_era_proj), 2)
    res['WHIP_Projected'] = round(max(0.80, 1.30 - ((f_p_plus_scalar - 100) * 0.012)), 2)

    # Variance moves with the requested workload (previously read from the
    # global slider value, which is the same number passed as target_ip).
    calc_variance = (1.1 - p_sig) * (float(target_ip) / 100.0)
    res['Variance'] = round(calc_variance, 2)
    res['Sig'] = round(p_sig, 2)
    return res, p_sig


# --- FINAL CORE v2 (REPAIRED v10.6) ---
def run_h_engine_v10_8(data, target_ab):
    """Project a hitter's 2026 line for a given at-bat volume.

    Args:
        data: the hitter's season rows (needs ``Season`` for weighting).
        target_ab: projected at-bats.

    Returns:
        tuple: ``(res, h_sig)`` where ``res`` maps stat names to projected
        values and ``h_sig`` is the sample-size stability factor
        (``1 + 0.15 * sqrt(600 / (target_ab + 1))``).
    """
    def get_stat(col_list):
        # First column name present in the data wins (source naming varies).
        for c in col_list:
            if c in data.columns:
                return get_weighted_value(data, c)
        return 0.0

    # Smaller projected samples produce a larger stability factor.
    sample_size_weight = (600 / (target_ab + 1)) ** 0.5
    h_sig = 1.0 + (sample_size_weight * 0.15)

    # CORE METRIC MAPPING
    res = {
        'AVG': get_stat(['AVG']),
        'OBP': get_stat(['OBP']),
        'SLG': get_stat(['SLG']),
        'SB': get_stat(['SB']),
        'SwStr%': get_stat(['SwStr%', 'SwStr', 'swinging_strike_pct']),
        'BB%': get_stat(['BB%', 'BB_pct']),
        'K%': get_stat(['K%', 'SO_pct']),
        'Barrel%': get_stat(['Barrel%', 'barrel_rate']),
        'FB%': get_stat(['FB%', 'fly_ball_pct']),
        'Pull%': get_stat(['Pull%', 'pull_pct']),
        'Z-Swing%': get_stat(['Z-Swing%', 'Z-Swing', 'z_swing_pct']),
        'O-Swing%': get_stat(['O-Swing%', 'O-Swing', 'o_swing_pct']),
        'Z-Contact%': get_stat(['Z-Contact%', 'Z-Contact', 'z_contact_pct']),
        'EV90': get_stat(['EV90', 'ev90', '90th_Percentile_EV', 'avg_best_speed', 'Exit Velocity', 'avg_distance']),
        'avg_bat_speed': get_stat(['avg_bat_speed', 'bat_speed', 'Avg_Bat_Speed', 'swing_speed']),
        'LaunchAngle': get_stat(['LaunchAngle', 'launch_angle_avg', 'avg_launch_angle', 'LA']),
        'GB%': get_stat(['GB%', 'GB', 'ground_ball_pct']),
        'SquaredUp%': get_stat(['SquaredUp%', 'squared_up_pct', 'sq_up_rate']),
        'Blast%': get_stat(['Blast%', 'blast_rate']),
        'blast_pct': 0.0,  # Initializing both naming conventions
    }

    # Plate-discipline rates normalised to fractions.
    o_swing = _as_fraction(res['O-Swing%'])
    k_rate = _as_fraction(res['K%'])
    sw_str = _as_fraction(res['SwStr%'])

    # Captured before the launch-angle inference below, as in the original.
    la_avg = res['LaunchAngle']

    # NOTE(review): the original also computed a whiff penalty, a launch-angle
    # lift factor and a 10% rookie discount, then overwrote the HR line that
    # used them with one that does not. The surviving (last-assigned) formula
    # is kept here so outputs do not change; confirm which was intended. Also
    # note the rookie check keyed on an 'MLB_AB' column and treated every
    # player as a rookie when that column was absent.

    # --- CONTACT FLOOR LOGIC ---
    contact_floor = 1.05 if sw_str < 0.08 else 1.0
    res['AVG'] *= contact_floor

    # DYNAMIC RATE SCALING: rates regress with volume above 300 AB.
    fatigue_penalty = 1.0 - (max(0, target_ab - 300) * 0.0001)
    res['AVG'] *= fatigue_penalty
    res['SLG'] *= fatigue_penalty

    # VOLUMETRIC OUTPUTS
    is_aggressive = (o_swing > 0.38 and k_rate < 0.18 and sw_str < 0.11)
    arch_mult = 1.38 if is_aggressive else 1.0
    p_factor = 1.0 + (max(0, res['Barrel%'] - 0.11) ** 1.6) * 14.0
    hr_fb_rate = np.clip(res['Barrel%'] * 2.2 * p_factor * arch_mult, 0.04, 0.48)
    res['HR'] = target_ab * (res['FB%'] * 0.4) * hr_fb_rate * 1.12

    # WATERFALL (forces wRC+ / OPS movement)
    res['SLG'] *= (1.0 + (max(0, la_avg - 15) * 0.005))
    # BB% adds 0.85 points of OBP per unit of walk-rate fraction. The
    # percent-scale branch previously used 0.085, which added ~0.85 to OBP
    # for a 10% walk rate; 0.0085 makes both scales agree.
    bb_bonus = res['BB%'] * 0.0085 if res['BB%'] > 1 else res['BB%'] * 0.85
    res['OBP'] = res['AVG'] + bb_bonus
    res['OPS'] = res['OBP'] + res['SLG']
    res['wRC+'] = (res['OPS'] / 0.730) * 100
    res['SB'] *= (target_ab / 550.0)

    # --- EV90 REPAIR ---
    if res['EV90'] == 0:
        backup_ev = get_stat(['avg_ev', 'ev', 'ExitVelocity', 'EV'])
        if backup_ev > 0:
            res['EV90'] = backup_ev + 4.2
        else:
            # Final fallback so downstream inference has a value to work from.
            res['EV90'] = 103.1

    # --- DYNAMIC BAT SPEED INFERENCE ---
    if res['avg_bat_speed'] == 0:
        if res['EV90'] > 0:
            res['avg_bat_speed'] = (res['EV90'] - 18.0) / 1.15
        else:
            res['avg_bat_speed'] = 71.5  # Hard floor

    # --- FORCED BLAST% REPAIR (v11.4) ---
    current_bs = res['avg_bat_speed'] if res['avg_bat_speed'] > 0 else 71.5
    eff_score = current_bs + (res['Barrel%'] * 100)
    if res['Blast%'] <= 0:
        if eff_score >= 164:
            res['Blast%'] = round(0.16 + (res['Barrel%'] * 0.8), 3)
        else:
            res['Blast%'] = round(max(0.04, res['Barrel%'] * 1.2), 3)

    # --- DYNAMIC LAUNCH ANGLE INFERENCE ---
    if res['LaunchAngle'] == 0:
        fb_val = res['FB%'] if res['FB%'] < 1 else res['FB%'] / 100
        # 38% FB maps to 12.2 degrees; each 1% of FB moves LA by 0.5 degrees.
        res['LaunchAngle'] = 12.2 + ((fb_val - 0.38) * 50)
        res['LaunchAngle'] = np.clip(res['LaunchAngle'], 4.0, 22.0)

    # --- KASPER v1.5 ENSEMBLE BLENDING ---
    physics_hr = res['HR']
    context_model = train_kasper_ensemble()  # Or load a pre-trained instance
    if context_model is not None:
        # The original indexed res['Age'] / res['Career_AB'], which are never
        # populated, and would raise KeyError as soon as a model existed.
        age = 0.0  # matches the fillna(0) used for training features
        if 'Age' in data.columns:
            known_ages = data['Age'].dropna()
            if not known_ages.empty:
                age = float(known_ages.iloc[-1])
        # presumably AB summed over the loaded seasons is an adequate stand-in
        # for career AB; verify against the intended definition.
        career_ab = float(data['AB'].sum()) if 'AB' in data.columns else 0.0

        p_feat = np.array([[age, res['K%'], res['BB%'], res['avg_bat_speed'], res['LaunchAngle']]])
        xgb_correction = context_model.predict(p_feat)[0]

        # Weight physics higher for low-AB / younger players.
        if age < 24 or career_ab < 600:
            blend_weight = 0.85  # Trust Physics for Prospects
        else:
            blend_weight = 0.55  # Trust Historical Pattern for Veterans
        res['HR'] = (physics_hr * blend_weight) + (xgb_correction * (1 - blend_weight))

    return res, h_sig


# --- V.
# RENDER ---
def _safe_int(value):
    """Return ``int(value)``, or 0 when ``value`` is NaN/inf or not numeric."""
    try:
        return int(value) if np.isfinite(value) else 0
    except TypeError:
        return 0


def _render_stability_banner(sig):
    """Show the stability label for an engine's stability factor ``sig``.

    ``sig <= 1.05`` is STABLE, ``<= 1.20`` MODERATE, anything above VOLATILE.
    """
    if sig <= 1.05:
        stab_color, stab_text, stab_border = "#00ff7f22", "STABLE", "#00ff7f"
    elif sig <= 1.20:
        stab_color, stab_text, stab_border = "#ffd70022", "MODERATE", "#ffd700"
    else:
        stab_color, stab_text, stab_border = "#ff4b4b22", "VOLATILE", "#ff4b4b"
    # NOTE(review): stab_color / stab_border are not referenced by the string
    # below; the HTML wrapper that used them appears to be missing from this
    # copy of the file - restore it if available.
    variance_pct = (sig - 1.0) * 100
    st.markdown(f"""

PROJECTION {stab_text} | Stability Factor: {sig:.3f}x (+{variance_pct:.1f}% Variance)

""", unsafe_allow_html=True)


# The selectbox yields None when no player qualifies; stop before .upper().
if not selected_name:
    st.warning("No active 2025 players are available for this view.")
    st.stop()

st.header(f"🚀 {selected_name.upper()} | 2026 {mode.upper()} PROJECTIONS")

if mode == "Pitcher":
    hist = df_p[df_p['Name'] == selected_name].copy()
    workload = st.slider("Projected IP", 50, 220, 165)
    stats, sig = run_p_engine_v10(hist, workload)

    _render_stability_banner(sig)

    c1, c2, c3 = st.columns(3)
    c4, c5, c6 = st.columns(3)
    c7, c8 = st.columns(2)
    c9, c10, c11 = st.columns(3)
    c1.metric("1. Strikeouts", _safe_int((stats['K/9'] / 9) * workload))
    c2.metric("2. ERA", f"{stats['ERA']:.2f}")
    c3.metric("3. WHIP", f"{stats['WHIP']:.2f}")
    c4.metric("4. SwStr%", f"{stats['SwStr%']*100:.2f}%")
    c5.metric("5. CSW%", f"{stats['CSW%']*100:.2f}%")
    c6.metric("6. K/9", f"{stats['K/9']:.2f}")
    c7.metric("7. HR/9", f"{stats['HR/9']:.2f}")
    c8.metric("8. BB/9", f"{stats['BB/9']:.2f}")
    c9.metric("9. Stuff+", f"{float(stats['Stuff+']):.0f}")
    c10.metric("10. Location+", f"{float(stats['Location+']):.0f}")
    c11.metric("11. Pitching+", f"{float(stats['Pitching+']):.0f}")
else:
    hist = df_h[df_h['Name'] == selected_name].copy()
    workload = st.slider("Projected AB", 100, 700, 550)
    stats, sig = run_h_engine_v10_8(hist, workload)

    _render_stability_banner(sig)

    # NOTE(review): the section headers below look like they lost their HTML
    # wrapper; the visible text is preserved as-is.
    st.markdown("\n\nStats\n\n", unsafe_allow_html=True)
    c1, c2, c3, c4 = st.columns(4)
    c5, c6, c7, c8 = st.columns(4)
    c1.metric("1. Home Runs", f"{stats['HR']:.1f}")
    c2.metric("2. wRC+", _safe_int(stats['wRC+']))
    c3.metric("3. AVG", f"{stats['AVG']:.3f}")
    c4.metric("4. OBP", f"{stats['OBP']:.3f}")
    c5.metric("5. SLG", f"{stats['SLG']:.3f}")
    c6.metric("6. OPS", f"{stats['OPS']:.3f}")
    c7.metric("7. EV90", f"{stats['EV90']:.2f}")
    c8.metric("8. Stolen Bases", f"{stats['SB']:.1f}")

    st.markdown("\n\nPlate Discipline Metrics\n\n", unsafe_allow_html=True)
    d1, d2, d3, d4, d5 = st.columns(5)
    d1.metric("1. Z-Contact%", f"{stats['Z-Contact%']*100:.2f}%")
    d2.metric("2. O-Swing%", f"{stats['O-Swing%']*100:.2f}%")
    d3.metric("3. SwStr%", f"{stats['SwStr%']*100:.2f}%")
    d4.metric("4. BB%", f"{stats['BB%']*100:.2f}%")
    # Guard the ratio against a zero walk rate.
    d5.metric("5. K:BB", f"{stats['K%']/stats['BB%']:.2f}" if stats['BB%'] > 0 else "0.0")

    st.markdown("\n\nBatted Ball Metrics\n\n", unsafe_allow_html=True)
    b1, b2, b3 = st.columns(3)
    b4, b5, b6 = st.columns(3)
    b1.metric("1. Average Bat Speed", f"{stats['avg_bat_speed']:.1f}")
    b2.metric("2. Barrel%", f"{stats['Barrel%']*100:.1f}%")
    b3.metric("3. GB%", f"{stats['GB%']*100:.2f}%")
    b4.metric("4. FB%", f"{stats['FB%']*100:.2f}%")
    b5.metric("5. Pull%", f"{stats['Pull%']*100:.1f}%")
    b6.metric("6. Average Launch Angle", f"{stats['LaunchAngle']:.1f}")