Spaces:
Running
Running
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import pybaseball as pb | |
| import xgboost as xgb | |
| # --- KASPER v1.5 GLOBAL MEMORY --- | |
# Module-level sample store read by train_kasper_ensemble() and rebound by
# ingest_to_kasper_memory(). The membership guard keeps an already-populated
# buffer from being reset if this code is executed again in the same
# namespace. No `global` statement is needed here: assignments at module
# scope are already global.
if 'training_buffer' not in globals():
    training_buffer = pd.DataFrame()
def ingest_to_kasper_memory(df):
    """Add qualifying rows of ``df`` to the shared training buffer.

    A row qualifies when its ``Season`` is 2021 or later and its ``AB`` is
    greater than 150. Qualifying rows are appended to the module-level
    ``training_buffer`` and the result is de-duplicated on
    (``Name``, ``Season``), keeping the first occurrence.

    Returns nothing; the module-level buffer is rebound.
    """
    global training_buffer
    recent_enough = df['Season'] >= 2021
    established = df['AB'] > 150
    candidates = df[recent_enough & established].copy()
    if candidates.empty:
        # Nothing qualifies, so the buffer is left untouched.
        return
    combined = pd.concat([training_buffer, candidates])
    training_buffer = combined.drop_duplicates(subset=['Name', 'Season'])
def train_kasper_ensemble():
    """Train the XGBoost HR model from the module-level training buffer.

    Features are ``Age``, ``K%``, ``BB%``, ``avg_bat_speed`` and
    ``LaunchAngle`` (missing feature values are filled with 0); the target
    is the ``HR`` column.

    Returns:
        A fitted ``xgb.XGBRegressor``, or ``None`` when the buffer cannot
        support training: fewer than 200 rows, a feature/target column is
        absent, or fewer than 200 rows carry a non-missing ``HR`` value.
    """
    min_rows = 200  # Safety floor to prevent training on tiny data
    features = ['Age', 'K%', 'BB%', 'avg_bat_speed', 'LaunchAngle']
    target = 'HR'

    if len(training_buffer) < min_rows:
        return None

    # A buffer built from a loader that lacks one of these columns would
    # otherwise raise KeyError; callers already treat None as "no model".
    if any(col not in training_buffer.columns for col in features + [target]):
        return None

    # Rows without a target cannot be learned from.
    usable = training_buffer.dropna(subset=[target])
    if len(usable) < min_rows:
        return None

    X = usable[features].fillna(0)
    y = usable[target]

    model = xgb.XGBRegressor(
        n_estimators=150,
        learning_rate=0.08,
        max_depth=3,  # Shallow depth prevents prospect over-fitting
        objective='reg:squarederror',
    )
    model.fit(X, y)
    return model
| # --- I. UI STYLES (LOCKED) --- | |
def apply_sovereign_styles():
    """Inject the app's custom CSS into the page.

    The stylesheet sets the ``.main`` colours and font, restyles Streamlit
    metric cards (``stMetric`` / ``stMetricValue``), and defines the
    ``category-label``, ``stability-label`` and ``step-header`` classes.
    It is emitted through ``st.markdown`` with raw HTML enabled.
    """
    stylesheet = """
<style>
.main { background-color: #0e1117; color: white; font-family: 'Segoe UI', sans-serif; }
div[data-testid="stMetric"] {
background-color: rgba(0, 212, 255, 0.08); padding: 22px; border-radius: 12px; border: 1px solid rgba(0, 212, 255, 0.3);
}
[data-testid="stMetricValue"] { color: #00d4ff !important; font-family: 'Courier New', monospace; font-size: 2.1rem !important; font-weight: 800; }
.category-label { color: #00d4ff; font-weight: 800; font-size: 1.3rem; margin: 30px 0 15px 0; border-left: 5px solid #00d4ff; padding-left: 12px; text-transform: uppercase; }
.stability-label { font-size: 0.95rem; font-weight: bold; padding: 12px; border-radius: 6px; text-align: center; margin-bottom: 20px; border: 1px solid rgba(255,255,255,0.1); }
.step-header { color: #00d4ff; font-weight: bold; margin-top: 15px; border-bottom: 1px solid #333; }
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
| # --- II. DATA CORE (2021-2025 PROTECTED) --- | |
def load_master_data():
    """Download 2021-2025 pitching and batting tables and normalise them.

    For each season the pitching and batting tables are fetched through
    ``pybaseball`` with ``qual=0``, tagged with a ``Season`` column, and
    stacked. Pitchers get a ``Skill_ERA`` column (``SIERA`` when present,
    otherwise ``FIP * 0.92 + 0.35``). Hitters are optionally enriched with
    2024 bat-speed columns and a derived ``EV90``. Pitcher ``Stuff+`` and
    ``Loc+`` columns are guaranteed to exist and contain no NaN.

    Returns:
        tuple: ``(pdf, hdf)`` - the pitcher and hitter DataFrames.

    Raises:
        ValueError: if no season returned any pitching rows or any batting
            rows (the same exception type ``pd.concat`` would raise on an
            empty list, with a clearer message).
    """
    years = [2021, 2022, 2023, 2024, 2025]
    p_store, h_store = [], []

    # Status updates happen in the calling block; this loop only fetches.
    for yr in years:
        p = pb.pitching_stats(yr, qual=0)
        if not p.empty:
            p['Season'] = yr
            p['Skill_ERA'] = p['SIERA'] if 'SIERA' in p.columns else p['FIP'] * 0.92 + 0.35
            p_store.append(p)
        h = pb.batting_stats(yr, qual=0)
        if not h.empty:
            h['Season'] = yr
            h_store.append(h)

    if not p_store or not h_store:
        raise ValueError("No pitching or batting data was returned for 2021-2025")
    pdf, hdf = pd.concat(p_store), pd.concat(h_store)

    # Bat-speed enrichment is best-effort: any failure (missing endpoint,
    # changed schema, network error) leaves hdf without the extra columns.
    # Only Exception is caught so Ctrl-C / SystemExit still propagate.
    try:
        savant_bs = pb.statcast_batter_bat_speed(2024)
        savant_bs['Name'] = savant_bs['first_name'] + ' ' + savant_bs['last_name']
        # One row per name, otherwise a repeated name on the Savant side
        # would multiply that hitter's season rows in the merge.
        bat_cols = savant_bs[['Name', 'avg_bat_speed', 'blast_rate']].drop_duplicates(subset='Name')
        hdf = hdf.merge(bat_cols, on='Name', how='left')
    except Exception:
        pass

    if 'ExitVelocity' in hdf.columns and 'MaxEV' in hdf.columns:
        hdf['EV90'] = (hdf['ExitVelocity'] * 1.05) + (hdf['MaxEV'] * 0.08)

    # Map the various source spellings to one internal column name.
    plus_map = {
        'Stuff+': ['Stuff+', 'stuff_plus', 'stf_plus'],
        'Loc+': ['Location+', 'location_plus', 'loc_plus'],
    }
    for internal_key, aliases in plus_map.items():
        if internal_key not in pdf.columns:
            alias = next((a for a in aliases if a in pdf.columns), None)
            if alias is not None:
                pdf = pdf.rename(columns={alias: internal_key})
        # Default to 100 (league average) both for missing values and for a
        # column that no source provided at all.
        if internal_key in pdf.columns:
            pdf[internal_key] = pdf[internal_key].fillna(100.0)
        else:
            pdf[internal_key] = 100.0

    return pdf, hdf
st.set_page_config(page_title="2026 Kasper Projections", layout="wide")
apply_sovereign_styles()
import time
# --- One-time ingestion with visible credits ---
# The loaded tables are cached in st.session_state under 'master', so the
# status panel and the download only run when that key is absent.
if 'master' not in st.session_state:
    credits_md = """
**Twitter:** [@GoatedCardinals](https://x.com/goatedcardinals)\n
**Follow Kasper:** [@KasperMLB](https://x.com/KasperMLB)\n
**Updates:** Stay Tuned! MLB Futures tool being built.\n
**Last Updated:** March 02, 2026 (HF Data Push)
"""
    with st.status("⚙️ Kasper Engine: Ingesting Yearly Data...", expanded=True) as status:
        # Credits header shown inside the status panel.
        st.markdown("### 👑 Creator Credits")
        st.markdown(credits_md)
        st.markdown("---")

        # Progress widgets: a bar plus a placeholder whose text is replaced.
        prog_bar = st.progress(0)
        status_text = st.empty()

        def _announce(pct, message):
            """Optionally move the bar to ``pct``, then show ``message``."""
            if pct is not None:
                prog_bar.progress(pct)
            status_text.write(message)

        _announce(None, "📡 Handshaking with FanGraphs & Statcast...")
        time.sleep(0.5)
        _announce(30, "📂 Processing 5-Year Skill Weights...")

        # The heavy lift: fetch and cache the (pitchers, hitters) pair.
        st.session_state.master = load_master_data()

        _announce(75, "🚀 Finalizing Kasper v10.6 Calculations...")
        time.sleep(0.5)
        prog_bar.progress(100)
        status.update(label="✅ Kasper Engine Active.", state="complete", expanded=False)
# --- III. SIDEBAR (QUALIFIED SORTING) ---
mode = st.sidebar.radio("⚔️ Active View Mode", ["Pitcher", "Hitter"])

# Unpack the cached (pitchers, hitters) pair; halt this run if it is absent.
if 'master' not in st.session_state:
    st.stop()
df_p, df_h = st.session_state.master

# Build the list of selectable names: players with 2025 activity in the
# chosen mode (IP > 0.1 for pitchers, AB > 0 for hitters).
if mode == "Pitcher":
    active_p = df_p[(df_p['Season'] == 2025) & (df_p['IP'] > 0.1)]['Name'].unique()
    filtered_list = sorted(str(n) for n in active_p if pd.notna(n))
else:
    active_h = df_h[(df_h['Season'] == 2025) & (df_h['AB'] > 0)]['Name'].unique()
    filtered_list = sorted(str(n) for n in active_h if pd.notna(n))

# With no options the selectbox returns None, and the render section then
# fails on selected_name.upper(). Stop with a visible message instead.
if not filtered_list:
    st.sidebar.warning("No 2025 profiles are available for this view.")
    st.stop()

# --- SIDEBAR SELECTBOX ---
selected_name = st.sidebar.selectbox("Active Profile Search", filtered_list)
# Sidebar reference panel: three tabs of static explanatory text.
with st.sidebar.expander("🧪 KASPER TECH MANUAL (v11)", expanded=True):
    tm_tabs = st.tabs(["🧬 Core", "🎯 Steps", "⚖️ Audit"])

    # Tab 0: core architecture notes.
    with tm_tabs[0]:
        st.markdown("### **The Anchor Philosophy**")
        st.info("**Hitter Anchor:** 5-Year Exp Decay + Statcast Physicality.")
        st.info("**Pitcher Anchor:** SIERA (Skill-Interactive ERA).")
        st.write("""
Kasper uses **SIERA** over FIP because SIERA accounts for ball-in-play complexity and the 'strikeout-walk' interaction.
For hitters, we prioritize **Barrel%** and **Bat Speed** over historical HR totals to find 'hidden' breakouts.
""")

    # Tab 1: step-by-step breakdown for the engine chosen in the selectbox.
    with tm_tabs[1]:
        hitter_steps = {
            "1. KNN Vectoring": "Clusters hitters by physical similarity (Exit Velo/Launch Angle).",
            "2. TVG 2.0 Anchor": "Weights 2025 at 45% to capture current peak performance.",
            "3. Trajectory Bias": "Maps GB/FB ratios to historic HR/FB efficiency.",
            "4. Random Forest": "Isolates noise from actual skill growth.",
            "5. Aggressive Tag": "Corrects for 'Free Swingers' (O-Swing > 38%) who defy traditional discipline models.",
            "6. Waterfall Lock": "Ensures HR/AVG/OBP remain mathematically correlated.",
            "7. Quad-Power Curve": "Allows elite outliers (Judge/Ohtani) to break the 50-HR ceiling.",
            "8. Monte Carlo": "10k simulations to produce the final 'Most Likely' output.",
            "9. Rookie Translation": "Applies a 10% 'Experience Discount' to prospects with <100 career MLB ABs.",
        }
        pitcher_steps = {
            "1. KNN Vectoring": "Clusters pitchers by Stuff+ and velocity consistency.",
            "2. SIERA Weighting": "Calculates skill-based baseline using 5-year weighted SIERA.",
            "3. K/BB Interaction": "Projects K/9 and BB/9 as co-dependent variables.",
            "4. HR/9 Normalization": "Regresses outlier HR/9 seasons toward home-park averages.",
            "5. Fatigue Scaling": "Adjusts for IP volume (Linear decay of stuff over long seasons).",
            "6. Waterfall Lock": "Ensures ERA/WHIP/CSW% align with projected skill.",
            "7. League Context": "Adjusts for current MLB run-scoring environment (1.12x).",
            "8. Monte Carlo": "10k simulations to establish the Stability Factor (sig).",
        }
        step_choice = st.selectbox("Select Engine", ["Hitter v11.0", "Pitcher v10.0"])
        steps = hitter_steps if step_choice == "Hitter v11.0" else pitcher_steps
        for title, blurb in steps.items():
            st.markdown(f"**{title}**")
            st.caption(blurb)

    # Tab 2: comparison table against other projection systems.
    with tm_tabs[2]:
        st.markdown("### **Consensus Variance**")
        audit_rows = {
            "System": ["ZiPS", "Steamer", "ATC", "THE BAT X", "OOPSY"],
            "Diff (v10.6)": ["-12% HR", "-8% HR", "-5% AVG", "+2% EV", "-4% ERA"],
        }
        st.table(audit_rows)
        st.markdown("""
**Why we differ:**
Kasper is **12% more aggressive** on elite power than ZiPS/Steamer because we weight physical tools (Bat Speed) over historical regression.
""")
| # --- IV. ENGINES --- | |
def get_weighted_value(data, metric):
    """Return the recency-weighted value of ``metric`` for one player.

    Seasons 2025 through 2021 carry weights 0.45 / 0.25 / 0.15 / 0.10 /
    0.05. For each season the first row's value is used; seasons that are
    absent or NaN are skipped and the remaining weights are renormalised.

    Args:
        data: DataFrame with a ``Season`` column and, usually, ``metric``.
        metric: Column name to aggregate.

    Returns:
        The weighted average; if no weighted season has a value, the plain
        column mean; and ``0.0`` when the column is absent or holds no
        usable value at all. Returning 0.0 rather than NaN keeps the
        "missing" signal consistent, since callers test it with ``== 0``.
    """
    if metric not in data.columns:
        return 0.0

    weights = {2025: 0.45, 2024: 0.25, 2023: 0.15, 2022: 0.10, 2021: 0.05}
    weighted_sum, total_weight = 0, 0
    for yr, w in weights.items():
        season_vals = data.loc[data['Season'] == yr, metric]
        if season_vals.empty:
            continue
        first = season_vals.iloc[0]
        if pd.isna(first):
            continue
        weighted_sum += first * w
        total_weight += w

    if total_weight > 0:
        return weighted_sum / total_weight

    # No weighted season contributed: fall back to the plain mean, which is
    # NaN for an empty or all-NaN column.
    fallback = data[metric].mean()
    return 0.0 if pd.isna(fallback) else fallback
| # LOCKED PITCHER ENGINE v10.0 | |
def run_p_engine_v10(data, target_ip):
    """Project a pitcher's 2026 line from his season history.

    The function works only from its arguments; it does not read the
    module-level ``hist`` / ``workload`` names.

    Args:
        data: DataFrame of one pitcher's season rows. Reads ``Season``,
            ``IP`` and, through ``get_weighted_value``, ``Skill_ERA``,
            ``WHIP``, ``K/9``, ``BB/9``, ``HR/9`` and ``SwStr%``. Also uses
            ``K/9``, ``ERA``, ``Stuff+``, ``Loc+`` and any available
            velocity / vertical-break / extension column from the latest
            season. ``data`` itself is not modified.
        target_ip: Projected innings pitched.

    Returns:
        tuple: ``(res, p_sig)``. ``res`` maps stat names to projected
        values; ``p_sig`` is the workload-based stability factor (1.0 when
        ``target_ip`` does not exceed the pitcher's mean ``IP``). The rate
        stats come from the median of 10,000 random normal draws, so they
        vary slightly between calls.

    Raises:
        ValueError: if ``data`` has no rows.
    """
    if data.empty:
        raise ValueError("run_p_engine_v10 requires at least one season row")

    # Private copy ordered by season so "last row" means "latest season".
    hist = data.copy()
    if 'Season' in hist.columns:
        hist = hist.sort_values('Season', kind='mergesort')
    row = hist.iloc[-1]

    b_siera = get_weighted_value(data, 'Skill_ERA')
    b_whip, b_k9, b_bb9, b_hr9, b_swstr = [
        get_weighted_value(data, x) for x in ['WHIP', 'K/9', 'BB/9', 'HR/9', 'SwStr%']
    ]
    p_sig = 1.0 + ((max(0, target_ip - data['IP'].mean()) ** 1.1) * 0.0015)

    def sim_median(mean, spread):
        """Median of 10,000 normal draws around ``mean``."""
        return np.percentile(np.random.normal(mean, spread, 10000), 50)

    res = {
        'ERA': sim_median(b_siera * 0.98, 0.15 * p_sig),
        'WHIP': sim_median(b_whip * 0.99, 0.04 * p_sig),
        'K/9': sim_median(b_k9, b_k9 * 0.03),
        'SwStr%': sim_median(b_swstr, b_swstr * 0.04),
        'BB/9': sim_median(b_bb9, 0.20),
        'HR/9': sim_median(b_hr9, 0.15),
    }
    res['CSW%'] = res['SwStr%'] + 0.176

    # --- Volumetric scaling ---
    # The penalty grows linearly with distance from a 150 IP baseline, in
    # either direction.
    fatigue_scale = 1.0 + (abs(target_ip - 150) * 0.0005)
    res['K/9'] = res['K/9'] * (1.0 + (1.0 / (target_ip + 1)))  # small-sample K/9 boost
    res['BB/9'] = res['BB/9'] * fatigue_scale
    res['ERA'] = res['ERA'] * fatigue_scale
    res['WHIP'] = res['WHIP'] * fatigue_scale
    res['HR/9'] = res['HR/9'] * (1.0 + (target_ip * 0.0002))

    # Counting stats derived from the scaled rates.
    res['SO'] = (res['K/9'] / 9) * target_ip
    res['BB'] = (res['BB/9'] / 9) * target_ip
    res['ER'] = (res['ERA'] / 9) * target_ip

    # --- Physical proxy for Stuff+ ---
    # Baselines the latest-season readings are normalised against.
    v_baseline, i_baseline, e_baseline = 93.8, 15.6, 6.4
    velo_keys = ['avg_velo', 'release_speed', 'Velocity', 'FBv']
    iv_keys = ['avg_ivb', 'p_v_break', 'induced_vertical_break', 'IVB']
    ex_keys = ['extension', 'release_extension', 'avg_extension']

    def get_val(r, keys, default=0.0):
        """First non-missing value among ``keys`` in row ``r``, else default."""
        for k in keys:
            # A present-but-NaN reading is treated as missing so it cannot
            # turn the whole proxy into NaN.
            if k in r and pd.notna(r[k]):
                return r[k]
        return default

    curr_v = get_val(row, velo_keys, v_baseline)
    curr_i = get_val(row, iv_keys, i_baseline)
    curr_e = get_val(row, ex_keys, e_baseline)

    velo_score = (curr_v / v_baseline) * 100
    ivb_score = (curr_i / i_baseline) * 100
    ext_score = (curr_e / e_baseline) * 100
    stuff_proxy = (velo_score * 0.45) + (ivb_score * 0.35) + (ext_score * 0.20)

    # --- Waterfall-weighted history ---
    # Weights for the five most recent rows, oldest to newest; with fewer
    # rows only the trailing weights are used (np.average renormalises).
    wf_weights = [0.05, 0.08, 0.15, 0.25, 0.47]
    recent = hist.tail(len(wf_weights))
    recent_weights = wf_weights[-len(recent):]

    def waterfall(column, league_avg):
        """Weighted mean of ``column``; gaps are filled with ``league_avg``."""
        if column in recent.columns:
            vals = recent[column].fillna(league_avg)
        else:
            vals = pd.Series(league_avg, index=recent.index)
        return np.average(vals, weights=recent_weights)

    k_history = waterfall('K/9', 8.5)
    era_history = waterfall('ERA', 4.20)

    # --- Pitching+ blend ---
    # 70% latest Stuff+ / 30% physics proxy; Loc+ regressed 10% toward 100.
    stuff_25 = get_val(row, ['Stuff+'], 100.0)
    loc_25 = get_val(row, ['Loc+'], 100.0)
    final_stuff = round((stuff_25 * 0.70) + (stuff_proxy * 0.30), 1)
    final_loc = round((loc_25 * 0.90) + (100 * 0.10), 1)
    final_pitching_plus = round((final_stuff * 0.60) + (final_loc * 0.40), 1)
    f_p_plus_scalar = float(final_pitching_plus)

    # Talent-first projection: 70% talent expectation / 30% waterfall history.
    k9_talent_expectation = 8.5 + ((f_p_plus_scalar - 100) * 0.22)
    era_talent_expectation = 4.20 - ((f_p_plus_scalar - 100) * 0.06)
    final_k9_proj = (k9_talent_expectation * 0.70) + (k_history * 0.30)
    final_era_proj = (era_talent_expectation * 0.70) + (era_history * 0.30)

    # --- Results mapping ---
    res['Stuff+'] = final_stuff
    res['Location+'] = final_loc
    res['Pitching+'] = final_pitching_plus
    res['K/9_Projected'] = round(max(4.0, final_k9_proj), 2)
    res['ERA_Projected'] = round(max(1.50, final_era_proj), 2)
    res['WHIP_Projected'] = round(max(0.80, 1.30 - ((f_p_plus_scalar - 100) * 0.012)), 2)

    # Variance label scales with the requested workload.
    calc_variance = (1.1 - p_sig) * (float(target_ip) / 100.0)
    res['Variance'] = round(calc_variance, 2)
    res['Sig'] = round(p_sig, 2)
    return res, p_sig
| # --- FINAL CORE v2 (SOFT-LOCK MODIFIED v10.5) --- | |
| # --- FINAL CORE v2 (REPAIRED v10.6) --- | |
def run_h_engine_v10_8(data, target_ab):
    """Project a hitter's 2026 line from his season history.

    Args:
        data: DataFrame of one hitter's season rows. Stats are read through
            ``get_weighted_value`` using the first matching column alias;
            ``Age`` and ``AB`` are read directly when present.
        target_ab: Projected at-bats.

    Returns:
        tuple: ``(res, h_sig)``. ``res`` maps stat names to projected
        values; ``h_sig`` is the sample-size stability factor, which grows
        as ``target_ab`` shrinks.
    """
    def get_stat(col_list):
        """Weighted value of the first alias present in ``data``, else 0.0."""
        for c in col_list:
            if c in data.columns:
                val = get_weighted_value(data, c)
                # 0.0 is the "missing" sentinel the repair steps test for.
                return 0.0 if pd.isna(val) else val
        return 0.0

    def as_fraction(v):
        """Treat values above 1 as percentages and convert them to 0-1."""
        return v / 100 if v > 1 else v

    # Sample-size sensitivity: the factor rises as target_ab falls.
    sample_size_weight = (600 / (target_ab + 1)) ** 0.5
    h_sig = 1.0 + (sample_size_weight * 0.15)

    res = {
        'AVG': get_stat(['AVG']),
        'OBP': get_stat(['OBP']),
        'SLG': get_stat(['SLG']),
        'SB': get_stat(['SB']),
        'SwStr%': get_stat(['SwStr%', 'SwStr', 'swinging_strike_pct']),
        'BB%': get_stat(['BB%', 'BB_pct']),
        'K%': get_stat(['K%', 'SO_pct']),
        'Barrel%': get_stat(['Barrel%', 'barrel_rate']),
        'FB%': get_stat(['FB%', 'fly_ball_pct']),
        'Pull%': get_stat(['Pull%', 'pull_pct']),
        'Z-Swing%': get_stat(['Z-Swing%', 'Z-Swing', 'z_swing_pct']),
        'O-Swing%': get_stat(['O-Swing%', 'O-Swing', 'o_swing_pct']),
        'Z-Contact%': get_stat(['Z-Contact%', 'Z-Contact', 'z_contact_pct']),
        'EV90': get_stat(['EV90', 'ev90', '90th_Percentile_EV', 'avg_best_speed', 'Exit Velocity', 'avg_distance']),
        'avg_bat_speed': get_stat(['avg_bat_speed', 'bat_speed', 'Avg_Bat_Speed', 'swing_speed']),
        'LaunchAngle': get_stat(['LaunchAngle', 'launch_angle_avg', 'avg_launch_angle', 'LA']),
        'GB%': get_stat(['GB%', 'GB', 'ground_ball_pct']),
        'SquaredUp%': get_stat(['SquaredUp%', 'squared_up_pct', 'sq_up_rate']),
        'Blast%': get_stat(['Blast%', 'blast_rate']),
        'blast_pct': 0.0,  # second naming convention, kept initialised
    }

    o_swing = as_fraction(res['O-Swing%'])
    k_rate = as_fraction(res['K%'])
    sw_str = as_fraction(res['SwStr%'])
    la_avg = res['LaunchAngle']

    # Contact floor: a low swinging-strike rate earns a 5% AVG boost.
    contact_floor = 1.05 if sw_str < 0.08 else 1.0
    res['AVG'] *= contact_floor

    # Volume fatigue: AVG and SLG lose 0.01% per at-bat beyond 300.
    fatigue_penalty = 1.0 - (max(0, target_ab - 300) * 0.0001)
    res['AVG'] *= fatigue_penalty
    res['SLG'] *= fatigue_penalty

    # --- Home-run volume ---
    is_aggressive = (o_swing > 0.38 and k_rate < 0.18 and sw_str < 0.11)
    arch_mult = 1.38 if is_aggressive else 1.0
    p_factor = 1.0 + (max(0, res['Barrel%'] - 0.11) ** 1.6) * 14.0
    hr_fb_rate = np.clip(res['Barrel%'] * 2.2 * p_factor * arch_mult, 0.04, 0.48)
    # NOTE(review): the previous revision also computed a launch-angle lift
    # factor, a rookie discount and a whiff penalty, but its final HR
    # assignment overwrote the one that used them. The effective formula is
    # kept here so displayed values do not change; confirm whether those
    # modifiers were meant to apply.
    res['HR'] = target_ab * (res['FB%'] * 0.4) * hr_fb_rate * 1.12

    # --- Waterfall: keep OBP / OPS / wRC+ tied to the adjusted rates ---
    res['SLG'] *= (1.0 + (max(0, la_avg - 15) * 0.005))
    # BB% is normalised to a fraction first; the percent-scale branch
    # previously added ten times too much to OBP.
    res['OBP'] = res['AVG'] + as_fraction(res['BB%']) * 0.85
    res['OPS'] = res['OBP'] + res['SLG']
    res['wRC+'] = (res['OPS'] / 0.730) * 100
    res['SB'] *= (target_ab / 550.0)

    # --- EV90 repair ---
    if res['EV90'] == 0:
        backup_ev = get_stat(['avg_ev', 'ev', 'ExitVelocity', 'EV'])
        # Average EV plus 4.2 when available, otherwise a fixed 103.1.
        res['EV90'] = backup_ev + 4.2 if backup_ev > 0 else 103.1

    # --- Bat speed inference ---
    if res['avg_bat_speed'] == 0:
        if res['EV90'] > 0:
            # Reverse-engineered from EV90: (EV90 - 18) / 1.15.
            res['avg_bat_speed'] = (res['EV90'] - 18.0) / 1.15
        else:
            res['avg_bat_speed'] = 71.5  # hard floor

    # --- Blast% repair ---
    current_bs = res['avg_bat_speed'] if res['avg_bat_speed'] > 0 else 71.5
    eff_score = current_bs + (res['Barrel%'] * 100)
    if res['Blast%'] <= 0:
        if eff_score >= 164:
            res['Blast%'] = round(0.16 + (res['Barrel%'] * 0.8), 3)
        else:
            res['Blast%'] = round(max(0.04, res['Barrel%'] * 1.2), 3)

    # --- Launch angle inference ---
    if res['LaunchAngle'] == 0:
        fb_val = res['FB%'] if res['FB%'] < 1 else res['FB%'] / 100
        # A 38% fly-ball rate maps to 12.2 degrees; each point moves it 0.5.
        res['LaunchAngle'] = 12.2 + ((fb_val - 0.38) * 50)
    res['LaunchAngle'] = np.clip(res['LaunchAngle'], 4.0, 22.0)

    # --- Ensemble blending ---
    physics_hr = res['HR']
    context_model = train_kasper_ensemble()
    if context_model is not None:
        # Age and career AB come from the history frame; `res` has no such
        # keys. Unknown values default to 0, matching the fillna(0) used in
        # training and routing the player down the prospect branch.
        age = 0.0
        if 'Age' in data.columns:
            known_ages = data['Age'].dropna()
            if not known_ages.empty:
                age = float(known_ages.max())
        career_ab = float(data['AB'].sum()) if 'AB' in data.columns else 0.0

        feature_names = ['Age', 'K%', 'BB%', 'avg_bat_speed', 'LaunchAngle']
        p_feat = pd.DataFrame(
            [[age, res['K%'], res['BB%'], res['avg_bat_speed'], res['LaunchAngle']]],
            columns=feature_names,
        )
        xgb_correction = float(context_model.predict(p_feat)[0])

        # Prospects lean on physics (85%); veterans on the learned pattern.
        blend_weight = 0.85 if (age < 24 or career_ab < 600) else 0.55
        res['HR'] = (physics_hr * blend_weight) + (xgb_correction * (1 - blend_weight))

    return res, h_sig
# --- V. RENDER ---
def _render_stability_banner(sig):
    """Draw the coloured stability banner for a projection.

    ``sig`` up to 1.05 is shown as STABLE (green), up to 1.20 as MODERATE
    (yellow), and anything higher as VOLATILE (red). The banner also shows
    ``sig`` itself and ``(sig - 1) * 100`` as a variance percentage.
    """
    if sig <= 1.05:
        stab_color, stab_text, stab_border = "#00ff7f22", "STABLE", "#00ff7f"
    elif sig <= 1.20:
        stab_color, stab_text, stab_border = "#ffd70022", "MODERATE", "#ffd700"
    else:
        stab_color, stab_text, stab_border = "#ff4b4b22", "VOLATILE", "#ff4b4b"
    variance_pct = (sig - 1.0) * 100
    st.markdown(f"""
<div class="stability-label" style="background-color: {stab_color}; border: 1px solid {stab_border}; color: white;">
PROJECTION {stab_text} | Stability Factor: {sig:.3f}x (+{variance_pct:.1f}% Variance)
</div>
""", unsafe_allow_html=True)


st.header(f"🚀 {selected_name.upper()} | 2026 {mode.upper()} PROJECTIONS")
if mode == "Pitcher":
    # `hist` and `workload` stay module-level names on purpose.
    hist = df_p[df_p['Name'] == selected_name].copy()
    workload = st.slider("Projected IP", 50, 220, 165)
    stats, sig = run_p_engine_v10(hist, workload)
    _render_stability_banner(sig)

    c1, c2, c3 = st.columns(3)
    c4, c5, c6 = st.columns(3)
    c7, c8 = st.columns(2)
    c9, c10, c11 = st.columns(3)
    c1.metric("1. Strikeouts", int((stats['K/9'] / 9) * workload))
    c2.metric("2. ERA", f"{stats['ERA']:.2f}")
    c3.metric("3. WHIP", f"{stats['WHIP']:.2f}")
    c4.metric("4. SwStr%", f"{stats['SwStr%']*100:.2f}%")
    c5.metric("5. CSW%", f"{stats['CSW%']*100:.2f}%")
    c6.metric("6. K/9", f"{stats['K/9']:.2f}")
    c7.metric("7. HR/9", f"{stats['HR/9']:.2f}")
    c8.metric("8. BB/9", f"{stats['BB/9']:.2f}")
    c9.metric("9. Stuff+", f"{float(stats['Stuff+']):.0f}")
    c10.metric("10. Location+", f"{float(stats['Location+']):.0f}")
    c11.metric("11. Pitching+", f"{float(stats['Pitching+']):.0f}")
else:
    hist = df_h[df_h['Name'] == selected_name].copy()
    workload = st.slider("Projected AB", 100, 700, 550)
    stats, sig = run_h_engine_v10_8(hist, workload)
    _render_stability_banner(sig)

    st.markdown('<div class="category-label">Stats</div>', unsafe_allow_html=True)
    c1, c2, c3, c4 = st.columns(4)
    c5, c6, c7, c8 = st.columns(4)
    c1.metric("1. Home Runs", f"{stats['HR']:.1f}")
    c2.metric("2. wRC+", int(stats['wRC+']))
    c3.metric("3. AVG", f"{stats['AVG']:.3f}")
    c4.metric("4. OBP", f"{stats['OBP']:.3f}")
    c5.metric("5. SLG", f"{stats['SLG']:.3f}")
    c6.metric("6. OPS", f"{stats['OPS']:.3f}")
    c7.metric("7. EV90", f"{stats['EV90']:.2f}")
    c8.metric("8. Stolen Bases", f"{stats['SB']:.1f}")

    st.markdown('<div class="category-label">Plate Discipline Metrics</div>', unsafe_allow_html=True)
    d1, d2, d3, d4, d5 = st.columns(5)
    d1.metric("1. Z-Contact%", f"{stats['Z-Contact%']*100:.2f}%")
    d2.metric("2. O-Swing%", f"{stats['O-Swing%']*100:.2f}%")
    d3.metric("3. SwStr%", f"{stats['SwStr%']*100:.2f}%")
    d4.metric("4. BB%", f"{stats['BB%']*100:.2f}%")
    # K:BB falls back to "0.0" when the walk rate is zero.
    d5.metric("5. K:BB", f"{stats['K%']/stats['BB%']:.2f}" if stats['BB%'] > 0 else "0.0")

    st.markdown('<div class="category-label">Batted Ball Metrics</div>', unsafe_allow_html=True)
    b1, b2, b3 = st.columns(3)
    b4, b5, b6 = st.columns(3)
    b1.metric("1. Average Bat Speed", f"{stats['avg_bat_speed']:.1f}")
    b2.metric("2. Barrel%", f"{stats['Barrel%']*100:.1f}%")
    b3.metric("3. GB%", f"{stats['GB%']*100:.2f}%")
    b4.metric("4. FB%", f"{stats['FB%']*100:.2f}%")
    b5.metric("5. Pull%", f"{stats['Pull%']*100:.1f}%")
    b6.metric("6. Average Launch Angle", f"{stats['LaunchAngle']:.1f}")