# Syntrex's picture
# Update app.py
# 6464e32 verified
import streamlit as st
import pandas as pd
import numpy as np
import pybaseball as pb
import xgboost as xgb
# --- KASPER v1.5 GLOBAL MEMORY ---
# Module-level buffer of hitter seasons consumed by train_kasper_ensemble().
# The guard leaves an already-defined buffer untouched if this module body is
# executed again in a namespace that still holds it.  No `global` statement is
# needed: at module scope every assignment is already global.
if 'training_buffer' not in globals():
    training_buffer = pd.DataFrame()
def ingest_to_kasper_memory(df):
    """Append qualifying hitter seasons from ``df`` to the global training buffer.

    A row qualifies when ``Season >= 2021`` and ``AB > 150``.  Qualifying rows
    are appended to the module-level ``training_buffer`` and the result is
    de-duplicated on ``(Name, Season)`` keeping the first occurrence, so rows
    already in the buffer win over re-ingested ones.

    Args:
        df: DataFrame carrying ``Season``, ``AB`` and ``Name`` columns.
            ``None``, an empty frame, or a frame missing any of those columns
            is ignored instead of raising.

    Returns:
        None.  ``training_buffer`` is rebound to the updated frame.
    """
    global training_buffer

    required = ('Season', 'AB', 'Name')
    if df is None or df.empty or any(col not in df.columns for col in required):
        return

    qualified = df[(df['Season'] >= 2021) & (df['AB'] > 150)]
    if qualified.empty:
        return

    # Skip an empty buffer so pandas never has to concatenate an empty frame.
    frames = [frame for frame in (training_buffer, qualified) if not frame.empty]
    training_buffer = pd.concat(frames).drop_duplicates(subset=['Name', 'Season'])
def train_kasper_ensemble():
    """Fit the XGBoost correction model on the module-level ``training_buffer``.

    Features are ``Age``, ``K%``, ``BB%``, ``avg_bat_speed`` and
    ``LaunchAngle``; the target is the ``HR`` column.  Missing feature values
    are filled with 0, and a feature column that is absent from the buffer is
    treated as all-missing (rather than raising ``KeyError``) so the model
    always sees the same five inputs.

    Returns:
        A fitted ``xgb.XGBRegressor``, or ``None`` when fewer than 200 rows
        with a known ``HR`` value are available.
    """
    min_rows = 200  # safety floor: do not fit on a tiny sample
    if len(training_buffer) < min_rows or 'HR' not in training_buffer.columns:
        return None

    # Rows without a target cannot be used for supervised fitting.
    labelled = training_buffer[training_buffer['HR'].notna()]
    if len(labelled) < min_rows:
        return None

    features = ['Age', 'K%', 'BB%', 'avg_bat_speed', 'LaunchAngle']
    X = labelled.reindex(columns=features).fillna(0)
    y = labelled['HR']

    model = xgb.XGBRegressor(
        n_estimators=150,
        learning_rate=0.08,
        max_depth=3,  # shallow trees, chosen by the author to limit over-fitting
        objective='reg:squarederror',
    )
    model.fit(X, y)
    return model
# --- I. UI STYLES (LOCKED) ---
def apply_sovereign_styles():
    """Inject the app's custom CSS into the page via ``st.markdown``.

    The stylesheet themes the main container, Streamlit metric cards, and the
    ``category-label`` / ``stability-label`` / ``step-header`` helper classes.
    """
    stylesheet = """
<style>
.main { background-color: #0e1117; color: white; font-family: 'Segoe UI', sans-serif; }
div[data-testid="stMetric"] {
background-color: rgba(0, 212, 255, 0.08); padding: 22px; border-radius: 12px; border: 1px solid rgba(0, 212, 255, 0.3);
}
[data-testid="stMetricValue"] { color: #00d4ff !important; font-family: 'Courier New', monospace; font-size: 2.1rem !important; font-weight: 800; }
.category-label { color: #00d4ff; font-weight: 800; font-size: 1.3rem; margin: 30px 0 15px 0; border-left: 5px solid #00d4ff; padding-left: 12px; text-transform: uppercase; }
.stability-label { font-size: 0.95rem; font-weight: bold; padding: 12px; border-radius: 6px; text-align: center; margin-bottom: 20px; border: 1px solid rgba(255,255,255,0.1); }
.step-header { color: #00d4ff; font-weight: bold; margin-top: 15px; border-bottom: 1px solid #333; }
</style>
"""
    # Raw HTML must be explicitly allowed for the <style> tag to take effect.
    st.markdown(stylesheet, unsafe_allow_html=True)
# --- II. DATA CORE (2021-2025 PROTECTED) ---
@st.cache_data(show_spinner=False)
def load_master_data():
    """Download and assemble the 2021-2025 pitching and batting tables.

    For each season the pitching and batting leaderboards are fetched through
    pybaseball with ``qual=0``.  Pitchers get a ``Skill_ERA`` column (``SIERA``
    when present, otherwise ``FIP * 0.92 + 0.35``).  Hitters are optionally
    enriched with 2024 bat-speed data and a derived ``EV90`` column.  The
    pitching table always ends up with ``Stuff+`` and ``Loc+`` columns, with
    100.0 (league average) standing in for any missing value.

    Returns:
        ``(pdf, hdf)``: the combined pitching and batting DataFrames, each
        with a ``Season`` column.

    Raises:
        ValueError: if no season returned any pitching or batting rows.
    """
    import logging

    years = [2021, 2022, 2023, 2024, 2025]
    p_store, h_store = [], []
    for yr in years:
        p = pb.pitching_stats(yr, qual=0)
        if not p.empty:
            p['Season'] = yr
            p['Skill_ERA'] = p['SIERA'] if 'SIERA' in p.columns else p['FIP'] * 0.92 + 0.35
            p_store.append(p)
        h = pb.batting_stats(yr, qual=0)
        if not h.empty:
            h['Season'] = yr
            h_store.append(h)

    if not p_store or not h_store:
        # pd.concat([]) would raise the same exception type with a vaguer message.
        raise ValueError("No pitching or batting data was returned for 2021-2025.")
    pdf, hdf = pd.concat(p_store), pd.concat(h_store)

    # Best-effort enrichment: the app must still load when bat-speed data is
    # unavailable.  Only ordinary errors are swallowed, so KeyboardInterrupt
    # and SystemExit still propagate.
    # NOTE(review): confirm the installed pybaseball exposes
    # `statcast_batter_bat_speed`; if it does not, this branch always falls
    # through and `avg_bat_speed` / `blast_rate` are never added.
    try:
        savant_bs = pb.statcast_batter_bat_speed(2024)
        savant_bs['Name'] = savant_bs['first_name'] + ' ' + savant_bs['last_name']
        bat_speed = savant_bs[['Name', 'avg_bat_speed', 'blast_rate']].drop_duplicates(subset='Name')
        # One row per name keeps the left merge from multiplying hitter rows.
        hdf = hdf.merge(bat_speed, on='Name', how='left')
    except Exception as exc:
        logging.getLogger(__name__).warning("Bat-speed enrichment skipped: %s", exc)

    if 'ExitVelocity' in hdf.columns and 'MaxEV' in hdf.columns:
        hdf['EV90'] = (hdf['ExitVelocity'] * 1.05) + (hdf['MaxEV'] * 0.08)

    # Map the first matching source column onto each unified internal name.
    plus_map = {
        'Stuff+': ['Stuff+', 'stuff_plus', 'stf_plus'],
        'Loc+': ['Location+', 'location_plus', 'loc_plus'],
    }
    for internal_key, aliases in plus_map.items():
        for alias in aliases:
            if alias in pdf.columns:
                pdf = pdf.rename(columns={alias: internal_key})
                break

    # Default to 100 (league average) for missing values, and create the
    # column outright when no alias was present at all.
    for internal_key in plus_map:
        if internal_key in pdf.columns:
            pdf[internal_key] = pdf[internal_key].fillna(100.0)
        else:
            pdf[internal_key] = 100.0
    return pdf, hdf
st.set_page_config(page_title="2026 Kasper Projections", layout="wide")
apply_sovereign_styles()
import time

# Load the master tables once per session; reruns reuse st.session_state.master.
if 'master' not in st.session_state:
    with st.status("⚙️ Kasper Engine: Ingesting Yearly Data...", expanded=True) as engine_status:
        # Credits shown inside the status panel while data loads.
        credits_md = """
**Twitter:** [@GoatedCardinals](https://x.com/goatedcardinals)\n
**Follow Kasper:** [@KasperMLB](https://x.com/KasperMLB)\n
**Updates:** Stay Tuned! MLB Futures tool being built.\n
**Last Updated:** March 02, 2026 (HF Data Push)
"""
        st.markdown("### 👑 Creator Credits")
        st.markdown(credits_md)
        st.markdown("---")

        # Progress bar plus a single placeholder whose text is replaced per stage.
        progress = st.progress(0)
        stage_note = st.empty()

        stage_note.write("📡 Handshaking with FanGraphs & Statcast...")
        time.sleep(0.5)
        progress.progress(30)

        stage_note.write("📂 Processing 5-Year Skill Weights...")
        # The actual download/assembly happens here (cached by st.cache_data).
        st.session_state.master = load_master_data()
        progress.progress(75)

        stage_note.write("🚀 Finalizing Kasper v10.6 Calculations...")
        time.sleep(0.5)
        progress.progress(100)

        engine_status.update(label="✅ Kasper Engine Active.", state="complete", expanded=False)
# --- III. SIDEBAR (QUALIFIED SORTING) ---
mode = st.sidebar.radio("⚔️ Active View Mode", ["Pitcher", "Hitter"])

# Halt this run if the master tables have not been stored in the session yet.
if 'master' not in st.session_state:
    st.stop()
df_p, df_h = st.session_state.master

# Build the selectable player list: 2025 rows with any recorded volume.
if mode == "Pitcher":
    _pool, _volume_col, _volume_floor = df_p, 'IP', 0.1
else:
    _pool, _volume_col, _volume_floor = df_h, 'AB', 0
_active_names = _pool[(_pool['Season'] == 2025) & (_pool[_volume_col] > _volume_floor)]['Name'].unique()
filtered_list = sorted(str(n) for n in _active_names if pd.notna(n))

selected_name = st.sidebar.selectbox("Active Profile Search", filtered_list)

# Step descriptions displayed in the "Steps" tab, keyed by engine.
_hitter_steps = {
    "1. KNN Vectoring": "Clusters hitters by physical similarity (Exit Velo/Launch Angle).",
    "2. TVG 2.0 Anchor": "Weights 2025 at 45% to capture current peak performance.",
    "3. Trajectory Bias": "Maps GB/FB ratios to historic HR/FB efficiency.",
    "4. Random Forest": "Isolates noise from actual skill growth.",
    "5. Aggressive Tag": "Corrects for 'Free Swingers' (O-Swing > 38%) who defy traditional discipline models.",
    "6. Waterfall Lock": "Ensures HR/AVG/OBP remain mathematically correlated.",
    "7. Quad-Power Curve": "Allows elite outliers (Judge/Ohtani) to break the 50-HR ceiling.",
    "8. Monte Carlo": "10k simulations to produce the final 'Most Likely' output.",
    "9. Rookie Translation": "Applies a 10% 'Experience Discount' to prospects with <100 career MLB ABs.",
}
_pitcher_steps = {
    "1. KNN Vectoring": "Clusters pitchers by Stuff+ and velocity consistency.",
    "2. SIERA Weighting": "Calculates skill-based baseline using 5-year weighted SIERA.",
    "3. K/BB Interaction": "Projects K/9 and BB/9 as co-dependent variables.",
    "4. HR/9 Normalization": "Regresses outlier HR/9 seasons toward home-park averages.",
    "5. Fatigue Scaling": "Adjusts for IP volume (Linear decay of stuff over long seasons).",
    "6. Waterfall Lock": "Ensures ERA/WHIP/CSW% align with projected skill.",
    "7. League Context": "Adjusts for current MLB run-scoring environment (1.12x).",
    "8. Monte Carlo": "10k simulations to establish the Stability Factor (sig).",
}

with st.sidebar.expander("🧪 KASPER TECH MANUAL (v11)", expanded=True):
    core_tab, steps_tab, audit_tab = st.tabs(["🧬 Core", "🎯 Steps", "⚖️ Audit"])

    # Core architecture overview.
    with core_tab:
        st.markdown("### **The Anchor Philosophy**")
        st.info("**Hitter Anchor:** 5-Year Exp Decay + Statcast Physicality.")
        st.info("**Pitcher Anchor:** SIERA (Skill-Interactive ERA).")
        st.write("""
Kasper uses **SIERA** over FIP because SIERA accounts for ball-in-play complexity and the 'strikeout-walk' interaction.
For hitters, we prioritize **Barrel%** and **Bat Speed** over historical HR totals to find 'hidden' breakouts.
""")

    # Per-engine step-by-step breakdown.
    with steps_tab:
        step_choice = st.selectbox("Select Engine", ["Hitter v11.0", "Pitcher v10.0"])
        steps = _hitter_steps if step_choice == "Hitter v11.0" else _pitcher_steps
        for s, desc in steps.items():
            st.markdown(f"**{s}**")
            st.caption(desc)

    # Comparison against other projection systems.
    with audit_tab:
        st.markdown("### **Consensus Variance**")
        st.table({
            "System": ["ZiPS", "Steamer", "ATC", "THE BAT X", "OOPSY"],
            "Diff (v10.6)": ["-12% HR", "-8% HR", "-5% AVG", "+2% EV", "-4% ERA"]
        })
        st.markdown("""
**Why we differ:**
Kasper is **12% more aggressive** on elite power than ZiPS/Steamer because we weight physical tools (Bat Speed) over historical regression.
""")
# --- IV. ENGINES ---
def get_weighted_value(data, metric):
    """Return a recency-weighted average of ``metric`` across 2021-2025.

    Season weights are 2025: 0.45, 2024: 0.25, 2023: 0.15, 2022: 0.10 and
    2021: 0.05.  For each season the first matching row is used; seasons that
    are absent or whose value is missing are skipped and the remaining weights
    are renormalised.

    Args:
        data: DataFrame of one player's season rows, with a ``Season`` column.
        metric: Name of the column to average.

    Returns:
        The weighted average.  If no weighted season has a value, the plain
        mean of the column is returned.  ``0.0`` is returned when the column
        does not exist or when it holds no usable value at all, so callers
        that test ``== 0`` to trigger a fallback are not bypassed by NaN.
    """
    if metric not in data.columns:
        return 0.0

    weights = {2025: 0.45, 2024: 0.25, 2023: 0.15, 2022: 0.10, 2021: 0.05}
    weighted_sum, weight_total = 0, 0
    for season, weight in weights.items():
        season_values = data.loc[data['Season'] == season, metric]
        if season_values.empty:
            continue
        value = season_values.iloc[0]
        if pd.isna(value):
            continue
        weighted_sum += value * weight
        weight_total += weight

    if weight_total > 0:
        return weighted_sum / weight_total

    # Only seasons outside the weight table (or no values at all) remain.
    fallback = data[metric].mean()
    return 0.0 if pd.isna(fallback) else fallback
# LOCKED PITCHER ENGINE v10.0
def run_p_engine_v10(data, target_ip):
    """Project a pitcher's 2026 rate and counting stats for an innings workload.

    The function works only from its arguments: it does not read module-level
    state and does not modify ``data``.

    Args:
        data: DataFrame of one pitcher's season rows.  ``Season`` and ``IP``
            are read directly; every other column is optional and falls back
            to a league-average default when absent or missing.
        target_ip: Projected innings pitched (the workload slider value).

    Returns:
        ``(res, p_sig)`` where ``res`` is a dict of projected values
        (``ERA``, ``WHIP``, ``K/9``, ``SwStr%``, ``BB/9``, ``HR/9``, ``CSW%``,
        ``SO``, ``BB``, ``ER``, ``Stuff+``, ``Location+``, ``Pitching+``,
        ``K/9_Projected``, ``ERA_Projected``, ``WHIP_Projected``,
        ``Variance``, ``Sig``) and ``p_sig`` is the stability factor, which
        grows as ``target_ip`` exceeds the pitcher's mean historical IP.
    """
    # Oldest-to-newest view so that "last row" really is the latest season and
    # the waterfall weights below line up with chronology.
    if 'Season' in data.columns:
        history = data.sort_values('Season', kind='mergesort')
    else:
        history = data
    row = history.iloc[-1] if len(history) else pd.Series(dtype=float)

    b_siera = get_weighted_value(data, 'Skill_ERA')
    b_whip, b_k9, b_bb9, b_hr9, b_swstr = [
        get_weighted_value(data, x) for x in ['WHIP', 'K/9', 'BB/9', 'HR/9', 'SwStr%']
    ]
    p_sig = 1.0 + ((max(0, target_ip - data['IP'].mean()) ** 1.1) * 0.0015)

    def sim_median(center, spread):
        """Median of 10,000 normal draws around ``center``."""
        return np.percentile(np.random.normal(center, spread, 10000), 50)

    res = {
        'ERA': sim_median(b_siera * 0.98, 0.15 * p_sig),
        'WHIP': sim_median(b_whip * 0.99, 0.04 * p_sig),
        'K/9': sim_median(b_k9, b_k9 * 0.03),
        'SwStr%': sim_median(b_swstr, b_swstr * 0.04),
        'BB/9': sim_median(b_bb9, 0.20),
        'HR/9': sim_median(b_hr9, 0.15),
    }
    res['CSW%'] = res['SwStr%'] + 0.176

    # --- Volumetric scaling: rates move with the workload ---
    # ERA/WHIP/BB9 are inflated by 0.05% per inning of distance from 150 IP.
    fatigue_scale = 1.0 + (abs(target_ip - 150) * 0.0005)
    res['K/9'] = res['K/9'] * (1.0 + (1.0 / (target_ip + 1)))  # small-sample K/9 boost
    res['BB/9'] = res['BB/9'] * fatigue_scale
    res['ERA'] = res['ERA'] * fatigue_scale
    res['WHIP'] = res['WHIP'] * fatigue_scale
    res['HR/9'] = res['HR/9'] * (1.0 + (target_ip * 0.0002))

    # Counting stats implied by the scaled rates.
    res['SO'] = (res['K/9'] / 9) * target_ip
    res['BB'] = (res['BB/9'] / 9) * target_ip
    res['ER'] = (res['ERA'] / 9) * target_ip

    # --- Physical proxy: latest readings relative to league baselines ---
    # Baselines as labelled by the original author: 2025 average fastball
    # velocity, induced vertical break and extension.
    v_baseline, i_baseline, e_baseline = 93.8, 15.6, 6.4
    velo_keys = ['avg_velo', 'release_speed', 'Velocity', 'FBv']
    iv_keys = ['avg_ivb', 'p_v_break', 'induced_vertical_break', 'IVB']
    ex_keys = ['extension', 'release_extension', 'avg_extension']

    def first_present(record, keys, default):
        """First non-missing value among ``keys``; ``default`` if none."""
        for key in keys:
            if key in record and pd.notna(record[key]):
                return record[key]
        return default

    curr_v = first_present(row, velo_keys, v_baseline)
    curr_i = first_present(row, iv_keys, i_baseline)
    curr_e = first_present(row, ex_keys, e_baseline)

    velo_score = (curr_v / v_baseline) * 100
    ivb_score = (curr_i / i_baseline) * 100
    ext_score = (curr_e / e_baseline) * 100
    stuff_proxy = (velo_score * 0.45) + (ivb_score * 0.35) + (ext_score * 0.20)

    # --- Waterfall-weighted history for K/9 and ERA ---
    # Weights for the five most recent rows, oldest first.
    wf_weights = [0.05, 0.08, 0.15, 0.25, 0.47]

    def waterfall_average(column, league_avg):
        """Weighted mean of the latest rows; league average fills any gaps."""
        if column in history.columns:
            series = history[column].fillna(league_avg)
        else:
            series = pd.Series(league_avg, index=history.index)
        recent = series.tail(len(wf_weights))
        if recent.empty:
            return league_avg
        # np.average normalises, so a shorter history simply uses the
        # right-most (most recent) weights.
        return np.average(recent, weights=wf_weights[-len(recent):])

    k_history = waterfall_average('K/9', 8.5)
    era_history = waterfall_average('ERA', 4.20)

    # --- Pitching+ blend: 70% latest Stuff+ / 30% physical proxy ---
    stuff_25 = first_present(row, ['Stuff+'], 100.0)
    loc_25 = first_present(row, ['Loc+'], 100.0)
    final_stuff = round((stuff_25 * 0.70) + (stuff_proxy * 0.30), 1)
    final_loc = round((loc_25 * 0.90) + (100 * 0.10), 1)
    final_pitching_plus = round((final_stuff * 0.60) + (final_loc * 0.40), 1)
    f_p_plus_scalar = float(final_pitching_plus)

    # Talent-first projection: 70% talent expectation / 30% waterfall history.
    k9_talent_expectation = 8.5 + ((f_p_plus_scalar - 100) * 0.22)
    era_talent_expectation = 4.20 - ((f_p_plus_scalar - 100) * 0.06)
    final_k9_proj = (k9_talent_expectation * 0.70) + (k_history * 0.30)
    final_era_proj = (era_talent_expectation * 0.70) + (era_history * 0.30)

    res['Stuff+'] = final_stuff
    res['Location+'] = final_loc
    res['Pitching+'] = final_pitching_plus
    res['K/9_Projected'] = round(max(4.0, final_k9_proj), 2)
    res['ERA_Projected'] = round(max(1.50, final_era_proj), 2)
    res['WHIP_Projected'] = round(max(0.80, 1.30 - ((f_p_plus_scalar - 100) * 0.012)), 2)

    # Variance label scales with the requested workload.
    calc_variance = (1.1 - p_sig) * (float(target_ip) / 100.0)
    res['Variance'] = round(calc_variance, 2)
    res['Sig'] = round(p_sig, 2)
    return res, p_sig
# --- FINAL CORE v2 (SOFT-LOCK MODIFIED v10.5) ---
# --- FINAL CORE v2 (REPAIRED v10.6) ---
def run_h_engine_v10_8(data, target_ab):
    """Project a hitter's 2026 line from his season history and an AB workload.

    Args:
        data: DataFrame of one hitter's season rows (with a ``Season``
            column).  Metric columns are looked up under several aliases and
            default to 0.0 when none is present.
        target_ab: Projected at-bats (the workload slider value).

    Returns:
        ``(res, h_sig)`` where ``res`` is a dict of projected values (rate
        stats, ``HR``, ``OPS``, ``wRC+``, ``SB``, batted-ball and plate
        discipline metrics) and ``h_sig`` is the stability factor, which
        shrinks toward 1.0 as ``target_ab`` grows.
    """
    def get_stat(col_list):
        """Weighted value of the first alias found in ``data``; else 0.0."""
        for c in col_list:
            if c in data.columns:
                return get_weighted_value(data, c)
        return 0.0

    def as_fraction(value):
        """Treat values above 1 as percentages and convert them to 0-1."""
        return value / 100 if value > 1 else value

    # Stability factor: inversely proportional to sqrt(target_ab).
    sample_size_weight = (600 / (target_ab + 1)) ** 0.5
    h_sig = 1.0 + (sample_size_weight * 0.15)

    res = {
        'AVG': get_stat(['AVG']),
        'OBP': get_stat(['OBP']),
        'SLG': get_stat(['SLG']),
        'SB': get_stat(['SB']),
        'SwStr%': get_stat(['SwStr%', 'SwStr', 'swinging_strike_pct']),
        'BB%': get_stat(['BB%', 'BB_pct']),
        'K%': get_stat(['K%', 'SO_pct']),
        'Barrel%': get_stat(['Barrel%', 'barrel_rate']),
        'FB%': get_stat(['FB%', 'fly_ball_pct']),
        'Pull%': get_stat(['Pull%', 'pull_pct']),
        'Z-Swing%': get_stat(['Z-Swing%', 'Z-Swing', 'z_swing_pct']),
        'O-Swing%': get_stat(['O-Swing%', 'O-Swing', 'o_swing_pct']),
        'Z-Contact%': get_stat(['Z-Contact%', 'Z-Contact', 'z_contact_pct']),
        'EV90': get_stat(['EV90', 'ev90', '90th_Percentile_EV', 'avg_best_speed', 'Exit Velocity', 'avg_distance']),
        'avg_bat_speed': get_stat(['avg_bat_speed', 'bat_speed', 'Avg_Bat_Speed', 'swing_speed']),
        'LaunchAngle': get_stat(['LaunchAngle', 'launch_angle_avg', 'avg_launch_angle', 'LA']),
        'GB%': get_stat(['GB%', 'GB', 'ground_ball_pct']),
        'SquaredUp%': get_stat(['SquaredUp%', 'squared_up_pct', 'sq_up_rate']),
        'Blast%': get_stat(['Blast%', 'blast_rate']),
        'blast_pct': 0.0,  # second naming convention, initialised only
    }

    sw_str = as_fraction(res['SwStr%'])
    o_swing = as_fraction(res['O-Swing%'])
    k_rate = as_fraction(res['K%'])
    bb_rate = as_fraction(res['BB%'])

    # Launch angle as read from the data, i.e. before the inference near the
    # end of this function; it can therefore still be 0 here.
    la_avg = res['LaunchAngle']

    # Elite contact (SwStr < 8%) earns a 5% AVG bump.
    contact_floor = 1.05 if sw_str < 0.08 else 1.0
    res['AVG'] *= contact_floor

    # AVG/SLG are trimmed by 0.01% per projected AB above 300.
    fatigue_penalty = 1.0 - (max(0, target_ab - 300) * 0.0001)
    res['AVG'] *= fatigue_penalty
    res['SLG'] *= fatigue_penalty

    # --- Home runs ---
    # NOTE(review): earlier revisions also computed a whiff penalty, a
    # launch-angle lift factor and a 10% rookie discount, but the HR value
    # carrying them was immediately overwritten by the plain formula below,
    # so they never affected the output.  The effective behaviour is kept
    # here; decide explicitly whether those adjustments should be applied.
    is_aggressive = (o_swing > 0.38 and k_rate < 0.18 and sw_str < 0.11)
    arch_mult = 1.38 if is_aggressive else 1.0
    p_factor = 1.0 + (max(0, res['Barrel%'] - 0.11) ** 1.6) * 14.0
    hr_fb_rate = np.clip(res['Barrel%'] * 2.2 * p_factor * arch_mult, 0.04, 0.48)
    res['HR'] = target_ab * (res['FB%'] * 0.4) * hr_fb_rate * 1.12

    # --- Waterfall: derived rate stats ---
    res['SLG'] *= (1.0 + (max(0, la_avg - 15) * 0.005))
    # Walk contribution uses the 0-1 walk rate on both scales.  The previous
    # percent-scale branch multiplied by 0.085, which is ten times too large.
    res['OBP'] = res['AVG'] + (bb_rate * 0.85)
    res['OPS'] = res['OBP'] + res['SLG']
    res['wRC+'] = (res['OPS'] / 0.730) * 100
    res['SB'] *= (target_ab / 550.0)

    # --- EV90 repair ---
    if res['EV90'] == 0:
        backup_ev = get_stat(['avg_ev', 'ev', 'ExitVelocity', 'EV'])
        if backup_ev > 0:
            res['EV90'] = backup_ev + 4.2  # offset from average EV to EV90
        else:
            res['EV90'] = 103.1  # author's league-average EV90 fallback

    # --- Bat speed inference from EV90 ---
    if res['avg_bat_speed'] == 0:
        if res['EV90'] > 0:
            res['avg_bat_speed'] = (res['EV90'] - 18.0) / 1.15
        else:
            res['avg_bat_speed'] = 71.5

    # --- Blast% repair: bat speed plus barrel rate as an efficiency score ---
    current_bs = res['avg_bat_speed'] if res['avg_bat_speed'] > 0 else 71.5
    eff_score = current_bs + (res['Barrel%'] * 100)
    if res['Blast%'] <= 0:
        if eff_score >= 164:
            res['Blast%'] = round(0.16 + (res['Barrel%'] * 0.8), 3)
        else:
            res['Blast%'] = round(max(0.04, res['Barrel%'] * 1.2), 3)

    # --- Launch angle inference from fly-ball rate ---
    if res['LaunchAngle'] == 0:
        fb_val = res['FB%'] if res['FB%'] < 1 else res['FB%'] / 100
        # 38% FB maps to 12.2 degrees; each FB point moves LA by 0.5 degrees.
        res['LaunchAngle'] = 12.2 + ((fb_val - 0.38) * 50)
        res['LaunchAngle'] = np.clip(res['LaunchAngle'], 4.0, 22.0)

    # --- Ensemble blending with the XGBoost correction layer ---
    physics_hr = res['HR']
    context_model = train_kasper_ensemble()
    if context_model is not None:
        # Age and AB totals come straight from the history rows; neither is
        # part of `res`.  A missing age is 0, matching the zero-fill used
        # when the model is trained, and is treated as a prospect below.
        age = 0.0
        if 'Age' in data.columns:
            known_ages = data['Age'].dropna()
            if not known_ages.empty:
                age = float(known_ages.max())
        # Sum of AB over the rows supplied (the loaded seasons only).
        career_ab = float(data['AB'].sum()) if 'AB' in data.columns else 0.0

        p_feat = pd.DataFrame(
            [[age, res['K%'], res['BB%'], res['avg_bat_speed'], res['LaunchAngle']]],
            columns=['Age', 'K%', 'BB%', 'avg_bat_speed', 'LaunchAngle'],
        )
        xgb_correction = context_model.predict(p_feat)[0]

        # Trust the physics estimate more for young or low-sample hitters.
        if age < 24 or career_ab < 600:
            blend_weight = 0.85
        else:
            blend_weight = 0.55
        res['HR'] = (physics_hr * blend_weight) + (xgb_correction * (1 - blend_weight))

    return res, h_sig
# --- V. RENDER ---
def _render_stability_banner(sig):
    """Draw the coloured stability banner for an engine's stability factor.

    ``sig <= 1.05`` renders as STABLE (green), ``sig <= 1.20`` as MODERATE
    (yellow) and anything higher as VOLATILE (red).
    """
    if sig <= 1.05:
        stab_color, stab_text, stab_border = "#00ff7f22", "STABLE", "#00ff7f"
    elif sig <= 1.20:
        stab_color, stab_text, stab_border = "#ffd70022", "MODERATE", "#ffd700"
    else:
        stab_color, stab_text, stab_border = "#ff4b4b22", "VOLATILE", "#ff4b4b"

    variance_pct = (sig - 1.0) * 100
    st.markdown(f"""
<div class="stability-label" style="background-color: {stab_color}; border: 1px solid {stab_border}; color: white;">
PROJECTION {stab_text} | Stability Factor: {sig:.3f}x (+{variance_pct:.1f}% Variance)
</div>
""", unsafe_allow_html=True)


# The selectbox yields None when the player list is empty; stop cleanly
# instead of failing on `.upper()` below.
if selected_name is None:
    st.warning("No qualifying 2025 players are available for this view.")
    st.stop()

st.header(f"🚀 {selected_name.upper()} | 2026 {mode.upper()} PROJECTIONS")

if mode == "Pitcher":
    # `hist` and `workload` stay module-level names on purpose.
    hist = df_p[df_p['Name'] == selected_name].copy()
    workload = st.slider("Projected IP", 50, 220, 165)
    stats, sig = run_p_engine_v10(hist, workload)
    _render_stability_banner(sig)

    c1, c2, c3 = st.columns(3)
    c4, c5, c6 = st.columns(3)
    c7, c8 = st.columns(2)
    c9, c10, c11 = st.columns(3)
    c1.metric("1. Strikeouts", int((stats['K/9']/9)*workload))
    c2.metric("2. ERA", f"{stats['ERA']:.2f}")
    c3.metric("3. WHIP", f"{stats['WHIP']:.2f}")
    c4.metric("4. SwStr%", f"{stats['SwStr%']*100:.2f}%")
    c5.metric("5. CSW%", f"{stats['CSW%']*100:.2f}%")
    c6.metric("6. K/9", f"{stats['K/9']:.2f}")
    c7.metric("7. HR/9", f"{stats['HR/9']:.2f}")
    c8.metric("8. BB/9", f"{stats['BB/9']:.2f}")
    c9.metric("9. Stuff+", f"{float(stats['Stuff+']):.0f}")
    c10.metric("10. Location+", f"{float(stats['Location+']):.0f}")
    c11.metric("11. Pitching+", f"{float(stats['Pitching+']):.0f}")
else:
    hist = df_h[df_h['Name'] == selected_name].copy()
    workload = st.slider("Projected AB", 100, 700, 550)
    stats, sig = run_h_engine_v10_8(hist, workload)
    _render_stability_banner(sig)

    st.markdown('<div class="category-label">Stats</div>', unsafe_allow_html=True)
    c1, c2, c3, c4 = st.columns(4)
    c5, c6, c7, c8 = st.columns(4)
    c1.metric("1. Home Runs", f"{stats['HR']:.1f}")
    c2.metric("2. wRC+", int(stats['wRC+']))
    c3.metric("3. AVG", f"{stats['AVG']:.3f}")
    c4.metric("4. OBP", f"{stats['OBP']:.3f}")
    c5.metric("5. SLG", f"{stats['SLG']:.3f}")
    c6.metric("6. OPS", f"{stats['OPS']:.3f}")
    c7.metric("7. EV90", f"{stats['EV90']:.2f}")
    c8.metric("8. Stolen Bases", f"{stats['SB']:.1f}")

    st.markdown('<div class="category-label">Plate Discipline Metrics</div>', unsafe_allow_html=True)
    d1, d2, d3, d4, d5 = st.columns(5)
    d1.metric("1. Z-Contact%", f"{stats['Z-Contact%']*100:.2f}%")
    d2.metric("2. O-Swing%", f"{stats['O-Swing%']*100:.2f}%")
    d3.metric("3. SwStr%", f"{stats['SwStr%']*100:.2f}%")
    d4.metric("4. BB%", f"{stats['BB%']*100:.2f}%")
    # Guard against division by zero when no walk rate is available.
    d5.metric("5. K:BB", f"{stats['K%']/stats['BB%']:.2f}" if stats['BB%']>0 else "0.0")

    st.markdown('<div class="category-label">Batted Ball Metrics</div>', unsafe_allow_html=True)
    b1, b2, b3 = st.columns(3)
    b4, b5, b6 = st.columns(3)
    b1.metric("1. Average Bat Speed", f"{stats['avg_bat_speed']:.1f}")
    b2.metric("2. Barrel%", f"{stats['Barrel%']*100:.1f}%")
    b3.metric("3. GB%", f"{stats['GB%']*100:.2f}%")
    b4.metric("4. FB%", f"{stats['FB%']*100:.2f}%")
    b5.metric("5. Pull%", f"{stats['Pull%']*100:.1f}%")
    b6.metric("6. Average Launch Angle", f"{stats['LaunchAngle']:.1f}")