Merger-and-Acquisition / precompute_index.py
AAdevloper
Initial commit: Alpha-Index 100 Gradio app
faebc8b
"""
Pre-compute the Alpha-Index 100 so the app loads instantly.
Run this locally, then deploy the pre-computed CSV.
"""
import pandas as pd
import numpy as np
import joblib
import json
# Load everything: the fund table, the trained classifier, one encoder per
# categorical column, and the ordered list of feature columns the model expects.
# NOTE(review): joblib.load unpickles arbitrary objects; only point this at
# model artifacts that come from a trusted source.
print("Loading data and models...")
df = pd.read_csv('data/mock_fund_data.csv')
model = joblib.load('models/xgboost_model.pkl')
# Encoder files follow the pattern models/<column>_encoder.pkl and are keyed
# here by the column they encode.
encoders = {
    name: joblib.load(f'models/{name}_encoder.pkl')
    for name in ('strategy', 'vintage_period', 'rate_regime', 'valuation_regime')
}
# Pin the encoding: without it open() falls back to the platform's locale
# encoding, which is not guaranteed to be UTF-8 (the encoding JSON is written in).
with open('models/feature_names.json', 'r', encoding='utf-8') as f:
    feature_cols = json.load(f)
print(f"Loaded {len(df)} funds")
# Feature engineering: add interaction terms, a log-scaled fund size, and three
# binned "regime" columns to df in place.
print("Engineering features...")
launch_rate = df['macro_interest_rate_at_launch']
launch_pe = df['public_market_pe_at_launch']

df['size_rate_interaction'] = df['fund_size_mil'] * launch_rate
df['pe_rate_interaction'] = launch_pe * launch_rate
df['log_fund_size'] = np.log1p(df['fund_size_mil'])

# Each entry: (source column, derived column, bin edges, bin labels).
# NOTE: pd.cut defaults to right-closed intervals without include_lowest, so a
# value equal to the first edge (or outside the edges entirely) becomes NaN.
regime_specs = [
    (
        'vintage_year',
        'vintage_period',
        [2009, 2013, 2017, 2021, 2024],
        ['2010-2013', '2014-2017', '2018-2021', '2022-2023'],
    ),
    (
        'macro_interest_rate_at_launch',
        'rate_regime',
        [0, 1, 3, 6],
        ['Low', 'Medium', 'High'],
    ),
    (
        'public_market_pe_at_launch',
        'valuation_regime',
        [0, 17, 21, 30],
        ['Low', 'Medium', 'High'],
    ),
]
for source_col, regime_col, edges, names in regime_specs:
    df[regime_col] = pd.cut(df[source_col], bins=edges, labels=names)
# Encode: run each categorical column through its matching encoder and store
# the result in a sibling column named "<column>_encoded".
print("Encoding...")
for column in ('strategy', 'vintage_period', 'rate_regime', 'valuation_regime'):
    df[f'{column}_encoded'] = encoders[column].transform(df[column])
# Predict: score every fund using the columns listed in feature_cols.
print("Making predictions...")
X = df[feature_cols]
class_probabilities = model.predict_proba(X)
# Keep the second probability column as the fund's score
# (presumably the positive class -- confirm against the training labels).
df['predicted_score'] = class_probabilities[:, 1]
# Get top 100 and calculate weights.
# Weights are proportional to the squared score and normalised so they sum to 1
# over the selected funds; the squared helper column is dropped again at the
# end. nlargest returns rows in descending score order, so rank 1 is the
# highest-scoring fund.
print("Creating Alpha-Index 100...")
df_top = (
    df.nlargest(100, 'predicted_score')
    .assign(score_transformed=lambda top: top['predicted_score'] ** 2)
    .assign(
        index_weight=lambda top: (
            top['score_transformed'] / top['score_transformed'].sum()
        )
    )
    .drop(columns='score_transformed')
    .assign(rank=lambda top: range(1, len(top) + 1))
)
# Save the index table as CSV (without the DataFrame index) and print a short
# summary of what was written.
output_file = 'data/alpha_index_100_precomputed.csv'
df_top.to_csv(output_file, index=False)

best_score = df_top['predicted_score'].max()
weight_sum = df_top['index_weight'].sum()
print(f"\n✅ Saved pre-computed index to {output_file}")
print(f"✅ Top score: {best_score:.1%}")
print(f"✅ Total weight: {weight_sum:.1%}")
print("\nDeploy this file to your Space for instant loading!")