Spaces:
Build error
Build error
"""
Pre-compute the Alpha-Index 100 so the app loads instantly.

Run this locally, then deploy the pre-computed CSV.

Steps performed, in order:
  1. Load the fund data, the persisted model, the per-column encoders and the
     list of feature column names.
  2. Rebuild the engineered features (interactions, log size, binned regimes).
  3. Encode the categorical columns with the loaded encoders.
  4. Score every fund with ``model.predict_proba``.
  5. Keep the 100 highest-scoring funds, weight them by squared score
     (normalised to sum to 1), rank them and write them to a CSV file.
"""
import json
import warnings

import joblib
import numpy as np
import pandas as pd

# Input / output locations, relative to the working directory.
DATA_FILE = 'data/mock_fund_data.csv'
MODEL_FILE = 'models/xgboost_model.pkl'
FEATURE_NAMES_FILE = 'models/feature_names.json'
OUTPUT_FILE = 'data/alpha_index_100_precomputed.csv'

# Number of funds kept in the index.
INDEX_SIZE = 100

# Columns that have a persisted encoder at 'models/<column>_encoder.pkl' and
# receive a '<column>_encoded' companion column. Order matters: it fixes the
# order in which the encoded columns are appended to the frame (and the CSV).
CATEGORICAL_COLUMNS = (
    'strategy',
    'vintage_period',
    'rate_regime',
    'valuation_regime',
)


def _bin_column(values, bins, labels):
    """Bucket *values* with ``pd.cut`` and warn if any row gets no bucket.

    ``pd.cut`` builds right-closed intervals by default, so a value equal to
    the lowest edge, or above the highest edge, silently becomes NaN. The
    binned result is returned unchanged either way; the warning only makes
    the cause visible before the NaN reaches the encoders.

    Args:
        values: Numeric Series to bucket.
        bins: Monotonically increasing bin edges.
        labels: One label per interval (``len(bins) - 1`` entries).

    Returns:
        Categorical Series of labels aligned with *values*.
    """
    binned = pd.cut(values, bins=bins, labels=labels)
    unassigned = int(binned.isna().sum())
    if unassigned:
        warnings.warn(
            f"{unassigned} value(s) in '{values.name}' are missing or outside "
            f"the bin edges {bins} and were left as NaN",
            stacklevel=2,
        )
    return binned


# Load everything
print("Loading data and models...")
df = pd.read_csv(DATA_FILE)
# NOTE: joblib.load unpickles arbitrary Python objects. Only load artifacts
# that come from a trusted source.
model = joblib.load(MODEL_FILE)
encoders = {
    name: joblib.load(f'models/{name}_encoder.pkl')
    for name in CATEGORICAL_COLUMNS
}
# JSON is read as UTF-8 explicitly instead of the platform default encoding.
with open(FEATURE_NAMES_FILE, 'r', encoding='utf-8') as f:
    feature_cols = json.load(f)
print(f"Loaded {len(df)} funds")

# Feature engineering
print("Engineering features...")
df['size_rate_interaction'] = (
    df['fund_size_mil'] * df['macro_interest_rate_at_launch']
)
df['pe_rate_interaction'] = (
    df['public_market_pe_at_launch'] * df['macro_interest_rate_at_launch']
)
df['log_fund_size'] = np.log1p(df['fund_size_mil'])
# Intervals are right-closed: (2009, 2013], (2013, 2017], (2017, 2021],
# (2021, 2024]. A vintage_year of 2024 therefore lands in the bucket whose
# label reads '2022-2023'.
df['vintage_period'] = _bin_column(
    df['vintage_year'],
    bins=[2009, 2013, 2017, 2021, 2024],
    labels=['2010-2013', '2014-2017', '2018-2021', '2022-2023'],
)
# (0, 1] -> Low, (1, 3] -> Medium, (3, 6] -> High; exactly 0 or above 6 -> NaN.
df['rate_regime'] = _bin_column(
    df['macro_interest_rate_at_launch'],
    bins=[0, 1, 3, 6],
    labels=['Low', 'Medium', 'High'],
)
# (0, 17] -> Low, (17, 21] -> Medium, (21, 30] -> High; above 30 -> NaN.
df['valuation_regime'] = _bin_column(
    df['public_market_pe_at_launch'],
    bins=[0, 17, 21, 30],
    labels=['Low', 'Medium', 'High'],
)

# Encode
print("Encoding...")
for name in CATEGORICAL_COLUMNS:
    df[f'{name}_encoded'] = encoders[name].transform(df[name])

# Predict
print("Making predictions...")
X = df[feature_cols]
# The score is column index 1 of the predict_proba output.
df['predicted_score'] = model.predict_proba(X)[:, 1]

# Get top 100
print("Creating Alpha-Index 100...")
# nlargest returns the rows already ordered by descending score, which is
# what makes the sequential rank assignment below valid.
df_top = df.nlargest(INDEX_SIZE, 'predicted_score').copy()

# Calculate weights: squared scores, normalised so the weights sum to 1.
squared_scores = df_top['predicted_score'] ** 2
df_top['index_weight'] = squared_scores / squared_scores.sum()
df_top['rank'] = range(1, len(df_top) + 1)

# Save
df_top.to_csv(OUTPUT_FILE, index=False)
print(f"\n✅ Saved pre-computed index to {OUTPUT_FILE}")
print(f"✅ Top score: {df_top['predicted_score'].max():.1%}")
print(f"✅ Total weight: {df_top['index_weight'].sum():.1%}")
print("\nDeploy this file to your Space for instant loading!")