Spaces:
Sleeping
Sleeping
| """ | |
| Alpha-Index 100 - Gradio Version for HF Spaces | |
| ML-Powered Private Markets Investment Index | |
| """ | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| import json | |
| import os | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
# Lazily populated module-level caches; each one stays None until first use.
# load_model() fills model / encoders / feature_cols, and
# load_index_data() fills index_data.
model = encoders = feature_cols = index_data = None
def load_model():
    """Load the classifier, label encoders and feature list on first use.

    Populates the module-level ``model``, ``encoders`` and ``feature_cols``
    from the ``models/`` directory. Later calls return immediately once all
    three are set.

    Everything is loaded into locals first and published to the globals in a
    single assignment at the end. If any file is missing or unreadable, the
    globals are left untouched, so the next call retries the full load instead
    of finding ``model`` set while ``encoders`` / ``feature_cols`` are still
    ``None``.

    Raises:
        Whatever ``joblib.load``, ``open`` or ``json.load`` raise for a
        missing or unreadable file; the exception is not handled here.
    """
    global model, encoders, feature_cols

    if model is not None and encoders is not None and feature_cols is not None:
        return

    model_dir = 'models'

    # NOTE: joblib.load unpickles its input, which can execute arbitrary code.
    # Only ship model files from a trusted source.
    loaded_model = joblib.load(os.path.join(model_dir, 'xgboost_model.pkl'))

    # One encoder per categorical feature, stored as '<feature>_encoder.pkl'.
    encoder_names = ('strategy', 'vintage_period', 'rate_regime', 'valuation_regime')
    loaded_encoders = {
        name: joblib.load(os.path.join(model_dir, f'{name}_encoder.pkl'))
        for name in encoder_names
    }

    # Column names (in order) used to select the model input frame.
    with open(os.path.join(model_dir, 'feature_names.json'), 'r', encoding='utf-8') as f:
        loaded_feature_cols = json.load(f)

    # Publish only after every artifact loaded successfully.
    model, encoders, feature_cols = loaded_model, loaded_encoders, loaded_feature_cols
def load_index_data():
    """Return the precomputed index table, reading the CSV only on the first call."""
    global index_data

    # Fast path: reuse the frame cached by an earlier call.
    if index_data is not None:
        return index_data

    index_data = pd.read_csv('data/alpha_index_100_precomputed.csv')
    return index_data
def _engineer_features(input_data):
    """Add the interaction, log and bucketed columns used for prediction.

    Adds the columns to ``input_data`` in place and returns it.

    Raises:
        ValueError: if a value falls outside every bin of one of the
            bucketed features (``pd.cut`` yields NaN in that case).
    """
    rate = input_data['macro_interest_rate_at_launch']

    input_data['size_rate_interaction'] = input_data['fund_size_mil'] * rate
    input_data['pe_rate_interaction'] = input_data['public_market_pe_at_launch'] * rate
    input_data['log_fund_size'] = np.log1p(input_data['fund_size_mil'])

    input_data['vintage_period'] = pd.cut(
        input_data['vintage_year'],
        bins=[2009, 2013, 2017, 2021, 2024],
        labels=['2010-2013', '2014-2017', '2018-2021', '2022-2023']
    )
    # pd.cut bins are right-closed, so (0, 1] on its own excludes a 0.0% rate
    # (the UI slider's minimum) and yields NaN. include_lowest=True puts 0.0
    # in 'Low'.
    # NOTE(review): confirm the training pipeline bucketed 0.0 the same way.
    input_data['rate_regime'] = pd.cut(
        rate,
        bins=[0, 1, 3, 6],
        labels=['Low', 'Medium', 'High'],
        include_lowest=True
    )
    input_data['valuation_regime'] = pd.cut(
        input_data['public_market_pe_at_launch'],
        bins=[0, 17, 21, 30],
        labels=['Low', 'Medium', 'High']
    )

    # Fail with a readable message instead of letting NaN reach the encoders.
    for column in ('vintage_period', 'rate_regime', 'valuation_regime'):
        if input_data[column].isna().any():
            raise ValueError(f"Input value is outside the supported range for '{column}'.")

    return input_data


def predict_fund(strategy, fund_size, vintage_year, interest_rate, pe_ratio):
    """Score a hypothetical fund and return the result as Markdown text.

    Args:
        strategy: Strategy label; passed to the 'strategy' encoder.
        fund_size: Fund size, used as ``fund_size_mil`` (shown as $...M).
        vintage_year: Vintage year; converted with ``int()``.
        interest_rate: Interest rate at launch (shown as a percentage).
        pe_ratio: Public-market P/E ratio at launch.

    Returns:
        A Markdown string with the predicted score, a verdict and the echoed
        inputs. On failure, a Markdown error message is returned instead of
        raising, so the UI always has something to display.
    """
    try:
        # Load model on first use
        load_model()

        input_data = _engineer_features(pd.DataFrame([{
            'fund_manager': "Test Fund",
            'strategy': strategy,
            'fund_size_mil': fund_size,
            'vintage_year': int(vintage_year),
            'macro_interest_rate_at_launch': interest_rate,
            'public_market_pe_at_launch': pe_ratio
        }]))

        # Each encoder maps column '<name>' to '<name>_encoded'.
        for name, encoder in encoders.items():
            input_data[f'{name}_encoded'] = encoder.transform(input_data[name])

        # Score = predicted probability of the class at index 1.
        X = input_data[feature_cols]
        score = model.predict_proba(X)[0, 1]

        score_pct = f"{score:.1%}"
        if score >= 0.70:
            verdict = "🟢 HIGH CONVICTION"
            description = "Strong potential for top-quartile performance"
        elif score >= 0.50:
            verdict = "🟡 MODERATE"
            description = "Decent potential, consider for diversification"
        else:
            verdict = "🔴 LOW CONVICTION"
            description = "Below recommended threshold"

        # Blank lines separate the Markdown blocks; without them the
        # description line directly above '---' renders as a heading.
        result_lines = [
            "",
            f"### ML Predicted Score: {score_pct}",
            "",
            f"**{verdict}**",
            "",
            description,
            "",
            "---",
            "",
            "**Input Parameters:**",
            "",
            f"- Strategy: {strategy}",
            f"- Fund Size: ${fund_size}M",
            f"- Vintage Year: {vintage_year}",
            f"- Interest Rate: {interest_rate}%",
            f"- P/E Ratio: {pe_ratio}",
            "",
        ]
        return "\n".join(result_lines)
    except ValueError as e:
        # Bad input (out-of-range value or a label an encoder rejects):
        # the model-files hint below would be misleading here.
        return f"❌ Error: {str(e)}"
    except Exception as e:
        # UI boundary: report any other failure as text rather than crashing.
        return f"❌ Error: {str(e)}\n\nPlease ensure all model files are present in the 'models/' directory."
def create_strategy_chart():
    """Build a pie chart of summed index weight per strategy, in percent."""
    holdings = load_index_data()

    # Sum the weights per strategy, then scale the fraction to a percentage.
    weight_by_strategy = holdings.groupby('strategy')['index_weight'].sum()
    strategy_alloc = weight_by_strategy.reset_index().assign(
        percentage=lambda table: table['index_weight'] * 100
    )

    pie = px.pie(
        strategy_alloc,
        values='percentage',
        names='strategy',
        title='Strategy Allocation',
        color_discrete_sequence=px.colors.qualitative.Set3,
    )
    pie.update_traces(textposition='inside', textinfo='percent+label')
    return pie
def create_vintage_chart():
    """Build a bar chart of summed index weight per vintage year, in percent."""
    holdings = load_index_data()

    # One row per vintage year carrying the summed weight.
    vintage_alloc = holdings.groupby('vintage_year', as_index=False)['index_weight'].sum()
    vintage_alloc['percentage'] = vintage_alloc['index_weight'] * 100

    axis_titles = {'vintage_year': 'Vintage Year', 'percentage': 'Allocation (%)'}
    return px.bar(
        vintage_alloc,
        x='vintage_year',
        y='percentage',
        title='Vintage Year Distribution',
        labels=axis_titles,
        color='percentage',
        color_continuous_scale='Blues',
    )
def get_summary_stats():
    """Return the index-level summary figures as a Markdown bullet list."""
    holdings = load_index_data()
    scores = holdings['predicted_score']
    weights = holdings['index_weight']
    sizes = holdings['fund_size_mil']

    total_funds = len(holdings)
    weighted_avg_score = (scores * weights).sum()
    # Combined weight of the ten largest positions.
    top_10_concentration = holdings.nlargest(10, 'index_weight')['index_weight'].sum()
    total_aum = sizes.sum()
    average_size = sizes.mean()
    lowest_score = scores.min()
    highest_score = scores.max()

    # Leading and trailing empty entries keep the newline at both ends.
    report_lines = [
        "",
        "### π Index Summary Statistics",
        f"- **Total Funds**: {total_funds}",
        f"- **Weighted Average Score**: {weighted_avg_score:.1%}",
        f"- **Top 10 Concentration**: {top_10_concentration:.1%}",
        f"- **Total AUM**: ${total_aum:,.0f}M",
        f"- **Average Fund Size**: ${average_size:,.0f}M",
        f"- **Score Range**: {lowest_score:.1%} - {highest_score:.1%}",
        "",
    ]
    return "\n".join(report_lines)
def get_top_holdings():
    """Return the first ten index rows as a display-ready table of strings."""
    # Source column -> display heading, in presentation order.
    headings = {
        'rank': 'Rank',
        'fund_manager': 'Fund Manager',
        'strategy': 'Strategy',
        'fund_size_mil': 'Fund Size',
        'predicted_score': 'ML Score',
        'index_weight': 'Index Weight',
    }

    first_ten = load_index_data().head(10)[list(headings)].copy()

    # Render the numeric columns as text before the columns are retitled.
    first_ten['predicted_score'] = first_ten['predicted_score'].map("{:.1%}".format)
    first_ten['index_weight'] = first_ten['index_weight'].map("{:.2%}".format)
    first_ten['fund_size_mil'] = first_ten['fund_size_mil'].map("${:,.0f}M".format)

    return first_ten.rename(columns=headings)
# Build the Gradio app.
# Display copy of the whole index: numeric columns are rendered as text and
# the columns retitled before the layout below places the table.
full_index = load_index_data()[['rank', 'fund_manager', 'strategy', 'vintage_year', 'fund_size_mil', 'predicted_score', 'index_weight']].copy()
full_index['predicted_score'] = full_index['predicted_score'].map("{:.1%}".format)
full_index['index_weight'] = full_index['index_weight'].map("{:.2%}".format)
full_index['fund_size_mil'] = full_index['fund_size_mil'].map("${:,.0f}M".format)
full_index.columns = ['Rank', 'Fund Manager', 'Strategy', 'Vintage', 'Fund Size', 'ML Score', 'Index Weight']

with gr.Blocks(title="Alpha-Index 100", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π Alpha-Index 100")
    gr.Markdown("*ML-Powered Private Markets Investment Index*")
    gr.Markdown("---")

    with gr.Tabs():
        # Tab 1: read-only dashboard built from the precomputed index data
        with gr.Tab("π Alpha-Index 100"):
            gr.Markdown("## The Index: Top 100 Funds Ranked by ML Score")
            gr.Markdown("This is your investable index product - funds are weighted by predicted performance.")

            with gr.Row():
                with gr.Column(scale=1):
                    summary_stats = gr.Markdown(value=get_summary_stats())
                with gr.Column(scale=1):
                    strategy_chart = gr.Plot(value=create_strategy_chart())

            gr.Markdown("### π Top 10 Holdings")
            top_holdings_table = gr.Dataframe(value=get_top_holdings(), interactive=False)

            vintage_chart = gr.Plot(value=create_vintage_chart())

            gr.Markdown("### π Complete Index (Top 100)")
            full_index_table = gr.Dataframe(value=full_index, interactive=False, wrap=True)

        # Tab 2: form whose values are passed to predict_fund on click
        with gr.Tab("π― Interactive Fund Scorer"):
            gr.Markdown("## Build & Score Your Own Fund")
            gr.Markdown("Configure parameters and see how your hypothetical fund would score.")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### π Fund Parameters")
                    strategy = gr.Dropdown(
                        label="Investment Strategy",
                        choices=["Buyout", "Venture Capital", "Growth Equity", "Private Credit", "Real Estate"],
                        value="Buyout",
                    )
                    fund_size = gr.Slider(label="Fund Size ($M)", minimum=50, maximum=10000, value=500, step=50)
                    vintage_year = gr.Slider(label="Vintage Year", minimum=2010, maximum=2023, value=2020, step=1)
                    interest_rate = gr.Slider(label="Interest Rate at Launch (%)", minimum=0.0, maximum=6.0, value=2.5, step=0.1)
                    pe_ratio = gr.Slider(label="Market P/E Ratio at Launch", minimum=10.0, maximum=30.0, value=19.0, step=0.5)
                    submit_btn = gr.Button("π Score This Fund", variant="primary", size="lg")

                with gr.Column():
                    gr.Markdown("### π Prediction Results")
                    output = gr.Markdown(
                        "Click 'Score This Fund' to run ML prediction.\n\n"
                        "**Note:** Models load on first click (~5-10 seconds), then predictions are instant."
                    )

            # The five inputs are passed in predict_fund's parameter order.
            submit_btn.click(
                predict_fund,
                [strategy, fund_size, vintage_year, interest_rate, pe_ratio],
                output,
            )

    gr.Markdown("---")
    gr.Markdown("*Built with Gradio β’ Using synthetic data for demonstration*")

if __name__ == "__main__":
    demo.launch()