"""
PredictLM Playground — Gradio demo for predictlm-mini-13m.

Upload a CSV → pick a target column → get predictions on a held-out split.
Single-model fast path (no Duo, no TTT). For the full recipe (0.751 average
accuracy / 0.609 average R² on OpenML datasets), install the package with
`pip install predictlm`.
"""
|
|
| import os |
|
|
| import gradio as gr |
| import numpy as np |
| import pandas as pd |
| from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score |
|
|
| from predictlm import PredictLM |
|
|
| |
| |
# The model is loaded once at import time so every request reuses the same
# instance. `auto_duo=False` selects the single-model mode announced in the
# log line below; inference runs on CPU.
_MODEL_REPO = "zerooneresearch/predictlm-mini-13m"

print("Loading predictlm-mini-13m (single-model mode for speed)...")
MODEL = PredictLM.from_pretrained(_MODEL_REPO, device="cpu", auto_duo=False)
print("Model loaded.")
|
|
|
|
# Larger uploads are randomly down-sampled to this many rows in `load_csv`.
MAX_ROWS = 1100

# `run_prediction` rejects inputs with more numeric feature columns than this.
MAX_FEATURES = 128

# Dropdown label -> CSV path (relative to the working directory) for the
# built-in example datasets.
EXAMPLE_DATASETS = {
    "Breast cancer (classification, 569 rows × 30 features)": (
        "examples/breast_cancer.csv"
    ),
    "California housing (regression, 1000 rows × 8 features)": (
        "examples/california_housing.csv"
    ),
}
|
|
|
|
def load_csv(file) -> tuple:
    """Read a CSV and prepare the target-column dropdown for it.

    Args:
        file: Anything ``pd.read_csv`` accepts (the path of the uploaded or
            example file), or ``None`` when nothing is selected.

    Returns:
        A 3-tuple ``(dataframe_or_None, dropdown_update, status_markdown)``,
        matching the ``[df_state, target, status]`` outputs this handler is
        wired to. On any failure the dataframe is ``None`` and the dropdown
        is emptied and disabled.
    """
    if file is None:
        return (
            None,
            gr.Dropdown(choices=[], value=None, interactive=False),
            "_Upload a CSV (or pick an example below) to start._",
        )

    try:
        df = pd.read_csv(file)
    except Exception as e:
        # UI boundary: any parse/IO failure is reported as a status message.
        # The dropdown is explicitly disabled here too; otherwise it would
        # stay interactive (with no choices) after a previous successful load.
        return (
            None,
            gr.Dropdown(choices=[], value=None, interactive=False),
            f"❌ Could not read CSV: {e}",
        )

    # Cap the row count with a fixed-seed sample so repeated loads of the
    # same file yield the same subset.
    sample_note = ""
    if len(df) > MAX_ROWS:
        df = df.sample(n=MAX_ROWS, random_state=42).reset_index(drop=True)
        sample_note = f" (sampled to {MAX_ROWS} rows for speed)"

    cols = list(df.columns)
    return (
        df,
        gr.Dropdown(choices=cols, value=cols[-1], interactive=True),
        f"✅ Loaded {len(df)} rows × {len(cols)} columns{sample_note}. "
        f"Default target is the last column — change it if needed.",
    )
|
|
|
|
def load_example(name):
    """Load one of the built-in example datasets by its dropdown label.

    Args:
        name: A key of ``EXAMPLE_DATASETS`` (or ``None`` / an unknown label).

    Returns:
        The same 3-tuple as ``load_csv``:
        ``(dataframe_or_None, dropdown_update, status_markdown)``.
    """
    path = EXAMPLE_DATASETS.get(name)
    if not path or not os.path.exists(path):
        # Disable the target dropdown on failure, consistent with the
        # "nothing loaded" state, instead of leaving it interactive but empty.
        return (
            None,
            gr.Dropdown(choices=[], value=None, interactive=False),
            f"Example file not found: {path}",
        )
    return load_csv(path)
|
|
|
|
def _score_predictions(y_test, preds, is_regression, n_features, train_n, test_n):
    """Return ``(summary_markdown, results_frame)`` for one set of predictions.

    Regression is scored with R² and MAE and gets a per-row ``error`` column;
    classification is scored with accuracy and gets a per-row ``correct``
    column.
    """
    result = pd.DataFrame({"actual": y_test, "predicted": preds})

    if is_regression:
        r2 = r2_score(y_test, preds)
        mae = mean_absolute_error(y_test, preds)
        result["error"] = (result["actual"] - result["predicted"]).round(4)
        summary = (
            f"**Regression** · {n_features} features · n_train = {train_n} · "
            f"n_test = {test_n}\n\n"
            f"R² = **{r2:.3f}** · MAE = **{mae:.3f}**\n\n"
            f"_Single-model fast path. Full Duo + TTT recipe averages 0.609 R² across_ "
            f"_25 OpenML regression datasets._"
        )
        return summary, result

    acc = accuracy_score(y_test, preds)
    result["correct"] = result["actual"] == result["predicted"]
    n_classes = pd.Series(y_test).nunique()
    summary = (
        f"**Classification** · {n_features} features · {n_classes} classes · "
        f"n_train = {train_n} · n_test = {test_n}\n\n"
        f"Accuracy = **{acc:.3f}**\n\n"
        f"_Single-model fast path. Full Duo + TTT recipe averages 0.751 accuracy_ "
        f"_across 25 OpenML classification datasets._"
    )
    return summary, result


def run_prediction(df, target_col, test_frac):
    """Fit the model on a train split of ``df`` and evaluate on the held-out rows.

    Only numeric columns other than the target are used as features; other
    columns are ignored. Rows with a missing target are dropped. The split is
    a fixed-seed shuffle, so the same input gives the same split.

    Args:
        df: The loaded DataFrame, or ``None`` if nothing has been loaded.
        target_col: Name of the column to predict, or ``None``.
        test_frac: Fraction of rows held out for evaluation.

    Returns:
        ``(summary_markdown, results)`` where ``results`` is a DataFrame with
        at most the first 50 held-out rows, or ``None`` when validation,
        prediction or scoring fails (the summary then carries the message).
    """
    if df is None or target_col is None:
        return "_Load a CSV first._", None
    if target_col not in df.columns:
        return f"❌ Target column **{target_col}** not in CSV.", None

    # dropna already returns a new frame, so the caller's data is untouched.
    df = df.dropna(subset=[target_col])
    n = len(df)
    if n < 20:
        return f"❌ Need at least 20 rows after dropping NA target. Got {n}.", None

    numeric_feats = [
        c
        for c in df.columns
        if c != target_col and pd.api.types.is_numeric_dtype(df[c])
    ]
    if not numeric_feats:
        return (
            "❌ No numeric feature columns found. PredictLM v1 expects numeric features "
            "(encode categoricals first).",
            None,
        )
    if len(numeric_feats) > MAX_FEATURES:
        return (
            f"❌ PredictLM v1 supports ≤{MAX_FEATURES} features. CSV has "
            f"{len(numeric_feats)} numeric features.",
            None,
        )

    # Hold out at least 5 rows regardless of the requested fraction.
    test_n = max(5, int(n * test_frac))
    train_n = n - test_n

    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    train_df = df.iloc[:train_n]
    test_df = df.iloc[train_n:]

    X_train = train_df[numeric_feats].values.astype(np.float32)
    y_train = train_df[target_col].values
    X_test = test_df[numeric_feats].values.astype(np.float32)
    y_test = test_df[target_col].values

    try:
        preds = MODEL.fit(X_train, y_train).predict(X_test)
    except Exception as e:
        return f"❌ Prediction error: {e}", None

    # Heuristic: a numeric target with more than 10 distinct values is scored
    # as regression; everything else is scored as classification.
    is_regression = (
        pd.api.types.is_numeric_dtype(df[target_col])
        and df[target_col].nunique() > 10
    )

    # Scoring can fail too (e.g. NaN predictions, or predictions whose
    # shape/labels do not line up with the targets); report that the same way
    # as every other failure instead of raising out of the handler.
    try:
        summary, result = _score_predictions(
            y_test, preds, is_regression, len(numeric_feats), train_n, test_n
        )
    except Exception as e:
        return f"❌ Could not score predictions: {e}", None

    return summary, result.head(50)
|
|
|
|
# Markdown rendered at the top of the page.
HEADER = (
    "\n"
    "# PredictLM Playground\n"
    "\n"
    "Upload a CSV, pick a target column, and run **predictlm-mini-13m** on it"
    " — a 13M-parameter open-weight tabular foundation model, Apache-2.0.\n"
    "\n"
    "> **Note**: This Space runs single-model fast-path (no Duo + TTT) for"
    " snappy responses. Local Python with `pip install predictlm` gets the"
    " full 0.751 / 0.609 OpenML numbers.\n"
)
|
|
# Markdown rendered at the bottom of the page: a rule, project links, credit.
FOOTER = (
    "\n"
    "---\n"
    "\n"
    "[Model card](https://huggingface.co/zerooneresearch/predictlm-mini-13m)"
    " · [PyPI `pip install predictlm`](https://pypi.org/project/predictlm/)"
    " · [Source on GitHub](https://github.com/matej-01RAI/predictlm-mcp)"
    " · [Org](https://huggingface.co/zerooneresearch)\n"
    "\n"
    "PredictLM is built by [Zero One Research](https://huggingface.co/zerooneresearch),"
    " an independent AI lab in Bratislava, EU.\n"
)
|
|
|
|
# Page layout: controls in a narrow left column, results in a wider right
# column, then the event wiring that connects them to the handlers above.
with gr.Blocks(title="PredictLM Playground", theme=gr.themes.Soft()) as demo:
    gr.Markdown(value=HEADER)

    # Carries the currently loaded DataFrame from the load handlers to
    # `run_prediction`.
    df_state = gr.State(value=None)

    with gr.Row():
        # Left column: data source, target selection, split size, run button.
        with gr.Column(scale=1):
            file = gr.File(label="Upload CSV", file_types=[".csv"])
            example = gr.Dropdown(
                choices=list(EXAMPLE_DATASETS),
                label="…or pick a built-in example",
                value=None,
            )
            # Starts disabled; the load handlers enable it once a CSV is read.
            target = gr.Dropdown(
                label="Target column",
                choices=[],
                interactive=False,
            )
            test_frac = gr.Slider(
                minimum=0.1,
                maximum=0.5,
                value=0.2,
                step=0.05,
                label="Test fraction (held-out for evaluation)",
            )
            run = gr.Button(value="Predict", variant="primary", size="lg")
            status = gr.Markdown(
                value="_Upload a CSV (or pick an example below) to start._"
            )

        # Right column: metric summary and the per-row predictions table.
        with gr.Column(scale=2):
            summary = gr.Markdown(value="_Predictions will appear here._")
            results = gr.Dataframe(
                label="Predictions (first 50 rows of test split)",
                interactive=False,
                wrap=True,
            )

    # Both data sources feed the same three outputs.
    file.change(
        fn=load_csv,
        inputs=[file],
        outputs=[df_state, target, status],
    )
    example.change(
        fn=load_example,
        inputs=[example],
        outputs=[df_state, target, status],
    )
    run.click(
        fn=run_prediction,
        inputs=[df_state, target, test_frac],
        outputs=[summary, results],
    )

    gr.Markdown(value=FOOTER)
|
|
|
|
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT when set and
    # defaults to 7860 otherwise.
    port = int(os.environ.get("PORT", 7860))
    app = demo.queue()
    app.launch(server_name="0.0.0.0", server_port=port, show_error=True)
|
|