01RAI's picture
Upgrade to gradio 5.4.0 + explicit launch config for HF Spaces
f2c3b11 verified
"""
PredictLM Playground — Gradio demo for predictlm-mini-13m.
Upload a CSV → pick target column → get predictions on a held-out split.
Single-model fast path (no Duo, no TTT). For the full 0.751/0.609 recipe,
see `pip install predictlm`.
"""
import os
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score
from predictlm import PredictLM
# Load model once at startup (module import time) so every request reuses the
# same instance. First request after cold-start downloads the 54MB Mini
# checkpoint; cached for subsequent requests.
print("Loading predictlm-mini-13m (single-model mode for speed)...")
MODEL = PredictLM.from_pretrained(
    "zerooneresearch/predictlm-mini-13m",
    device="cpu",
    # NOTE(review): presumably turns off the "Duo" recipe mentioned in the
    # module docstring (single-model fast path) — confirm against predictlm.
    auto_duo=False,
)
print("Model loaded.")
# Row cap applied by load_csv: larger CSVs are randomly sampled down to this size.
MAX_ROWS = 1100
# Upper bound on numeric feature columns; run_prediction rejects wider CSVs.
MAX_FEATURES = 128
# Built-in examples: dropdown label -> CSV path (relative to the working directory).
EXAMPLE_DATASETS = {
    "Breast cancer (classification, 569 rows × 30 features)": "examples/breast_cancer.csv",
    "California housing (regression, 1000 rows × 8 features)": "examples/california_housing.csv",
}
def load_csv(file) -> tuple:
    """Read a CSV and prepare the target-column dropdown for it.

    Args:
        file: Whatever the upload widget (or ``load_example``) hands over; it
            is passed straight to ``pd.read_csv``. ``None`` means nothing is
            selected.

    Returns:
        A 3-tuple ``(dataframe_or_None, target_dropdown_update, status_markdown)``.
        The DataFrame is ``None`` whenever nothing usable was loaded.
    """
    if file is None:
        return (
            None,
            gr.Dropdown(choices=[], value=None, interactive=False),
            "_Upload a CSV (or pick an example below) to start._",
        )

    try:
        df = pd.read_csv(file)
    except Exception as e:  # UI boundary: surface any read/parse failure as a status
        # Explicitly lock the dropdown: properties left unset on a returned
        # component keep their previous value, so without this the dropdown
        # would stay interactive after an earlier successful load even though
        # the stored DataFrame has just been cleared.
        return (
            None,
            gr.Dropdown(choices=[], value=None, interactive=False),
            f"❌ Could not read CSV: {e}",
        )

    # Keep the demo responsive: deterministically down-sample oversized files.
    if len(df) > MAX_ROWS:
        df = df.sample(n=MAX_ROWS, random_state=42).reset_index(drop=True)
        sample_note = f" (sampled to {MAX_ROWS} rows for speed)"
    else:
        sample_note = ""

    cols = list(df.columns)
    return (
        df,
        # The last column is pre-selected as the target.
        gr.Dropdown(choices=cols, value=cols[-1], interactive=True),
        f"✅ Loaded {len(df)} rows × {len(cols)} columns{sample_note}. "
        f"Default target is the last column — change it if needed.",
    )
def load_example(name):
    """Load one of the built-in example datasets by its dropdown label.

    Args:
        name: A key of ``EXAMPLE_DATASETS`` (the label shown in the examples
            dropdown).

    Returns:
        The same 3-tuple as ``load_csv``:
        ``(dataframe_or_None, target_dropdown_update, status_markdown)``.
    """
    path = EXAMPLE_DATASETS.get(name)
    if not path or not os.path.exists(path):
        # Lock the target dropdown explicitly, matching load_csv's "nothing
        # loaded" result; an unset `interactive` would keep whatever state a
        # previous successful load left behind.
        return (
            None,
            gr.Dropdown(choices=[], value=None, interactive=False),
            f"Example file not found: {path}",
        )
    return load_csv(path)
def run_prediction(df, target_col, test_frac):
    """Fit the shared model on a train split and score it on a held-out split.

    Args:
        df: DataFrame produced by ``load_csv`` (or ``None`` if nothing is loaded).
        target_col: Name of the column to predict (or ``None``).
        test_frac: Fraction of rows to hold out for evaluation.

    Returns:
        ``(summary_markdown, results_frame)``. On any validation, prediction or
        scoring failure the summary is an explanatory message and the frame is
        ``None``. On success the frame holds at most the first 50 test rows.
    """
    if df is None or target_col is None:
        return "_Load a CSV first._", None
    if target_col not in df.columns:
        return f"❌ Target column **{target_col}** not in CSV.", None

    # Rows without a target value cannot be used for training or scoring.
    df = df.dropna(subset=[target_col]).copy()
    n = len(df)
    if n < 20:
        return f"❌ Need at least 20 rows after dropping NA target. Got {n}.", None

    # Only numeric columns are used as features; everything else is ignored.
    feature_cols = [c for c in df.columns if c != target_col]
    numeric_feats = [c for c in feature_cols if pd.api.types.is_numeric_dtype(df[c])]
    if not numeric_feats:
        return (
            "❌ No numeric feature columns found. PredictLM v1 expects numeric features "
            "(encode categoricals first).",
            None,
        )
    if len(numeric_feats) > MAX_FEATURES:
        return (
            f"❌ PredictLM v1 supports ≤{MAX_FEATURES} features. CSV has "
            f"{len(numeric_feats)} numeric features.",
            None,
        )

    # Hold out at least 5 rows; the shuffle is seeded so reruns give the same split.
    test_n = max(5, int(n * test_frac))
    train_n = n - test_n
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    train_df = df.iloc[:train_n]
    test_df = df.iloc[train_n:]

    X_train = train_df[numeric_feats].values.astype(np.float32)
    y_train = train_df[target_col].values
    X_test = test_df[numeric_feats].values.astype(np.float32)
    y_test = test_df[target_col].values

    try:
        preds = MODEL.fit(X_train, y_train).predict(X_test)
    except Exception as e:
        return f"❌ Prediction error: {e}", None

    # Task heuristic: a numeric target with more than 10 distinct values is
    # scored as regression, anything else as classification.
    is_regression = (
        pd.api.types.is_numeric_dtype(df[target_col]) and df[target_col].nunique() > 10
    )

    # Scoring can fail too (e.g. the metric rejects the prediction type when
    # the heuristic above disagrees with what the model returned). Report that
    # as a status message like every other failure instead of letting the
    # handler raise.
    try:
        result = pd.DataFrame(
            {
                "actual": y_test,
                "predicted": preds,
            }
        )
        if is_regression:
            r2 = r2_score(y_test, preds)
            mae = mean_absolute_error(y_test, preds)
            result["error"] = (result["actual"] - result["predicted"]).round(4)
        else:
            acc = accuracy_score(y_test, preds)
            result["correct"] = result["actual"] == result["predicted"]
    except Exception as e:
        return f"❌ Could not evaluate predictions: {e}", None

    if is_regression:
        summary = (
            f"**Regression** · {len(numeric_feats)} features · n_train = {train_n} · "
            f"n_test = {test_n}\n\n"
            f"R² = **{r2:.3f}** · MAE = **{mae:.3f}**\n\n"
            f"_Single-model fast path. Full Duo + TTT recipe averages 0.609 R² across_ "
            f"_25 OpenML regression datasets._"
        )
    else:
        # Class count is taken from the held-out split only.
        n_classes = pd.Series(y_test).nunique()
        summary = (
            f"**Classification** · {len(numeric_feats)} features · {n_classes} classes · "
            f"n_train = {train_n} · n_test = {test_n}\n\n"
            f"Accuracy = **{acc:.3f}**\n\n"
            f"_Single-model fast path. Full Duo + TTT recipe averages 0.751 accuracy_ "
            f"_across 25 OpenML classification datasets._"
        )

    # The table is capped at 50 rows; the metrics above cover the full test split.
    return summary, result.head(50)
# Markdown shown at the top of the page (rendered via gr.Markdown(HEADER)).
HEADER = """
# PredictLM Playground
Upload a CSV, pick a target column, and run **predictlm-mini-13m** on it — a 13M-parameter open-weight tabular foundation model, Apache-2.0.
> **Note**: This Space runs single-model fast-path (no Duo + TTT) for snappy responses. Local Python with `pip install predictlm` gets the full 0.751 / 0.609 OpenML numbers.
"""
# Markdown shown at the bottom of the page (rendered via gr.Markdown(FOOTER)).
FOOTER = """
---
[Model card](https://huggingface.co/zerooneresearch/predictlm-mini-13m) · [PyPI `pip install predictlm`](https://pypi.org/project/predictlm/) · [Source on GitHub](https://github.com/matej-01RAI/predictlm-mcp) · [Org](https://huggingface.co/zerooneresearch)
PredictLM is built by [Zero One Research](https://huggingface.co/zerooneresearch), an independent AI lab in Bratislava, EU.
"""
# Page layout: controls in a narrow left column, results in a wider right
# column, then the event wiring and the footer.
with gr.Blocks(theme=gr.themes.Soft(), title="PredictLM Playground") as demo:
    gr.Markdown(HEADER)

    # Carries the loaded DataFrame from the load handlers to the predict handler.
    df_state = gr.State(None)

    with gr.Row():
        # Left: data source, target selection, split size, run button, status.
        with gr.Column(scale=1):
            file = gr.File(file_types=[".csv"], label="Upload CSV")
            example = gr.Dropdown(
                label="…or pick a built-in example",
                choices=list(EXAMPLE_DATASETS),
                value=None,
            )
            target = gr.Dropdown(choices=[], interactive=False, label="Target column")
            test_frac = gr.Slider(
                minimum=0.1,
                maximum=0.5,
                step=0.05,
                value=0.2,
                label="Test fraction (held-out for evaluation)",
            )
            run = gr.Button(value="Predict", size="lg", variant="primary")
            status = gr.Markdown(
                value="_Upload a CSV (or pick an example below) to start._"
            )

        # Right: metric summary and the prediction table.
        with gr.Column(scale=2):
            summary = gr.Markdown(value="_Predictions will appear here._")
            results = gr.Dataframe(
                wrap=True,
                interactive=False,
                label="Predictions (first 50 rows of test split)",
            )

    # Both data sources feed the same three outputs.
    file.change(fn=load_csv, inputs=[file], outputs=[df_state, target, status])
    example.change(fn=load_example, inputs=[example], outputs=[df_state, target, status])
    run.click(
        fn=run_prediction,
        inputs=[df_state, target, test_frac],
        outputs=[summary, results],
    )

    gr.Markdown(FOOTER)
if __name__ == "__main__":
    # Listen on all interfaces; use PORT from the environment when set,
    # otherwise fall back to 7860. Errors are shown in the UI.
    demo.queue().launch(
        show_error=True,
        server_port=int(os.getenv("PORT", 7860)),
        server_name="0.0.0.0",
    )