# Provenance (Hugging Face file view): uploaded by zaid002,
# commit 3816533 "Upload app (4).py" (verified).
import streamlit as st
import joblib
import pandas as pd
import numpy as np
import os
from predict import predict_single
# Page chrome: browser-tab title, centered layout, heading and intro text.
# NOTE: Streamlit documents set_page_config as a call that should run before
# other st.* commands, so keep it first in the script.
_APP_TITLE = "Multiple Disease Prediction"
st.set_page_config(layout="centered", page_title=_APP_TITLE)
st.title(_APP_TITLE)
st.write("Choose a disease model, enter the patient data, and get prediction probabilities.")
# Directory scanned for serialized models, relative to the working directory.
MODEL_DIR = "models"
# A model file is named "<prefix><disease><suffix>", e.g. "model_heart.joblib".
_MODEL_PREFIX = "model_"
_MODEL_SUFFIX = ".joblib"

# Maps disease name -> path of the matching model file in MODEL_DIR.
AVAILABLE_MODELS = {}
# isdir (rather than exists) so a stray regular file called "models" counts as
# "no models" instead of making os.listdir raise. sorted() keeps the select box
# order stable between reruns, since os.listdir returns entries in arbitrary
# order.
for fname in sorted(os.listdir(MODEL_DIR)) if os.path.isdir(MODEL_DIR) else []:
    if fname.startswith(_MODEL_PREFIX) and fname.endswith(_MODEL_SUFFIX):
        # Slice off only the leading prefix and trailing suffix; str.replace
        # would also delete "model_" / ".joblib" occurring inside the name
        # (e.g. "model_heart_model_v2.joblib" must stay "heart_model_v2").
        disease = fname[len(_MODEL_PREFIX):-len(_MODEL_SUFFIX)]
        AVAILABLE_MODELS[disease] = os.path.join(MODEL_DIR, fname)

# Nothing to predict with: tell the user and halt this script run.
if not AVAILABLE_MODELS:
    st.warning("No saved models found in the models/ folder. Train models with train_models.py first.")
    st.stop()
# Let the user pick one of the discovered models, then deserialize it.
# NOTE: joblib.load unpickles the file, so MODEL_DIR should only ever contain
# model files from a trusted source.
model_choices = list(AVAILABLE_MODELS)
disease = st.selectbox("Select disease model", options=model_choices)
model_path = AVAILABLE_MODELS[disease]
model = joblib.load(model_path)
st.write(f"Loaded model for **{disease}**")
def _coerce_value(text):
    """Convert a raw form value to ``int`` or ``float`` when it is numeric.

    Integers are tried first, then floats (which also covers signed and
    scientific notation such as ``+5`` or ``1e-3``). Anything that does not
    parse as a finite number, including ``nan``/``inf`` spellings, is returned
    unchanged as a string so categorical values like ``Male`` pass through.
    """
    try:
        return int(text)
    except ValueError:
        pass
    try:
        number = float(text)
    except ValueError:
        return text
    return number if np.isfinite(number) else text


def _parse_feature_pairs(text):
    """Parse ``name:value, name:value`` text into a ``{name: value}`` dict.

    Values are passed through ``_coerce_value``. Empty chunks produced by
    stray or trailing commas are skipped.

    Raises:
        ValueError: if a pair has no ``:`` separator or an empty name, or if
            the text contains no pairs at all.
    """
    features = {}
    for chunk in text.split(","):
        chunk = chunk.strip()
        if not chunk:
            continue
        # partition splits on the first ":" only, so values may contain ":".
        name, separator, raw_value = chunk.partition(":")
        name = name.strip()
        if not separator or not name:
            raise ValueError(f"expected name:value, got '{chunk}'")
        features[name] = _coerce_value(raw_value.strip())
    if not features:
        raise ValueError("no name:value pairs found")
    return features


# Feature names are not read from the loaded model here, so the user supplies
# them: either as the header of an uploaded CSV or as typed name:value pairs.
st.write("You can either upload a CSV row with the features, or fill the form below.")
uploaded = st.file_uploader("Upload a CSV file with a single row (columns = feature names)", type=["csv"])
# Stays None until a usable single-row DataFrame has been built.
input_df = None

if uploaded is not None:
    try:
        input_df = pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        # Report an unreadable upload in the page instead of a raw traceback.
        st.error(f"Couldn't read the uploaded CSV: {e}")
    else:
        if input_df.shape[0] == 0:
            # Header only: there is no row to predict on.
            st.error("The uploaded CSV has a header but no data row.")
            input_df = None
        elif input_df.shape[0] > 1:
            st.info("Only the first row will be used.")
            input_df = input_df.iloc[[0]]
else:
    st.info("No CSV uploaded — provide feature values manually.")
    # Free-form key/value entry.
    # NOTE(review): the original comment states the pipeline ignores unknown
    # columns and fails on missing ones; that cannot be confirmed from this
    # file.
    st.write("Enter features as name:value pairs (comma separated). Example: age:45, sex:Male, serum_creatinine:1.2")
    kv = st.text_input("Feature inputs (name:value pairs)", placeholder="age:45, sex:Male")
    if kv:
        try:
            input_df = pd.DataFrame([_parse_feature_pairs(kv)])
        except ValueError as e:
            st.error(f"Couldn't parse input: {e}")
def _class_labels(fitted_model, n_columns):
    """Return one label per probability column.

    Looks for a ``classes_`` attribute first on the pipeline step named
    ``clf`` (if the model has one), then on the model itself. Labels are used
    only when their count matches ``n_columns``; otherwise the column indices
    ``0..n_columns-1`` are returned so that display never fails on labels.
    """
    candidates = []
    try:
        candidates.append(fitted_model.named_steps["clf"])
    except (AttributeError, KeyError, TypeError):
        # Not a pipeline, or no step called "clf": fall through to the model.
        pass
    candidates.append(fitted_model)
    for estimator in candidates:
        labels = getattr(estimator, "classes_", None)
        if labels is not None and len(labels) == n_columns:
            return list(labels)
    return list(range(n_columns))


# Echo the collected input and, on request, run and display the prediction.
if input_df is not None:
    st.write("Input provided:")
    # Transposed so each feature is shown on its own row.
    st.dataframe(input_df.T)
    if st.button("Predict"):
        try:
            # predict_single receives the first row as a {feature: value} dict.
            sample = input_df.iloc[0].to_dict()
            result = predict_single(model, sample)
            st.subheader("Prediction")
            st.write(result["prediction"])
            probability = result["probability"]
            if probability is not None:
                st.subheader("Probabilities")
                proba = np.array(probability)
                if proba.ndim == 2:
                    # 2-D result: one column per class, labelled when possible.
                    classes = _class_labels(model, proba.shape[1])
                    dfp = pd.DataFrame(proba, columns=[f"prob_{c}" for c in classes])
                    st.dataframe(dfp.T)
                else:
                    st.write(proba)
        except Exception as e:
            # UI boundary: predict_single is opaque here, so report any
            # failure in the page rather than crashing the script run.
            st.error(f"Prediction failed: {e}")