# Spaces: Sleeping
| import streamlit as st | |
| import joblib | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| from predict import predict_single | |
# The browser-tab title and the on-page heading use the same text.
PAGE_TITLE = "Multiple Disease Prediction"

# Page setup: centered layout, heading, and a one-line usage hint.
st.set_page_config(page_title=PAGE_TITLE, layout="centered")
st.title(PAGE_TITLE)
st.write("Choose a disease model, enter the patient data, and get prediction probabilities.")
MODEL_DIR = "models"


def discover_models(model_dir, prefix="model_", suffix=".joblib"):
    """Map disease names to the saved model files found in *model_dir*.

    A file named ``<prefix><disease><suffix>`` is registered under
    ``<disease>``. Only the leading prefix and trailing suffix are removed,
    so a disease name that itself contains ``model_`` or ``.joblib`` is kept
    intact.

    Args:
        model_dir: Directory to scan.
        prefix: Required start of a model file name.
        suffix: Required end of a model file name.

    Returns:
        A dict of disease name -> file path, ordered by file name. Empty when
        *model_dir* is missing or is not a directory.
    """
    # isdir (rather than exists) also rejects a regular file with this name,
    # which os.listdir would refuse with NotADirectoryError.
    if not os.path.isdir(model_dir):
        return {}

    found = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting option order stable between runs.
    for fname in sorted(os.listdir(model_dir)):
        if fname.startswith(prefix) and fname.endswith(suffix):
            disease = fname[len(prefix):len(fname) - len(suffix)]
            found[disease] = os.path.join(model_dir, fname)
    return found


# load available model files
AVAILABLE_MODELS = discover_models(MODEL_DIR)
# Without at least one saved model there is nothing to select, so explain
# how to create one and end this script run.
if not AVAILABLE_MODELS:
    st.warning("No saved models found in the models/ folder. Train models with train_models.py first.")
    st.stop()

disease = st.selectbox("Select disease model", options=list(AVAILABLE_MODELS.keys()))
model_path = AVAILABLE_MODELS[disease]

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only model files from a trusted source belong in the models folder.
try:
    model = joblib.load(model_path)
except Exception as e:
    # A corrupt or incompatible file is reported through st.error, like the
    # parse and prediction failures further down, instead of a raw traceback.
    st.error(f"Could not load model for **{disease}**: {e}")
    st.stop()

st.write(f"Loaded model for **{disease}**")
def parse_feature_value(text):
    """Convert one typed-in value to ``int`` or ``float`` when it is a number.

    Anything that is not a number is returned unchanged as a string, so
    categorical values such as ``Male`` pass through. This never raises:
    inputs like ``--5`` simply stay strings.

    Args:
        text: The value part of a ``name:value`` pair, already stripped.

    Returns:
        An ``int``, a ``float``, or the original string.
    """
    # int()/float() also accept spellings that should stay text here:
    # digit-group underscores ("1_000") and the words "nan"/"inf", which
    # contain no digit at all.
    if "_" in text or not any(ch.isdigit() for ch in text):
        return text
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return text


def parse_feature_pairs(raw):
    """Parse ``"name:value, name:value"`` text into a feature dict.

    Empty segments between commas are ignored. When a name is repeated, the
    last value wins.

    Args:
        raw: The text typed by the user.

    Returns:
        A dict of feature name -> parsed value (see ``parse_feature_value``).

    Raises:
        ValueError: If a segment has no ``:`` or no name before it, or if no
            pair is present at all.
    """
    features = {}
    for chunk in raw.split(","):
        chunk = chunk.strip()
        if not chunk:
            continue
        # Split on the first colon only, so values may contain colons.
        name, sep, value = chunk.partition(":")
        name = name.strip()
        if not sep or not name:
            raise ValueError(f"expected name:value but got '{chunk}'")
        features[name] = parse_feature_value(value.strip())
    if not features:
        raise ValueError("no name:value pairs found")
    return features


# The expected feature names are not read from the loaded model, so the user
# supplies them: either as the header of an uploaded CSV or as typed
# name:value pairs.
st.write("You can either upload a CSV row with the features, or fill the form below.")
uploaded = st.file_uploader("Upload a CSV file with a single row (columns = feature names)", type=["csv"])

# Stays None until a usable single-row frame has been built.
input_df = None
if uploaded:
    try:
        frame = pd.read_csv(uploaded)
    except ValueError as e:
        # pandas' empty-data and parser errors, and decoding errors, are all
        # ValueError subclasses.
        st.error(f"Couldn't read CSV: {e}")
    else:
        if frame.empty:
            # A header-only file would otherwise fail later with an
            # out-of-bounds error when the first row is read.
            st.error("The uploaded CSV contains no data rows.")
        else:
            if frame.shape[0] > 1:
                st.info("Only the first row will be used.")
            input_df = frame.iloc[[0]]
else:
    st.info("No CSV uploaded — provide feature values manually.")
    # NOTE(review): the original author's note says the pipeline ignores
    # unknown columns and fails for missing ones; that depends on the saved
    # pipeline and cannot be confirmed from this file.
    st.write("Enter features as name:value pairs (comma separated). Example: age:45, sex:Male, serum_creatinine:1.2")
    kv = st.text_input("Feature inputs (name:value pairs)", placeholder="age:45, sex:Male")
    if kv:
        try:
            input_df = pd.DataFrame([parse_feature_pairs(kv)])
        except ValueError as e:
            st.error(f"Couldn't parse input: {e}")
def resolve_class_labels(model, n_classes):
    """Return one label per probability column for *model*.

    ``classes_`` is read from the step named ``clf`` when the model has a
    ``named_steps`` mapping containing it, otherwise from the model itself.
    When no labels are available, or their count differs from *n_classes*,
    the column indices ``0 .. n_classes - 1`` are used instead.

    Args:
        model: The loaded estimator or pipeline.
        n_classes: Number of probability columns to label.

    Returns:
        A list of *n_classes* labels.
    """
    # presumably named_steps is a dict-like mapping, as on a scikit-learn
    # Pipeline; a model without it is treated as a plain estimator.
    steps = getattr(model, "named_steps", None) or {}
    estimator = steps.get("clf", model)
    labels = getattr(estimator, "classes_", None)
    if labels is None or len(labels) != n_classes:
        return list(range(n_classes))
    return list(labels)


if input_df is not None:
    st.write("Input provided:")
    # Transposed so each feature is shown on its own row.
    st.dataframe(input_df.T)
    if st.button("Predict"):
        try:
            # Convert input_df to dict of first row
            sample = input_df.iloc[0].to_dict()
            result = predict_single(model, sample)
            prediction = result["prediction"]
            probability = result["probability"]
        except Exception as e:
            # predict_single is opaque from here, so any failure in the
            # prediction step is shown to the user instead of a traceback.
            st.error(f"Prediction failed: {e}")
        else:
            # Display code runs outside the try, so a rendering problem is
            # not misreported as a failed prediction.
            st.subheader("Prediction")
            st.write(prediction)
            if probability is not None:
                st.subheader("Probabilities")
                proba = np.array(probability)
                if proba.ndim == 2:
                    # One column per class: label the columns and show them
                    # as rows.
                    labels = resolve_class_labels(model, proba.shape[1])
                    dfp = pd.DataFrame(proba, columns=[f"prob_{c}" for c in labels])
                    st.dataframe(dfp.T)
                else:
                    st.write(proba)