Spaces:

zaid002
/

multiple-disease-prediction

Sleeping

App Files Files Community

multiple-disease-prediction / src /app (4).py

zaid002

Upload app (4).py

3816533 verified 7 months ago

raw

history blame contribute delete

3.95 kB

	import streamlit as st
	import joblib
	import pandas as pd
	import numpy as np
	import os
	from predict import predict_single

	# Page chrome: the same title is used for the browser tab and the on-page heading.
	_PAGE_TITLE = "Multiple Disease Prediction"

	st.set_page_config(page_title=_PAGE_TITLE, layout="centered")
	st.title(_PAGE_TITLE)

	# One-line description of the workflow shown under the heading.
	st.write("Choose a disease model, enter the patient data, and get prediction probabilities.")

	MODEL_DIR = "models"
	_MODEL_PREFIX = "model_"
	_MODEL_SUFFIX = ".joblib"


	def _discover_models(model_dir):
	    """Map disease name -> file path for each ``model_<disease>.joblib`` in *model_dir*.

	    Returns an empty dict when *model_dir* does not exist or is not a
	    directory. Files are visited in sorted order so the resulting dict (and
	    the selectbox built from it) has a stable order across runs.
	    """
	    # isdir (not exists): a regular file named like the folder must not reach listdir.
	    if not os.path.isdir(model_dir):
	        return {}
	    found = {}
	    for fname in sorted(os.listdir(model_dir)):
	        if not (fname.startswith(_MODEL_PREFIX) and fname.endswith(_MODEL_SUFFIX)):
	            continue
	        # Strip only the leading prefix and trailing suffix; a disease name
	        # may itself contain "model_" or ".joblib".
	        disease = fname[len(_MODEL_PREFIX):-len(_MODEL_SUFFIX)]
	        if disease:
	            found[disease] = os.path.join(model_dir, fname)
	    return found


	# Disease name -> path of the saved model file.
	AVAILABLE_MODELS = _discover_models(MODEL_DIR)

	if not AVAILABLE_MODELS:
	    # Nothing to select from: tell the user and halt this script run.
	    st.warning("No saved models found in the models/ folder. Train models with train_models.py first.")
	    st.stop()

	# Offer every discovered disease name and load the model the user picked.
	disease = st.selectbox("Select disease model", options=[*AVAILABLE_MODELS])
	model_path = AVAILABLE_MODELS[disease]

	# NOTE(review): joblib.load unpickles the file, so only trusted model files
	# should be placed in MODEL_DIR.
	model = joblib.load(model_path)

	st.write(f"Loaded model for {disease}")

	def _coerce_value(text):
	    """Return *text* as int or float when it is a plain decimal literal, else unchanged.

	    A literal is an optional single leading ``-`` followed by digits with at
	    most one ``.`` (``45``, ``-3``, ``1.2``, ``.5``, ``5.``). Everything else,
	    including ``--5`` or ``1.2.3``, is kept as a string.
	    """
	    unsigned = text[1:] if text.startswith("-") else text
	    whole, dot, frac = unsigned.partition(".")
	    # isdecimal() rather than isdigit(): isdigit() also accepts characters
	    # such as superscript digits, which int() rejects.
	    if not (whole + frac).isdecimal():
	        return text
	    return float(text) if dot else int(text)


	def _parse_feature_pairs(text):
	    """Parse ``"name:value, name:value"`` into a dict of feature name -> value.

	    Values are passed through ``_coerce_value``; a later duplicate name
	    overrides an earlier one.

	    Raises:
	        ValueError: if a pair has no ``:``, a name is empty, or no pairs are given.
	    """
	    features = {}
	    for pair in (chunk.strip() for chunk in text.split(",")):
	        if not pair:
	            continue
	        name, sep, raw = pair.partition(":")
	        name = name.strip()
	        if not sep or not name:
	            raise ValueError(f"expected name:value but got {pair!r}")
	        features[name] = _coerce_value(raw.strip())
	    if not features:
	        raise ValueError("no name:value pairs found")
	    return features


	# Feature names are not read from the loaded model here, so the user supplies
	# them: either as a one-row CSV or as free-form name:value pairs.
	st.write("You can either upload a CSV row with the features, or fill the form below.")

	uploaded = st.file_uploader("Upload a CSV file with a single row (columns = feature names)", type=["csv"])
	input_df = None
	if uploaded:
	    try:
	        input_df = pd.read_csv(uploaded)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
	        # Unreadable upload: report it and leave input_df as None.
	        st.error(f"Couldn't read CSV: {e}")
	    else:
	        if input_df.shape[0] == 0:
	            # Header-only file: there is no row to predict on.
	            st.error("The uploaded CSV has no data rows.")
	            input_df = None
	        elif input_df.shape[0] > 1:
	            st.info("Only the first row will be used.")
	            input_df = input_df.iloc[[0]]
	else:
	    st.info("No CSV uploaded — provide feature values manually.")
	    # Free-form entry; whether unknown or missing columns are tolerated is up
	    # to the loaded model, not checked here.
	    st.write("Enter features as name:value pairs (comma separated). Example: age:45, sex:Male, serum_creatinine:1.2")
	    kv = st.text_input("Feature inputs (name:value pairs)", placeholder="age:45, sex:Male")
	    if kv:
	        try:
	            input_df = pd.DataFrame([_parse_feature_pairs(kv)])
	        except ValueError as e:
	            st.error(f"Couldn't parse input: {e}")

	def _class_labels(model, n_classes):
	    """Return one label per probability column for *model*.

	    Looks for ``classes_`` on the model's ``clf`` step first, then on the
	    model itself. Falls back to ``0 .. n_classes - 1`` when neither exists or
	    the label count does not match *n_classes*.
	    """
	    steps = getattr(model, "named_steps", None)
	    clf = steps.get("clf") if hasattr(steps, "get") else None
	    labels = getattr(clf, "classes_", None)
	    if labels is None:
	        # Model without a 'clf' step (or not a pipeline at all).
	        labels = getattr(model, "classes_", None)
	    if labels is None or len(labels) != n_classes:
	        return list(range(n_classes))
	    return list(labels)


	if input_df is not None:
	    # Echo the input transposed so each feature is shown on its own row.
	    st.write("Input provided:")
	    st.dataframe(input_df.T)

	    if st.button("Predict"):
	        try:
	            # predict_single takes the first (only) row as a plain dict.
	            sample = input_df.iloc[0].to_dict()
	            result = predict_single(model, sample)
	            st.subheader("Prediction")
	            st.write(result["prediction"])
	            if result["probability"] is not None:
	                st.subheader("Probabilities")
	                proba = np.array(result["probability"])
	                if proba.ndim == 2:
	                    # 2-D array: label each column with its class.
	                    labels = _class_labels(model, proba.shape[1])
	                    dfp = pd.DataFrame(proba, columns=[f"prob_{c}" for c in labels])
	                    st.dataframe(dfp.T)
	                else:
	                    st.write(proba)
	        except Exception as e:
	            # UI boundary: surface any failure in the page instead of a traceback.
	            st.error(f"Prediction failed: {e}")