Spaces:

Waniss
/

Project

No application file

App Files Files Community

Project / pages /prediction.py

Waniss

Upload 17 files

e1c192a verified 5 months ago

raw

history blame contribute delete

6.45 kB

	import os
	import joblib
	import numpy as np
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix

	import dash
	from dash import html, dcc, Input, Output, State
	import dash_bootstrap_components as dbc

	# Register this module as the "/prediction" page of the Dash app.
	dash.register_page(__name__, path="/prediction", name="Prediction")

	# ----------------------------
	# Paths
	# ----------------------------
	# Both artifacts sit in a "saved_models" folder next to this file.
	BASE_DIR = os.path.dirname(__file__)
	_ARTIFACT_DIR = os.path.join(BASE_DIR, "saved_models")
	MODEL_PATH = os.path.join(_ARTIFACT_DIR, "logreg_breastcancer_reduced.pkl")
	TRAIN_PATH = os.path.join(_ARTIFACT_DIR, "X_train_y_train.csv")

	# ----------------------------
	# Load model and training data
	# ----------------------------
	# Both artifacts are required at import time; a missing file aborts the page
	# load with an explicit message instead of a lower-level loader error.
	# NOTE(review): joblib files are pickle-based; only load trusted artifacts.
	if not os.path.exists(MODEL_PATH):
	    raise FileNotFoundError(f"Model file not found at {MODEL_PATH}")
	model = joblib.load(MODEL_PATH)

	if not os.path.exists(TRAIN_PATH):
	    raise FileNotFoundError(f"Training data CSV not found at {TRAIN_PATH}")
	train_data = pd.read_csv(TRAIN_PATH)

	# ----------------------------
	# Features used in model
	# ----------------------------
	# Column names selected from the training data and fed to the model.
	features = [
	    'texture error',
	    'area error',
	    'smoothness error',
	    'concavity error',
	    'symmetry error',
	    'fractal dimension error',
	    'worst concavity',
	]

	# Display labels for the input form: the column names in title case,
	# kept in the same order as `features`.
	feature_labels = [column.title() for column in features]

	# Split the training frame into the model's input columns and the label column.
	X_train = train_data[features]
	y_train = train_data['target']

	# ----------------------------
	# Precompute performance plots
	# ----------------------------
	# The figures are built once at import time from the training data, so the
	# metrics they show are training-set (not held-out) performance.
	y_proba_train = model.predict_proba(X_train)[:, 1]
	y_pred_train = model.predict(X_train)

	# ROC Curve
	fpr, tpr, _ = roc_curve(y_train, y_proba_train)
	auc_score = roc_auc_score(y_train, y_proba_train)
	fig_roc = go.Figure()
	fig_roc.add_trace(go.Scatter(
	    x=fpr,
	    y=tpr,
	    mode='lines',
	    line=dict(color="#e61227", width=3),
	    name=f"AUC={auc_score:.3f}",
	))
	# Diagonal reference line; it is named so the legend does not fall back
	# to the auto-generated "trace 1".
	fig_roc.add_trace(go.Scatter(
	    x=[0, 1],
	    y=[0, 1],
	    mode='lines',
	    line=dict(color="gray", dash="dash"),
	    name="Chance",
	))
	fig_roc.update_layout(
	    title="ROC Curve",
	    xaxis_title="False Positive Rate",
	    yaxis_title="True Positive Rate",
	    margin=dict(l=20, r=20, t=40, b=20),
	)

	# Feature importance
	# Take the last pipeline step and, when it exposes `coef_`, draw one bar per
	# feature from the flattened coefficients.
	clf = list(model.named_steps.values())[-1]
	if not hasattr(clf, "coef_"):
	    # Nothing to plot for this estimator: keep an empty placeholder figure.
	    fig_feat = go.Figure()
	else:
	    coefs = clf.coef_.ravel()
	    fig_feat = px.bar(
	        x=features,
	        y=coefs,
	        color=coefs,
	        color_continuous_scale="RdPu",
	        title="Feature Importance",
	    )
	    fig_feat.update_layout(margin=dict(l=20, r=20, t=40, b=20))

	# Confusion Matrix
	# labels=[0, 1] pins the matrix to 2x2 in the same class order as the
	# hard-coded axis labels below.
	cm = confusion_matrix(y_train, y_pred_train, labels=[0, 1])
	fig_cm = go.Figure(data=go.Heatmap(
	    z=cm,
	    x=["Pred Malignant (0)", "Pred Benign (1)"],
	    y=["Actual Malignant (0)", "Actual Benign (1)"],
	    colorscale="RdPu",
	    showscale=False,
	    # The colour bar is hidden, so print each count inside its cell;
	    # otherwise the values are only visible on hover.
	    texttemplate="%{z}",
	))
	fig_cm.update_layout(title="Confusion Matrix", margin=dict(l=20, r=20, t=40, b=20))

	# ----------------------------
	# Layout
	# ----------------------------
	# Page layout: a heading, an input card and a card with the three
	# precomputed figures. Style dictionaries use camelCase property names,
	# as Dash expects and as the prediction callback in this file already does.
	layout = html.Div([
	    html.H2("Breast Cancer Prediction", style={"marginBottom": "30px"}),

	    # Input card: one numeric field per model feature, the predict button,
	    # and the container that receives the prediction result.
	    dbc.Card(
	        dbc.CardBody([
	            dbc.Row([
	                dbc.Col([
	                    html.Label(label),
	                    dbc.Input(id=f"input-{feat}", type="number", step=0.0001, value=0),
	                ], width=3, className="mb-2")
	                for feat, label in zip(features, feature_labels)
	            ], className="mb-3"),
	            dbc.Button("Run Diagnostic Prediction", id="predict-btn", color="light", className="mb-3"),
	            html.Div(id="prediction-output"),
	        ]),
	        style={
	            "background": "linear-gradient(135deg, #ff77b4, #e61227)",
	            "color": "#fff",
	            "boxShadow": "0 4px 15px rgba(0,0,0,0.2)",
	            "padding": "20px",
	            "borderRadius": "10px",
	            "marginBottom": "40px",
	        },
	    ),

	    # Performance card: ROC curve, coefficient chart and confusion matrix
	    # side by side.
	    dbc.Card(
	        dbc.CardBody([
	            dbc.Row([
	                dbc.Col(dcc.Graph(figure=fig_roc), md=4),
	                dbc.Col(dcc.Graph(figure=fig_feat), md=4),
	                dbc.Col(dcc.Graph(figure=fig_cm), md=4),
	            ]),
	        ]),
	    ),
	], style={"margin": "20px 3%"})

	# ----------------------------
	# Callback for user prediction
	# ----------------------------
	def _probability_column(caption, probability, bar_color):
	    """Build one half-width column: a captioned percentage above a progress bar."""
	    return dbc.Col([
	        html.P(f"{caption}: {probability:.2%}"),
	        dbc.Progress(value=probability * 100, color=bar_color, style={"height": "10px"}),
	    ], md=6)


	@dash.callback(
	    Output("prediction-output", "children"),
	    Input("predict-btn", "n_clicks"),
	    [State(f"input-{feat}", "value") for feat in features],
	)
	def predict_user(n_clicks, *vals):
	    """Classify the values entered in the form and render the result.

	    Args:
	        n_clicks: Click count of the predict button; while it is unset
	            (no click yet) nothing is rendered.
	        *vals: Current values of the feature inputs, in the order of
	            `features`. A value of None (presumably an empty field) is
	            treated as 0.0.

	    Returns:
	        An empty string before the first click, a result panel with the
	        predicted class and both class probabilities on success, or a red
	        error panel when the inputs cannot be converted or the model fails.
	    """
	    if not n_clicks:
	        return ""

	    try:
	        # Coercion happens inside the try so a non-numeric value is reported
	        # in the error panel instead of escaping the callback.
	        cleaned_vals = [float(v) if v is not None else 0.0 for v in vals]
	        x_input = pd.DataFrame([cleaned_vals], columns=features)

	        # Binary prediction (0 = Malignant, 1 = Benign).
	        y_pred = model.predict(x_input)[0]

	        # probas[0] is for class 0 (Malignant), probas[1] for class 1 (Benign).
	        probas = model.predict_proba(x_input)[0]
	        prob_malignant = probas[0]
	        prob_benign = probas[1]
	    except Exception as e:
	        # UI boundary: surface any conversion or model failure to the user.
	        return html.Div(
	            f"Prediction Error: {e}",
	            style={"color": "white", "background": "red", "padding": "10px"},
	        )

	    if y_pred == 0:
	        result_text, badge_color = "MALIGNANT", "danger"
	    else:
	        result_text, badge_color = "BENIGN", "success"

	    return html.Div([
	        html.Hr(style={"borderTop": "1px solid white"}),
	        html.H3([
	            "Model Classification: ",
	            dbc.Badge(result_text, color=badge_color, className="ms-2"),
	        ], style={"fontWeight": "bold"}),

	        dbc.Row([
	            _probability_column("Probability of Malignant (Class 0)", prob_malignant, "dark"),
	            _probability_column("Probability of Benign (Class 1)", prob_benign, "info"),
	        ], className="mt-3"),
	    ], style={"color": "white"})