Update app.py
Browse files
app.py
CHANGED
|
@@ -3,24 +3,25 @@ import pandas as pd
|
|
| 3 |
import numpy as np
|
| 4 |
import pickle
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
final_model = pickle.load(
|
| 10 |
|
| 11 |
-
|
| 12 |
-
scaler = pickle.load(
|
| 13 |
|
| 14 |
-
|
| 15 |
-
label_encoder = pickle.load(
|
| 16 |
|
| 17 |
-
print("✓ Models, Scaler, and Label Encoder loaded successfully.")
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
#
|
| 21 |
-
#
|
| 22 |
-
|
| 23 |
-
|
| 24 |
'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
|
| 25 |
'smoothness_mean', 'compactness_mean', 'concavity_mean',
|
| 26 |
'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
|
|
@@ -32,66 +33,63 @@ original_feature_columns = [
|
|
| 32 |
'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst'
|
| 33 |
]
|
| 34 |
|
| 35 |
-
def predict_cancer( *args ):
|
| 36 |
-
"""
|
| 37 |
-
Prediction function for Gradio interface.
|
| 38 |
-
Takes 30 numerical inputs, preprocesses them, and returns diagnosis and confidence.
|
| 39 |
-
"""
|
| 40 |
-
if len(args) != len(original_feature_columns):
|
| 41 |
-
raise ValueError(f"Expected {len(original_feature_columns)} inputs, but got {len(args)}")
|
| 42 |
-
|
| 43 |
-
# Create a DataFrame from the inputs
|
| 44 |
-
input_data = pd.DataFrame([args], columns=original_feature_columns)
|
| 45 |
-
|
| 46 |
-
# Apply scaling
|
| 47 |
-
input_scaled = scaler.transform(input_data)
|
| 48 |
-
input_scaled_df = pd.DataFrame(input_scaled, columns=original_feature_columns)
|
| 49 |
-
|
| 50 |
-
# Apply feature engineering (same as done during training)
|
| 51 |
-
if 'radius_mean' in input_scaled_df.columns and 'area_mean' in input_scaled_df.columns:
|
| 52 |
-
input_scaled_df['radius_area_ratio'] = input_scaled_df['radius_mean'] / (input_scaled_df['area_mean'] + 1e-6)
|
| 53 |
-
if 'perimeter_mean' in input_scaled_df.columns and 'area_mean' in input_scaled_df.columns:
|
| 54 |
-
input_scaled_df['perimeter_area_ratio'] = input_scaled_df['perimeter_mean'] / (input_scaled_df['area_mean'] + 1e-6)
|
| 55 |
-
if 'concavity_mean' in input_scaled_df.columns and 'concave points_mean' in input_scaled_df.columns:
|
| 56 |
-
input_scaled_df['concavity_points_product'] = input_scaled_df['concavity_mean'] * input_scaled_df['concave points_mean']
|
| 57 |
-
|
| 58 |
-
# Make prediction
|
| 59 |
-
prediction_proba = final_model.predict_proba(input_scaled_df)[0]
|
| 60 |
-
prediction_class_idx = np.argmax(prediction_proba)
|
| 61 |
-
prediction_class = label_encoder.inverse_transform([prediction_class_idx])[0]
|
| 62 |
-
|
| 63 |
-
confidence = prediction_proba[prediction_class_idx]
|
| 64 |
-
|
| 65 |
-
# Map output to more readable format
|
| 66 |
-
diagnosis_map = {'M': 'Malignant (Cancer)', 'B': 'Benign (Non-cancerous)'}
|
| 67 |
-
predicted_diagnosis = diagnosis_map.get(prediction_class, prediction_class)
|
| 68 |
-
|
| 69 |
-
return predicted_diagnosis, f"{confidence*100:.2f}%"
|
| 70 |
-
|
| 71 |
-
# Create Gradio input components
|
| 72 |
-
inputs = []
|
| 73 |
-
for col in original_feature_columns:
|
| 74 |
-
# Using gr.Number for all numerical features
|
| 75 |
-
inputs.append(gr.Number(label=col, value=0.0)) # Default value can be adjusted
|
| 76 |
-
|
| 77 |
-
# Example values from a benign case (e.g., from df.head() with diagnosis B)
|
| 78 |
-
# Using averages for a generic starting point, adjust as needed
|
| 79 |
-
example_inputs = [
|
| 80 |
-
12.45, 15.7 , 82.57, 477.1, 0.1045, 0.08947, 0.04991, 0.02111, 0.1716, 0.06337,
|
| 81 |
-
0.3344, 1.157 , 2.508 , 32.43, 0.007624, 0.01802, 0.01993, 0.008453, 0.01538, 0.003463,
|
| 82 |
-
13.78, 20.8 , 91.18, 592.7, 0.146 , 0.2158 , 0.1672 , 0.07899, 0.2823, 0.07526
|
| 83 |
-
]
|
| 84 |
|
| 85 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
interface = gr.Interface(
|
| 87 |
fn=predict_cancer,
|
| 88 |
inputs=inputs,
|
| 89 |
-
outputs=[
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
| 93 |
)
|
| 94 |
|
| 95 |
-
#
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import pickle
|
| 5 |
|
| 6 |
+
# ==============================
|
| 7 |
+
# Load Saved Model Files
|
| 8 |
+
# ==============================
|
| 9 |
|
| 10 |
+
def _read_pickle(path):
    """Return the object unpickled from the binary file at *path*."""
    # NOTE(review): unpickling can execute arbitrary code, so these artifact
    # files must only ever come from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Loaded once at import time, in this order: classifier, feature scaler,
# then the encoder used to decode predicted class indices.
final_model = _read_pickle("final_model.pkl")
scaler = _read_pickle("scaler.pkl")
label_encoder = _read_pickle("label_encoder.pkl")
|
| 18 |
|
|
|
|
| 19 |
|
| 20 |
+
# ==============================
|
| 21 |
+
# Feature Columns (Same as Training)
|
| 22 |
+
# ==============================
|
| 23 |
+
|
| 24 |
+
feature_columns = [
|
| 25 |
'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
|
| 26 |
'smoothness_mean', 'compactness_mean', 'concavity_mean',
|
| 27 |
'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
|
|
|
|
| 33 |
'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst'
|
| 34 |
]
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
# ==============================
|
| 38 |
+
# Prediction Function
|
| 39 |
+
# ==============================
|
| 40 |
+
|
| 41 |
+
def predict_cancer(*inputs):
    """Classify one tumor sample and report the winning class probability.

    Args:
        *inputs: One numeric value per entry of ``feature_columns``, supplied
            in that same order.

    Returns:
        A ``(diagnosis, confidence)`` tuple. ``diagnosis`` is the readable
        text for a decoded label of ``"M"`` or ``"B"``; any other decoded
        label is returned unchanged. ``confidence`` is the highest class
        probability formatted as a percentage with two decimals, e.g.
        ``"97.31%"``.

    Raises:
        ValueError: If the number of values differs from the number of
            feature columns.
    """
    # Fail early with a readable message instead of pandas' generic
    # "N columns passed" error when the UI and feature list get out of sync.
    if len(inputs) != len(feature_columns):
        raise ValueError(
            f"Expected {len(feature_columns)} inputs, but got {len(inputs)}"
        )

    # Single-row frame so the scaler sees named columns.
    input_df = pd.DataFrame([inputs], columns=feature_columns)

    # Scale data; transform() returns a bare array, so re-attach column names.
    scaled_data = scaler.transform(input_df)
    scaled_df = pd.DataFrame(scaled_data, columns=feature_columns)

    # Feature engineering (must match training). The derived columns are
    # computed from the *scaled* values and appended after the originals.
    # The 1e-6 offset keeps the denominator from being exactly zero.
    scaled_df['radius_area_ratio'] = scaled_df['radius_mean'] / (scaled_df['area_mean'] + 1e-6)
    scaled_df['perimeter_area_ratio'] = scaled_df['perimeter_mean'] / (scaled_df['area_mean'] + 1e-6)
    scaled_df['concavity_points_product'] = (
        scaled_df['concavity_mean'] * scaled_df['concave points_mean']
    )

    # Prediction: take the most probable class for the single row.
    probabilities = final_model.predict_proba(scaled_df)[0]
    class_index = np.argmax(probabilities)
    # NOTE(review): assumes the position in the probability vector equals the
    # label encoder's integer code for that class — confirm against training.
    predicted_label = label_encoder.inverse_transform([class_index])[0]
    confidence = probabilities[class_index] * 100

    diagnosis_map = {
        "M": "Malignant (Cancer)",
        "B": "Benign (Non-cancerous)"
    }

    # Fall back to the raw label if the encoder yields something unexpected.
    result = diagnosis_map.get(predicted_label, predicted_label)

    return result, f"{confidence:.2f}%"
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# ==============================
|
| 74 |
+
# Create Gradio UI
|
| 75 |
+
# ==============================
|
| 76 |
+
|
| 77 |
+
# One numeric field per training feature, in training order, each
# pre-filled with 0.0.
inputs = [
    gr.Number(label=feature_name, value=0.0)
    for feature_name in feature_columns
]

# The two text outputs line up with the (diagnosis, confidence) pair
# returned by predict_cancer.
interface = gr.Interface(
    fn=predict_cancer,
    inputs=inputs,
    outputs=[
        gr.Textbox(label="Predicted Diagnosis"),
        gr.Textbox(label="Confidence"),
    ],
    title="Breast Cancer Prediction App",
    description="Enter the 30 medical features to predict whether the tumor is Benign or Malignant.",
)

# ==============================
# Launch App
# ==============================

# Start the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()
|