DataWizard9742 committed on
Commit
3fa6b6d
·
verified ·
1 Parent(s): 5ca15a4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+
6
# Gradio is a hard dependency of this app. Notebook shell magics such as
# ``!pip install`` are not valid Python in a script (they are a SyntaxError
# outside IPython), so installation has to be handled by the environment,
# e.g. through a requirements.txt entry. Fail with an actionable message
# instead of trying to install at import time.
try:
    import gradio as gr
except ImportError as exc:
    raise ImportError(
        "gradio is required to run this app; install it with "
        "'pip install gradio' or add it to requirements.txt"
    ) from exc
12
+
13
print("\n--- Creating Gradio Interface ---")

# Load the trained model and the preprocessing objects saved next to it.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only ship .pkl artifacts that were produced by a trusted training run.
try:
    with open('final_model.pkl', 'rb') as file:
        final_model = pickle.load(file)
    with open('scaler.pkl', 'rb') as file:
        scaler = pickle.load(file)
    with open('label_encoder.pkl', 'rb') as file:
        label_encoder = pickle.load(file)
    print("✓ Models, Scaler, and Label Encoder loaded successfully.")
except Exception as e:
    # Top-level boundary: report the problem and stop, since nothing below
    # can work without these three objects.
    print(f"✗ Error loading saved assets: {e}")
    print("Please ensure 'final_model.pkl', 'scaler.pkl', and 'label_encoder.pkl' are in the current directory.")
    # The bare exit() helper comes from the site module and is meant for the
    # interactive shell; it also reports success (status 0). Raise SystemExit
    # with a non-zero status so the hosting process sees the failure.
    raise SystemExit(1)
28
+
29
# Names of the 30 raw input features, in the column order used to build the
# DataFrame that is handed to the scaler. Each of the ten base measurements
# appears three times, suffixed with _mean, _se and _worst; all ten _mean
# columns come first, then the _se columns, then the _worst columns.
original_feature_columns = [
    f'{measurement}_{statistic}'
    for statistic in ('mean', 'se', 'worst')
    for measurement in (
        'radius',
        'texture',
        'perimeter',
        'area',
        'smoothness',
        'compactness',
        'concavity',
        'concave points',
        'symmetry',
        'fractal_dimension',
    )
]
44
+
45
def predict_cancer(*args):
    """Predict the diagnosis for one sample of cell-nuclei measurements.

    Gradio callback. Receives one positional value per entry of
    ``original_feature_columns`` (in that order), scales the values with the
    loaded ``scaler``, appends three engineered features and asks
    ``final_model`` for class probabilities.

    Returns:
        A ``(diagnosis, confidence)`` pair of strings: a readable diagnosis
        label and the probability of the winning class formatted as a
        percentage with two decimals.

    Raises:
        ValueError: If the number of inputs does not match the feature list,
            or if any input is missing (``None``).
    """
    expected_count = len(original_feature_columns)
    if len(args) != expected_count:
        raise ValueError(f"Expected {expected_count} inputs, but got {len(args)}")

    # A numeric field that the user cleared reaches the callback as None.
    # Reject it here with the offending field names instead of letting the
    # scaler fail later with an opaque error.
    missing = [
        name
        for name, value in zip(original_feature_columns, args)
        if value is None
    ]
    if missing:
        raise ValueError("Missing value(s) for: " + ", ".join(missing))

    # Single-row frame with named columns, as expected by scaler.transform.
    input_data = pd.DataFrame([args], columns=original_feature_columns)

    # Apply scaling and restore the column names (transform returns an array).
    input_scaled_df = pd.DataFrame(
        scaler.transform(input_data),
        columns=original_feature_columns,
    )

    # Feature engineering on the scaled values. The source columns always
    # exist because the frame is built from original_feature_columns; indexing
    # them directly means a changed feature list fails loudly with a KeyError
    # rather than silently feeding the model fewer features.
    # NOTE(review): this must mirror the training pipeline exactly — confirm
    # the ratios were computed after scaling there as well.
    eps = 1e-6  # keeps the ratio denominators away from exactly zero
    area_mean = input_scaled_df['area_mean']
    input_scaled_df['radius_area_ratio'] = (
        input_scaled_df['radius_mean'] / (area_mean + eps)
    )
    input_scaled_df['perimeter_area_ratio'] = (
        input_scaled_df['perimeter_mean'] / (area_mean + eps)
    )
    input_scaled_df['concavity_points_product'] = (
        input_scaled_df['concavity_mean'] * input_scaled_df['concave points_mean']
    )

    # Probabilities for the single row; pick the most likely class.
    prediction_proba = final_model.predict_proba(input_scaled_df)[0]
    prediction_class_idx = np.argmax(prediction_proba)
    # NOTE(review): assumes the probability column index equals the encoded
    # label understood by label_encoder — verify against the training code.
    prediction_class = label_encoder.inverse_transform([prediction_class_idx])[0]

    confidence = prediction_proba[prediction_class_idx]

    # Map the raw label to a readable string; unknown labels pass through.
    diagnosis_map = {'M': 'Malignant (Cancer)', 'B': 'Benign (Non-cancerous)'}
    predicted_diagnosis = diagnosis_map.get(prediction_class, prediction_class)

    return predicted_diagnosis, f"{confidence*100:.2f}%"
80
+
81
# One numeric input box per feature, labelled with the feature name and
# pre-filled with 0.0, in the same order as original_feature_columns.
inputs = [
    gr.Number(label=feature_name, value=0.0)
    for feature_name in original_feature_columns
]
86
+
87
# Sample measurement vector offered as a clickable example in the UI.
# Values are positional: one per input field, ten per feature family.
# NOTE(review): earlier comments described this both as a benign sample and
# as averaged values; the provenance is not verifiable from this file.
example_inputs = [
    # *_mean features
    12.45, 15.7, 82.57, 477.1, 0.1045,
    0.08947, 0.04991, 0.02111, 0.1716, 0.06337,
    # *_se features
    0.3344, 1.157, 2.508, 32.43, 0.007624,
    0.01802, 0.01993, 0.008453, 0.01538, 0.003463,
    # *_worst features
    13.78, 20.8, 91.18, 592.7, 0.146,
    0.2158, 0.1672, 0.07899, 0.2823, 0.07526,
]
94
+
95
# Assemble the web UI: the feature inputs feed predict_cancer, whose two
# returned strings are shown in the two text boxes below.
diagnosis_box = gr.Textbox(label="Predicted Diagnosis")
confidence_box = gr.Textbox(label="Confidence")

interface = gr.Interface(
    fn=predict_cancer,
    inputs=inputs,
    outputs=[diagnosis_box, confidence_box],
    title="Breast Cancer Prediction",
    description="Enter patient's cell nuclei measurements to predict breast cancer diagnosis.",
    examples=[example_inputs],
)
104
+
105
# Launch the interface. With debug=True, launch() blocks the main thread
# until the server is stopped, so the status message is printed beforehand;
# a message placed after the call would only show up at shutdown.
print("\n--- Launching Gradio interface ---")
interface.launch(debug=True)