# LUNG-CANCER / app.py
# Snigs98's picture
# Update app.py
# 0141125 verified
import pandas as pd
import numpy as np
import zipfile
import os
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
# --- Data acquisition -------------------------------------------------------
# Unpack the bundled archive next to the app so the CSV can be read from disk.
zip_file_path = "LUNG_CANCER.zip"
extract_folder = "./LUNG_CANCER_DATA"
with zipfile.ZipFile(zip_file_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extract_folder)

csv_path = os.path.join(extract_folder, "survey lung cancer.csv")
df = pd.read_csv(csv_path)

# --- Preprocessing ----------------------------------------------------------
# Normalise headers: trim surrounding whitespace and turn inner spaces into
# underscores so every column can be addressed by a predictable name.
df.columns = df.columns.str.strip().str.replace(" ", "_", regex=False)

# Encode the two text columns as integers; labels outside these mappings
# become NaN.
gender_codes = {'M': 0, 'F': 1}
target_codes = {'YES': 1, 'NO': 0}
df['GENDER'] = df['GENDER'].map(gender_codes)
df['LUNG_CANCER'] = df['LUNG_CANCER'].map(target_codes)

# --- Train / test split -----------------------------------------------------
y = df['LUNG_CANCER']
X = df.drop('LUNG_CANCER', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# --- Class balancing --------------------------------------------------------
# Oversample the training portion only; the held-out set is left untouched.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# --- Feature scaling --------------------------------------------------------
# Fit the scaler on the resampled training data, then apply the same
# transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train_resampled)
X_train_resampled = scaler.transform(X_train_resampled)
X_test = scaler.transform(X_test)

# --- Model training ---------------------------------------------------------
model = RandomForestClassifier(n_estimators=200, random_state=42).fit(
    X_train_resampled, y_train_resampled
)

# --- Evaluation on the held-out split ---------------------------------------
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))
# Gradio Prediction Function
def predict_lung_cancer(*features):
    """Classify a single set of feature values with the trained model.

    Args:
        *features: One numeric value per model input, given positionally in
            the same order as the columns the scaler was fitted on.

    Returns:
        "Lung Cancer Detected" when the model predicts class 1, otherwise
        "No Lung Cancer".

    Raises:
        gr.Error: If any value is missing (None) or the number of values does
            not match what the scaler expects.
    """
    # A blank numeric field reaches this function as None; without this guard
    # the failure surfaces deep inside scaler.transform with an opaque message.
    if any(value is None for value in features):
        raise gr.Error("Please fill in every field before submitting.")

    row = np.asarray(features, dtype=float).reshape(1, -1)

    # Fail early with a readable message on a wrong number of inputs.
    expected = getattr(scaler, "n_features_in_", row.shape[1])
    if row.shape[1] != expected:
        raise gr.Error(
            f"Expected {expected} feature values, got {row.shape[1]}."
        )

    # If the scaler recorded column names when it was fitted, hand it a
    # DataFrame carrying the same names so scikit-learn does not warn about
    # a feature-name mismatch on every request.
    names = getattr(scaler, "feature_names_in_", None)
    if names is not None:
        row = pd.DataFrame(row, columns=names)

    scaled = scaler.transform(row)
    prediction = model.predict(scaled)
    return "Lung Cancer Detected" if prediction[0] == 1 else "No Lung Cancer"
# Gradio Interface
# One numeric field per model input. The values are forwarded positionally to
# predict_lung_cancer, so the order of these labels is significant.
# NOTE(review): apart from Gender, the labels do not state which numeric
# encoding each field expects -- confirm against the dataset and consider
# spelling it out for users.
_FIELD_LABELS = (
    "Gender (0: Male, 1: Female)",
    "Age",
    "Smoking",
    "Yellow Fingers",
    "Anxiety",
    "Peer Pressure",
    "Chronic Disease",
    "Fatigue",
    "Allergy",
    "Wheezing",
    "Alcohol Consuming",
    "Coughing",
    "Shortness of Breath",
    "Swallowing Difficulty",
    "Chest Pain",
)
inputs = [gr.Number(label=text) for text in _FIELD_LABELS]

demo = gr.Interface(
    fn=predict_lung_cancer,
    inputs=inputs,
    outputs="text",
    title="Lung Cancer Prediction",
)
demo.launch()