"""Train a lung-cancer classifier on the survey dataset and serve it with Gradio.

Running this script extracts the dataset archive, trains a random forest on
SMOTE-balanced, standardized features, prints hold-out metrics, and then
launches a Gradio form that feeds user input through the same scaler/model.
"""

import os
import zipfile

import gradio as gr
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Extract ZIP file
zip_file_path = "LUNG_CANCER.zip"
extract_folder = "./LUNG_CANCER_DATA"
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

# Load dataset
df = pd.read_csv(os.path.join(extract_folder, "survey lung cancer.csv"))

# Preprocessing: normalize the headers (trim, spaces -> underscores) so the
# column names used below match, then encode the two text columns as integers.
df.rename(columns=lambda x: x.strip().replace(" ", "_"), inplace=True)
# NOTE: Series.map yields NaN for any value missing from the mapping.
df['GENDER'] = df['GENDER'].map({'M': 0, 'F': 1})
df['LUNG_CANCER'] = df['LUNG_CANCER'].map({'YES': 1, 'NO': 0})

# Splitting dataset (stratified so both splits keep the label ratio)
X = df.drop(columns=['LUNG_CANCER'])
y = df['LUNG_CANCER']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Handling class imbalance: oversample the training split only, so the test
# split is left untouched for evaluation.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Scaling features: fit on the training data, reuse the same transform on test.
scaler = StandardScaler()
X_train_resampled = scaler.fit_transform(X_train_resampled)
X_test = scaler.transform(X_test)

# Model training
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train_resampled, y_train_resampled)

# Model evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))


# Gradio Prediction Function
def predict_lung_cancer(*features):
    """Classify one set of form values with the trained scaler and model.

    Args:
        *features: One numeric value per model feature, in the same order as
            the columns of ``X`` (which is also the order of the Gradio
            ``inputs`` list below).

    Returns:
        "Lung Cancer Detected" if the model predicts class 1, otherwise
        "No Lung Cancer".

    Raises:
        gr.Error: If any field was left empty (Gradio passes ``None`` for a
            blank ``gr.Number``), so the user sees a readable message instead
            of an opaque failure inside the scaler.
    """
    if any(value is None for value in features):
        raise gr.Error("Please fill in every field before submitting.")

    # Build a one-row frame carrying the training column names: the scaler
    # was fitted on a DataFrame, and a bare array would trigger scikit-learn's
    # feature-name mismatch warning on every call.
    row = pd.DataFrame([features], columns=X.columns, dtype=float)
    scaled_row = scaler.transform(row)
    prediction = model.predict(scaled_row)
    return "Lung Cancer Detected" if prediction[0] == 1 else "No Lung Cancer"


# Gradio Interface: one numeric field per feature, in training-column order.
# NOTE(review): the symptom fields are forwarded to the model unchanged;
# confirm which numeric encoding the CSV uses and reflect it in the labels.
inputs = [
    gr.Number(label="Gender (0: Male, 1: Female)"),
    gr.Number(label="Age"),
    gr.Number(label="Smoking"),
    gr.Number(label="Yellow Fingers"),
    gr.Number(label="Anxiety"),
    gr.Number(label="Peer Pressure"),
    gr.Number(label="Chronic Disease"),
    gr.Number(label="Fatigue"),
    gr.Number(label="Allergy"),
    gr.Number(label="Wheezing"),
    gr.Number(label="Alcohol Consuming"),
    gr.Number(label="Coughing"),
    gr.Number(label="Shortness of Breath"),
    gr.Number(label="Swallowing Difficulty"),
    gr.Number(label="Chest Pain"),
]

demo = gr.Interface(
    fn=predict_lung_cancer,
    inputs=inputs,
    outputs="text",
    title="Lung Cancer Prediction",
)
demo.launch()