Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import zipfile | |
| import os | |
| import gradio as gr | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import LabelEncoder, StandardScaler | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.metrics import accuracy_score, classification_report | |
| from imblearn.over_sampling import SMOTE | |
# Unpack the bundled archive so the survey CSV is available on disk.
zip_file_path = "LUNG_CANCER.zip"
extract_folder = "./LUNG_CANCER_DATA"
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_folder)

# Read the survey responses into a DataFrame.
csv_path = os.path.join(extract_folder, "survey lung cancer.csv")
df = pd.read_csv(csv_path)

# Normalise the headers: trim surrounding whitespace and turn inner spaces
# into underscores so every column can be addressed by one predictable name.
df.columns = [header.strip().replace(" ", "_") for header in df.columns]

# Encode the two text columns as integers; any value missing from a mapping
# becomes NaN.
gender_codes = {'M': 0, 'F': 1}
target_codes = {'YES': 1, 'NO': 0}
df['GENDER'] = df['GENDER'].map(gender_codes)
df['LUNG_CANCER'] = df['LUNG_CANCER'].map(target_codes)
# Separate the target column from the predictor columns.
y = df['LUNG_CANCER']
X = df.drop('LUNG_CANCER', axis=1)

# Hold out 20% of the rows for evaluation, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Handle class imbalance by resampling the training split only; the held-out
# rows are left as they are.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Fit the scaler on the resampled training data, then apply that same
# transformation to both the training and the held-out features.
scaler = StandardScaler()
scaler.fit(X_train_resampled)
X_train_resampled = scaler.transform(X_train_resampled)
X_test = scaler.transform(X_test)
# Train a 200-tree random forest on the resampled, scaled training data.
model = RandomForestClassifier(random_state=42, n_estimators=200).fit(
    X_train_resampled,
    y_train_resampled,
)

# Score the model on the held-out split and print the results.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", report)
# Gradio Prediction Function
def predict_lung_cancer(*features):
    """Classify a single survey response with the trained model.

    Args:
        *features: One numeric value per input field, in the order the
            scaler and model were fitted on. A value of ``None`` means the
            corresponding field was left blank.

    Returns:
        ``"Lung Cancer Detected"`` when the model predicts class 1,
        ``"No Lung Cancer"`` otherwise, or a prompt asking for the missing
        values when any field is blank.
    """
    # A blank Number field arrives as None. Without this guard the values
    # would form an object array and the scaler would fail with an opaque
    # error instead of telling the user what to fix.
    if any(value is None for value in features):
        return "Please provide a value for every field."

    # The scaler and model expect a 2-D array: one row, one column per feature.
    row = np.asarray(features, dtype=float).reshape(1, -1)
    scaled_row = scaler.transform(row)
    prediction = model.predict(scaled_row)
    return "Lung Cancer Detected" if prediction[0] == 1 else "No Lung Cancer"
# Gradio Interface
# One numeric input per feature; the order must match the positional
# arguments that predict_lung_cancer forwards to the scaler and model.
_INPUT_LABELS = (
    "Gender (0: Male, 1: Female)",
    "Age",
    "Smoking",
    "Yellow Fingers",
    "Anxiety",
    "Peer Pressure",
    "Chronic Disease",
    "Fatigue",
    "Allergy",
    "Wheezing",
    "Alcohol Consuming",
    "Coughing",
    "Shortness of Breath",
    "Swallowing Difficulty",
    "Chest Pain",
)
inputs = [gr.Number(label=caption) for caption in _INPUT_LABELS]

demo = gr.Interface(
    fn=predict_lung_cancer,
    inputs=inputs,
    outputs="text",
    title="Lung Cancer Prediction",
)
demo.launch()