import pandas as pd
import numpy as np
import zipfile
import os
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE

# ---------------------------------------------------------------------------
# Data acquisition: unpack the archive and read the survey CSV it contains.
# ---------------------------------------------------------------------------
zip_file_path = "LUNG_CANCER.zip"
extract_folder = "./LUNG_CANCER_DATA"
with zipfile.ZipFile(zip_file_path, 'r') as archive:
    archive.extractall(extract_folder)

csv_path = os.path.join(extract_folder, "survey lung cancer.csv")
df = pd.read_csv(csv_path)

# ---------------------------------------------------------------------------
# Preprocessing: tidy the headers and turn the two text columns into integers.
# ---------------------------------------------------------------------------
# Headers lose surrounding whitespace and use underscores instead of spaces.
df.columns = [header.strip().replace(" ", "_") for header in df.columns]

gender_codes = {'M': 0, 'F': 1}
diagnosis_codes = {'YES': 1, 'NO': 0}
df = df.assign(
    GENDER=df['GENDER'].map(gender_codes),
    LUNG_CANCER=df['LUNG_CANCER'].map(diagnosis_codes),
)

# ---------------------------------------------------------------------------
# Train/test split: 80/20, stratified on the target, fixed seed.
# ---------------------------------------------------------------------------
y = df['LUNG_CANCER']
X = df.drop('LUNG_CANCER', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# ---------------------------------------------------------------------------
# Class imbalance: SMOTE is applied to the training split only, so the test
# split keeps the rows produced by the split above.
# ---------------------------------------------------------------------------
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# ---------------------------------------------------------------------------
# Feature scaling: statistics come from the resampled training data and are
# then reused unchanged for the test split.
# ---------------------------------------------------------------------------
scaler = StandardScaler().fit(X_train_resampled)
X_train_resampled = scaler.transform(X_train_resampled)
X_test = scaler.transform(X_test)

# ---------------------------------------------------------------------------
# Model training: 200-tree random forest with a fixed seed.
# ---------------------------------------------------------------------------
model = RandomForestClassifier(n_estimators=200, random_state=42)
model = model.fit(X_train_resampled, y_train_resampled)

# ---------------------------------------------------------------------------
# Evaluation on the held-out split.
# ---------------------------------------------------------------------------
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Gradio Prediction Function
def predict_lung_cancer(*features):
    """Classify one set of survey answers with the trained model.

    Args:
        *features: One numeric value per model input, given in the same
            order as the columns the module-level ``scaler`` was fitted on.

    Returns:
        ``"Lung Cancer Detected"`` when the model predicts class 1,
        ``"No Lung Cancer"`` for any other prediction, or a short prompt
        asking for the missing values when no inputs were supplied or any
        input is ``None``.
    """
    # Gradio passes None for a Number field that was left empty. Catch that
    # here so the user gets a readable message instead of an error raised
    # from inside scikit-learn.
    if not features or any(value is None for value in features):
        return "Please provide a value for every input field."

    row = np.asarray(features, dtype=float).reshape(1, -1)

    # A scaler fitted on a DataFrame remembers its column names and warns on
    # every call that receives an unnamed array. Re-attach the names when the
    # scaler has them and the column count matches; otherwise pass the plain
    # array through and let the scaler report any shape problem itself.
    column_names = getattr(scaler, "feature_names_in_", None)
    if column_names is not None and len(column_names) == row.shape[1]:
        row = pd.DataFrame(row, columns=column_names)

    scaled_row = scaler.transform(row)
    prediction = model.predict(scaled_row)
    return "Lung Cancer Detected" if prediction[0] == 1 else "No Lung Cancer"

# Gradio Interface
# One numeric field per value handed to predict_lung_cancer; the order of the
# labels below is the order in which the values are passed to the function.
feature_labels = (
    "Gender (0: Male, 1: Female)",
    "Age",
    "Smoking",
    "Yellow Fingers",
    "Anxiety",
    "Peer Pressure",
    "Chronic Disease",
    "Fatigue",
    "Allergy",
    "Wheezing",
    "Alcohol Consuming",
    "Coughing",
    "Shortness of Breath",
    "Swallowing Difficulty",
    "Chest Pain",
)
inputs = [gr.Number(label=caption) for caption in feature_labels]

# Text output shows the string returned by the prediction function.
demo = gr.Interface(
    fn=predict_lung_cancer,
    inputs=inputs,
    outputs="text",
    title="Lung Cancer Prediction",
)
demo.launch()