import os  # For filesystem operations
import shutil  # For directory cleanup
import zipfile  # For extracting model archives
import pathlib  # For path manipulations
import pandas  # For tabular data handling
import gradio  # For interactive UI
import huggingface_hub  # For downloading model assets
import autogluon.tabular  # For loading and running AutoGluon predictors
import sklearn # Import sklearn to check version

# --- Configuration ---
# Hugging Face model repository and the archive inside it that is downloaded
# and unpacked to obtain the predictor directory.
MODEL_REPO_ID = "jennifee/classical_automl_model"
ZIP_FILENAME = "autogluon_predictor_dir.zip"

# Local working directories: the download goes to CACHE_DIR and the archive
# is extracted into EXTRACT_DIR beneath it.
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native_sleep")

# Feature column names, in the order the prediction row is assembled.
FEATURE_COLS = [
    "phone_hours", "computer_hours", "device_count",
    "sleep_quality", "sleep_time", "sleep_hours",
]

# NOTE(review): presumably the training target column; nothing else in the
# visible code reads it — confirm against the training data.
TARGET_COL = "use_before_bed"

# Integer codes for the sleep-quality choices (codes follow the listed order).
# NOTE(review): this mapping was written as an example and may need adjusting
# to match the values the model was actually trained on.
SLEEP_QUALITY_MAP = {
    label: code
    for code, label in enumerate(("Poor", "Fair", "Good", "Excellent"))
}

# Human-readable text for each predicted class (binary outcome assumed).
OUTCOME_LABELS = dict(enumerate((
    "Does not use device before bed",
    "Uses device before bed",
)))

# Download and unpack the native predictor
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor and unpack it into a fresh directory.

    Fetches ``ZIP_FILENAME`` from the ``MODEL_REPO_ID`` model repo into
    ``CACHE_DIR``, deletes any previous ``EXTRACT_DIR`` and extracts the
    archive there.

    Returns:
        The directory to load the predictor from, as a string: the single
        top-level folder when the archive wraps everything in one folder,
        otherwise ``EXTRACT_DIR`` itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    # NOTE(review): newer huggingface_hub releases reportedly deprecate
    # local_dir_use_symlinks — confirm before upgrading the dependency.
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    # Always start from an empty directory so stale files from an earlier
    # extraction cannot mix with the new archive contents.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    entries = list(EXTRACT_DIR.iterdir())
    if len(entries) > 1:
        # Archiver artefacts (macOS "__MACOSX", dot-files such as ".DS_Store")
        # next to the real folder would otherwise hide the fact that the
        # archive wraps the predictor in a single directory.
        entries = [
            entry
            for entry in entries
            if entry.name != "__MACOSX" and not entry.name.startswith(".")
        ]

    if len(entries) == 1 and entries[0].is_dir():
        return str(entries[0])
    return str(EXTRACT_DIR)

# Runs at import time: download/unpack the model once, then load it so that
# every prediction call reuses the same predictor object.
PREDICTOR_DIR = _prepare_predictor_dir()
# NOTE(review): require_py_version_match=False presumably lets the predictor
# load under a Python version different from the one it was trained with —
# confirm against the AutoGluon documentation.
PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)

# Maps a raw class value from the model to its human-readable outcome label
def _human_label(c):
    """Return the display label for a raw class value.

    The value is first interpreted as an integer class id (so ``1``, ``"1"``
    and ``1.0`` all resolve to the same entry of ``OUTCOME_LABELS``), then
    looked up as-is. Values that match neither are returned as ``str(c)``.

    Args:
        c: Class value as produced by the predictor (prediction value or
            probability column name).

    Returns:
        The matching text from ``OUTCOME_LABELS``, or ``str(c)`` when there
        is no match.
    """
    try:
        class_id = int(c)
    except (TypeError, ValueError, OverflowError):
        # Not integer-like (e.g. a text label, None, NaN or infinity).
        class_id = None

    # int() truncates, so 0.7 would silently become class 0; only accept
    # floats that already hold a whole number.
    is_lossy = isinstance(c, float) and not c.is_integer()

    if class_id is not None and not is_lossy and class_id in OUTCOME_LABELS:
        return OUTCOME_LABELS[class_id]
    if c in OUTCOME_LABELS:
        return OUTCOME_LABELS[c]
    return str(c)

# This function takes all of our features, encodes them, and performs a prediction
def do_predict(phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours):
    """Run the predictor on one set of UI inputs and return class probabilities.

    Args:
        phone_hours: Converted with ``float()``.
        computer_hours: Converted with ``float()``.
        device_count: Converted with ``int()``.
        sleep_quality_label: A key of ``SLEEP_QUALITY_MAP``; replaced by its
            integer code before prediction.
        sleep_time: Converted with ``int()``. NOTE(review): presumably a
            24-hour HHMM clock value (the examples use 2200, 2300, 100) —
            confirm against the training data.
        sleep_hours: Converted with ``float()``.

    Returns:
        A dict mapping human-readable class labels to probabilities, ordered
        from most to least likely. ``None`` when probabilities could not be
        obtained or when any step failed; failures are printed with a
        traceback rather than raised.
    """
    print("Received inputs:")
    print(f"  phone_hours: {phone_hours}")
    print(f"  computer_hours: {computer_hours}")
    print(f"  device_count: {device_count}")
    print(f"  sleep_quality_label: {sleep_quality_label}")
    print(f"  sleep_time: {sleep_time}")
    print(f"  sleep_hours: {sleep_hours}")
    print(f"  sklearn version: {sklearn.__version__}") # Print sklearn version

    try:
        # Encode categorical features
        sleep_quality_code = SLEEP_QUALITY_MAP[sleep_quality_label]

        row = {
            FEATURE_COLS[0]: float(phone_hours),
            FEATURE_COLS[1]: float(computer_hours),
            FEATURE_COLS[2]: int(device_count),
            FEATURE_COLS[3]: sleep_quality_code,
            FEATURE_COLS[4]: int(sleep_time),
            FEATURE_COLS[5]: float(sleep_hours),
        }
        X = pandas.DataFrame([row], columns=FEATURE_COLS)

        print("Input DataFrame (X):")
        print(X)

        pred_series = PREDICTOR.predict(X)
        raw_pred = pred_series.iloc[0]

        print("Raw prediction (pred_series):")
        print(pred_series)

        # Probabilities are best-effort: a failure here is reported but does
        # not abort the rest of the function.
        try:
            proba = PREDICTOR.predict_proba(X)
            if isinstance(proba, pandas.Series):
                # Normalise a Series result to a one-row frame.
                proba = proba.to_frame().T
            print("Prediction probabilities (proba):")
            print(proba)
        except Exception as e:
            proba = None
            print(f"Error getting prediction probabilities: {e}")

        pred_label = _human_label(raw_pred)

        proba_dict = None
        if proba is not None:
            row0 = proba.iloc[0]
            tmp = {}
            for cls, val in row0.items():
                # Distinct raw classes can share a display label, so sum
                # their probabilities under that label.
                key = _human_label(cls)
                tmp[key] = float(val) + float(tmp.get(key, 0.0))
            proba_dict = dict(sorted(tmp.items(), key=lambda kv: kv[1], reverse=True))
        print("Probability dictionary (proba_dict):")
        print(proba_dict)

        # The markdown summary is only logged; the UI receives proba_dict.
        md = f"**Prediction:** {pred_label}"
        if proba_dict:
            md += f"  \n**Confidence:** {round(proba_dict.get(pred_label, 0.0) * 100, 2)}%"
        print("Markdown output (md):")
        print(md)

        return proba_dict

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure and
        # return None instead of propagating the exception.
        print(f"An error occurred during prediction: {e}")
        import traceback
        traceback.print_exc()
        return None

# Example rows offered in the UI. Each row lists one value per input, in the
# order: phone_hours, computer_hours, device_count, sleep_quality,
# sleep_time, sleep_hours.
# NOTE(review): these were written as placeholders — replace them with real
# rows from the dataset if available.
EXAMPLES = [
    [2.0, 3.0, 3, "Good", 2200, 8.0],
    [5.0, 6.0, 5, "Fair", 100, 6.0],
    [1.0, 1.0, 1, "Excellent", 2300, 9.0],
]

# Gradio UI for the sleep habits model.
# Components are created inside the Blocks/Row context managers, so the order
# of the statements below determines the page layout.
with gradio.Blocks() as demo:
    # Provide an introduction
    gradio.Markdown("# Device Use Before Sleep Predictor")
    gradio.Markdown("""
    This app predicts whether a student uses their device before sleep based on their device usage and sleeping habits.
    """)

    # First row: device-usage inputs, labelled with their feature column names.
    with gradio.Row():
        phone_hours = gradio.Slider(0, 10, step=0.1, value=2.0, label=FEATURE_COLS[0])
        computer_hours = gradio.Slider(0, 10, step=0.1, value=3.0, label=FEATURE_COLS[1])
        device_count = gradio.Number(value=3, precision=0, label=FEATURE_COLS[2])

    # Second row: sleep-related inputs; the radio choices are the keys of
    # SLEEP_QUALITY_MAP, which do_predict converts to integer codes.
    with gradio.Row():
        sleep_quality_label = gradio.Radio(choices=list(SLEEP_QUALITY_MAP.keys()), value="Good", label=FEATURE_COLS[3])
        sleep_time = gradio.Number(value=2200, precision=0, label=FEATURE_COLS[4])
        sleep_hours = gradio.Slider(0, 12, step=0.1, value=8.0, label=FEATURE_COLS[5])


    # Output: receives the dict (or None) returned by do_predict.
    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities") # Assuming binary classification

    # Input order must match the positional parameters of do_predict.
    inputs = [phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours]
    # Re-run the prediction whenever any single input changes.
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    # Example rows are bound to the same input components; caching is disabled.
    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=3,
        cache_examples=False,
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()