from huggingface_hub import hf_hub_download
import os
import json
import time
import joblib
import lightgbm
import numpy as np
import pandas as pd
import gradio as gr
from datetime import datetime, timedelta
from huggingface_hub import upload_file
from sklearn.ensemble import RandomForestRegressor

# Hugging Face access token, read from the environment. The module refuses to
# load without it because every hub download/upload below passes it.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN not set. Add it to Space Secrets.")

# Hub repositories holding the generated dataset and the trained model.
DATASET_REPO = "GFT-Poland/synthetic-pricing-dataset"
MODEL_REPO = "GFT-Poland/basic-pricing-model"

# File names used both for the local copy and as the path inside the hub repo.
DATASET_FILE = "synthetic_pricing_dataset.csv"
MODEL_FILE = "model.joblib"

# Model input columns. The order matters: train_model() lists its monotone
# constraints positionally against this list, and invoke_model() builds its
# single-row frame with exactly these columns.
FEATURES = [
    "age",
    "monthly_income",
    "living_costs",
    "existing_liabilities",
    "credit_limits",
    "ltv",
    "interest_rate",
    "loan_term_years",
    "installment_type"
]

# Column the model is trained to predict.
TARGET = "max_loan_amount"

# =============================
# GENERATE DATASET
# =============================
def _installment_to_loan(installment, annual_rate, years, installment_type):
    """Convert a maximum monthly installment into a loan principal.

    Args:
        installment: Maximum affordable monthly installment.
        annual_rate: Nominal annual interest rate as a fraction (0.07 = 7%).
        years: Loan term in years.
        installment_type: 0 = fixed, 1 = decreasing, 2 = variable.

    Returns:
        The loan amount as a float; 0.0 when the installment is not positive.

    Raises:
        ValueError: If ``installment_type`` is not 0, 1 or 2.
    """
    if installment <= 0:
        return 0.0

    months = years * 12

    if installment_type == 1:
        # Decreasing installments: flat 82% of the undiscounted payment sum.
        return installment * months * 0.82

    if installment_type == 0:
        monthly_rate = annual_rate / 12
    elif installment_type == 2:
        # Variable rate: discount with a 5 percentage point buffer on top.
        monthly_rate = (annual_rate + 0.05) / 12
    else:
        # Previously fell through and returned None, which only surfaced
        # later as an unrelated TypeError in min().
        raise ValueError(f"Unknown installment type: {installment_type!r}")

    # Present value of an annuity paying `installment` for `months` periods.
    return installment * ((1 - (1 + monthly_rate) ** -months) / monthly_rate)


def _simulate_applicant(rng):
    """Draw one synthetic applicant and compute the matching maximum loan.

    The order of the random draws is part of the contract: changing it
    changes the dataset produced for a given seed.

    Args:
        rng: A ``numpy.random.RandomState`` used for every draw.

    Returns:
        A dict with one value per dataset column (features plus
        ``max_loan_amount``), with monetary values rounded to 2 decimals.
    """
    age = rng.randint(21, 65)  # upper bound exclusive: ages 21..64

    monthly_income = rng.uniform(2000, 50000)

    living_costs = (
        900
        + 0.35 * (monthly_income ** 0.85)
        + rng.uniform(-300, 300)
    )

    existing_liabilities = monthly_income * rng.uniform(0.05, 0.30)
    credit_limits = monthly_income * rng.uniform(3, 9)

    ltv = rng.uniform(0.4, 0.95)
    interest_rate = rng.uniform(0.045, 0.095)

    # Term is 10..35 years, capped so that age + term does not exceed 75.
    loan_term_years = rng.randint(10, min(35, 75 - age) + 1)

    installment_type = rng.choice([0, 1, 2], p=[0.5, 0.3, 0.2])

    disposable_income = monthly_income - living_costs - existing_liabilities

    if disposable_income <= 0:
        max_loan = 0
    else:
        # Base debt-service ratio grows with income, then is reduced by
        # each risk factor that applies.
        dsr_base = 0.28 + 0.18 * np.tanh(monthly_income / 12000)

        dsr_adj = 0
        if ltv > 0.85:
            dsr_adj -= 0.10
        if interest_rate > 0.07:
            dsr_adj -= 0.07
        if installment_type == 2:
            dsr_adj -= 0.05
        if credit_limits > monthly_income * 6:
            dsr_adj -= 0.05

        dsr_limit = np.clip(dsr_base + dsr_adj, 0.25, 0.55)

        max_installment = disposable_income * dsr_limit

        loan_from_installment = _installment_to_loan(
            max_installment,
            interest_rate,
            loan_term_years,
            installment_type,
        )

        # The affordable loan is additionally capped by income and by the
        # applicant's credit limits.
        max_loan = min(
            loan_from_installment,
            monthly_income * 240,
            credit_limits * 12,
        )

    return {
        "age": age,
        "monthly_income": round(monthly_income, 2),
        "living_costs": round(living_costs, 2),
        "existing_liabilities": round(existing_liabilities, 2),
        "credit_limits": round(credit_limits, 2),
        "ltv": round(ltv, 2),
        "interest_rate": round(interest_rate, 4),
        "loan_term_years": loan_term_years,
        "installment_type": installment_type,
        "max_loan_amount": round(max_loan, 2),
    }


def generate_dataset(n_rows=1_000_000, seed=42):
    """Generate the synthetic pricing dataset, save it as CSV and upload it.

    Args:
        n_rows: Number of synthetic applicants to generate.
        seed: Seed for the random generator; the same seed and row count
            reproduce the same rows.

    Returns:
        A status message containing the number of generated rows.

    Side effects:
        Writes ``DATASET_FILE`` in the working directory and uploads it to
        ``DATASET_REPO`` on the Hugging Face hub.
    """
    # A private RandomState yields the same draws as np.random.seed(seed)
    # followed by the module-level functions, without reseeding the global
    # generator for the rest of the process.
    rng = np.random.RandomState(seed)

    rows = [_simulate_applicant(rng) for _ in range(n_rows)]

    df = pd.DataFrame(rows)
    # Single generation timestamp stamped on every row.
    df["update_ts"] = time.time()

    df.to_csv(DATASET_FILE, index=False)

    upload_file(
        path_or_fileobj=DATASET_FILE,
        path_in_repo=DATASET_FILE,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )

    return f"Creditworthiness dataset generated ({len(df)} rows)"


# =============================
# HELPER: DOWNLOAD IF NEWER
# =============================
def download_if_newer(repo_id, filename, local_path, repo_type="dataset"):
    """Refresh ``local_path`` from the hub when the hub copy looks newer.

    The file is fetched with ``hf_hub_download`` and the modification time of
    the returned path is compared with that of ``local_path``.

    Args:
        repo_id: Hub repository to download from.
        filename: Name of the file inside the repository.
        local_path: Destination path of the working copy.
        repo_type: Hub repository type passed to ``hf_hub_download``.

    Returns:
        A human-readable status message describing what was done. Any error
        while fetching is reported as "no remote file" rather than raised.
    """
    import shutil

    try:
        fetched_path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type=repo_type,
            token=HF_TOKEN,
        )
        fetched_mtime = os.path.getmtime(fetched_path)
    except Exception:
        # Best effort: fall back to whatever is already on disk.
        return f"No remote file found, using local {local_path}"

    # No working copy yet: take the fetched file as is.
    if not os.path.exists(local_path):
        shutil.copy2(fetched_path, local_path)
        return f"Downloaded {local_path} from repo."

    # Working copy is at least as recent as the fetched file: keep it.
    if fetched_mtime <= os.path.getmtime(local_path):
        return f"Local {local_path} is up-to-date."

    shutil.copy2(fetched_path, local_path)
    return f"Updated local {local_path} from repo."
    

# =============================
# TRAIN MODEL
# =============================
def train_model():
    """Train the LightGBM pricing model on the dataset and upload it.

    Syncs ``DATASET_FILE`` from ``DATASET_REPO``, fits an ``LGBMRegressor``
    on ``FEATURES`` -> ``TARGET``, dumps it to ``MODEL_FILE`` with joblib and
    uploads that file to ``MODEL_REPO``.

    Returns:
        A status message once the model has been uploaded.

    Raises:
        FileNotFoundError: If no local dataset is available after the sync.
    """
    sync_status = download_if_newer(
        DATASET_REPO,
        DATASET_FILE,
        DATASET_FILE,
        repo_type="dataset",
    )

    # download_if_newer() never raises, so surface its status here instead of
    # letting read_csv fail with a bare path.
    if not os.path.exists(DATASET_FILE):
        raise FileNotFoundError(
            f"Local dataset {DATASET_FILE} not found; sync reported: "
            f"{sync_status!r}. Generate the dataset first."
        )

    df = pd.read_csv(DATASET_FILE)

    X = df[FEATURES]
    y = df[TARGET]

    from lightgbm import LGBMRegressor

    # Constraints are keyed by feature name and expanded in FEATURES order so
    # they cannot drift out of alignment with the training columns.
    monotone_by_feature = {
        "age": 0,
        "monthly_income": +1,
        "living_costs": -1,
        "existing_liabilities": -1,
        # NOTE(review): generate_dataset() also caps the loan at
        # credit_limits * 12, which rises with this feature - confirm that
        # -1 is the intended business rule.
        "credit_limits": -1,
        "ltv": -1,
        "interest_rate": -1,
        "loan_term_years": +1,
        # The type code is a nominal label, not a risk scale: in
        # generate_dataset() type 1 yields larger loans than type 0 while
        # type 2 yields smaller ones, so no monotone direction fits.
        "installment_type": 0,
    }

    model = LGBMRegressor(
        n_estimators=600,
        learning_rate=0.05,
        max_depth=7,
        num_leaves=40,
        subsample=0.85,
        # LightGBM only applies `subsample` when bagging is enabled through a
        # non-zero frequency; with the default of 0 it is ignored.
        subsample_freq=1,
        colsample_bytree=0.85,
        monotone_constraints=[
            monotone_by_feature.get(name, 0) for name in FEATURES
        ],
        random_state=42,
        n_jobs=-1,
    )

    model.fit(X, y)

    joblib.dump(model, MODEL_FILE)

    upload_file(
        path_or_fileobj=MODEL_FILE,
        path_in_repo=MODEL_FILE,
        repo_id=MODEL_REPO,
        repo_type="model",
        token=HF_TOKEN,
    )

    return "Creditworthiness model trained and uploaded"


# =============================
# INVOKE MODEL
# =============================
def invoke_model(json_input):
    """Predict the maximum loan amount for one applicant given as JSON.

    Args:
        json_input: JSON text of an object with one entry per name in
            ``FEATURES``. Extra keys are ignored.

    Returns:
        ``{"max_loan_amount": <float>, "info": <sync status>}`` on success,
        or ``{"error": <message>}`` when the input is invalid or anything
        else fails. This function never raises; it is a UI boundary.
    """
    try:
        # Validate the payload first so that bad input is rejected without a
        # hub download or a model load.
        data = json.loads(json_input)
        if not isinstance(data, dict):
            return {
                "error": "Input must be a JSON object mapping feature names to values."
            }

        # Absent keys would otherwise become NaN columns and still produce a
        # prediction from incomplete data.
        missing = [name for name in FEATURES if name not in data]
        if missing:
            return {"error": f"Missing required feature(s): {', '.join(missing)}"}

        msg = download_if_newer(MODEL_REPO, MODEL_FILE, MODEL_FILE, repo_type="model")

        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code; MODEL_REPO must remain a trusted repository.
        model = joblib.load(MODEL_FILE)

        X = pd.DataFrame([data], columns=FEATURES)
        prediction = model.predict(X)[0]

        return {"max_loan_amount": round(float(prediction), 2), "info": msg}

    except Exception as e:
        return {"error": str(e)}


# =============================
# UI
# =============================
# Gradio front end: two maintenance buttons that report into a shared status
# box, and a JSON text area whose content is sent to invoke_model().
with gr.Blocks() as app:
    gr.Markdown("# HF Pricing Engine – MVP")

    with gr.Row():
        gen_btn = gr.Button("Generate dataset")
        train_btn = gr.Button("Train model")

    status = gr.Textbox(label="Status")
    gen_btn.click(generate_dataset, outputs=status)
    train_btn.click(train_model, outputs=status)

    gr.Markdown(
        "## Invoke model (installment type: 0 = fixed, 1 = decreasing, 2 = variable)"
    )
    # Pre-filled example payload with one entry per model feature.
    json_input = gr.Textbox(
        label="Input JSON",
        lines=8,
        value=json.dumps(
            {
                "age": 34,
                "monthly_income": 10000,
                "living_costs": 1000,
                "existing_liabilities": 1000,
                "credit_limits": 50000,
                "ltv": 0.8,
                "interest_rate": 0.065,
                "loan_term_years": 30,
                "installment_type": 0,
            },
            indent=2,
        ),
    )

    invoke_btn = gr.Button("Invoke")
    result = gr.JSON(label="Result")
    invoke_btn.click(invoke_model, inputs=json_input, outputs=result)

app.launch()