# -*- coding: utf-8 -*-
# A1–A5 Upfront Embodied Carbon Calculator (Local LLM, module-specific options, strict unit checks)

from dataclasses import dataclass
from typing import List, Tuple, Dict
import pandas as pd
import gradio as gr

# ============ Local LLM (no external API) ============

# Model id handed to the Hugging Face pipeline below and shown in the UI status line.
LLM_MODEL_ID = "google/flan-t5-base"
try:
    # Imported inside the try so that a missing `transformers` install is
    # handled by the same fallback as a failed model load.
    from transformers import pipeline as hf_pipeline
    _llm = hf_pipeline("text2text-generation", model=LLM_MODEL_ID)
except Exception:
    # Best effort: any failure leaves _llm as None, and llm_feedback() then
    # returns an "LLM unavailable" message instead of generated text.
    # NOTE(review): the failure reason is discarded here.
    _llm = None


# ============ Module-specific selectable options ============
# Allowed material/activity names for each module code. Used to populate the
# material dropdown and to validate the module/name pair of every row.
MODULE_OPTIONS: Dict[str, List[str]] = {
    "A1": ["Concrete", "Steel", "Timber", "Recycled Aggregate"],
    "A2": ["Truck Transport", "Rail Transport", "Ship Transport"],
    "A3": ["CLT Fabrication", "Steel Fabrication", "Precast Concrete"],
    "A4": ["On-site Trucking", "Crane Lifting", "Haulage"],
    "A5": ["Site Electricity", "Construction Waste", "Diesel Fuel"],
}

# Set of the supported module codes (the keys of MODULE_OPTIONS).
# NOTE(review): not referenced elsewhere in the visible code.
SUPPORTED = set(MODULE_OPTIONS.keys())


# ============ Deterministic backend ============
@dataclass
class ItemRow:
    """One calculator input line: a material/activity, its quantity and its emission factor.

    Instances are built by compute_A1A5_emissions from a table row and passed
    to _row_to_kgco2e. The dataclass itself performs no validation.
    """
    module: str      # A1–A5
    name: str        # must be one of MODULE_OPTIONS[module]
    quantity: float  # amount expressed in `unit`
    unit: str        # kg / t / ton_km / kWh  (aliases handled)
    ef: float        # emission factor value expressed in `ef_unit`
    ef_unit: str     # kgCO2e/kg, tCO2e/t, kgCO2e/ton_km, kgCO2e/kWh
    notes: str = ""  # free text; not used in the calculation


# Spelling variants accepted for the canonical quantity units. Keys are
# lower-case with all whitespace removed, which is the form _norm_unit looks up.
_UNIT_ALIASES: Dict[str, str] = {
    "t-km": "ton_km",
    "t·km": "ton_km",
    "ton-km": "ton_km",
    # "t km" / "ton km" collapse to these once whitespace is removed.
    "tkm": "ton_km",
    "tonkm": "ton_km",
    "t_km": "ton_km",
    "ton·km": "ton_km",
    "tonne-km": "ton_km",
    "tonne_km": "ton_km",
    "tonne·km": "ton_km",
    "tonnekm": "ton_km",
    # A tonne is the metric ton, i.e. the same unit as "t".
    "tonne": "t",
    "tonnes": "t",
}


def _norm_unit(u: str) -> str:
    """Return the canonical lower-case token for a quantity unit.

    The input is converted to ``str``, lower-cased and stripped of all
    whitespace, then mapped through ``_UNIT_ALIASES``. Units without an alias
    are returned in their cleaned form (e.g. ``"kWh"`` -> ``"kwh"``).

    Args:
        u: Unit label as typed by the user, e.g. ``"t-km"`` or ``" KG "``.

    Returns:
        ``"ton_km"`` or ``"t"`` for a known alias, otherwise the cleaned input.
    """
    # split()/join removes every whitespace run, so "ton km" and "ton-km"
    # both reach the alias table in a form it recognises.
    key = "".join(str(u).lower().split())
    return _UNIT_ALIASES.get(key, key)


def _normalize(quantity: float, unit: str, ef_unit: str) -> Tuple[float, str]:
    """
    Express a quantity in the unit its emission factor is stated per.

    Returns ``(quantity, basis)`` where basis is one of per_kg, per_t,
    per_ton_km or per_kWh. Mass quantities are converted between kg and t to
    match the factor; ton_km and kWh quantities are passed through and must
    already match. A ValueError is raised for a unit that cannot be paired
    with the given ef_unit, and for an ef_unit outside the supported set.
    """
    qty_unit = _norm_unit(unit)
    factor_unit = str(ef_unit).strip().lower()

    # Mass-based factors accept either kg or t and rescale as needed.
    mass_basis = {"kgco2e/kg": "per_kg", "tco2e/t": "per_t", "tco2e/ton": "per_t"}
    if factor_unit in mass_basis:
        if qty_unit not in ("kg", "t"):
            raise ValueError(f"Incompatible unit '{unit}' for ef_unit '{ef_unit}'. Use kg or t.")
        basis = mass_basis[factor_unit]
        if basis == "per_kg":
            scaled = quantity * 1000.0 if qty_unit == "t" else quantity
        else:
            scaled = quantity / 1000.0 if qty_unit == "kg" else quantity
        return scaled, basis

    if factor_unit == "kgco2e/ton_km":
        if qty_unit == "ton_km":
            return quantity, "per_ton_km"
        raise ValueError(
            f"Incompatible unit '{unit}' for ef_unit '{ef_unit}'. Use ton_km "
            "(aliases: t-km / t·km / ton-km)."
        )

    if factor_unit == "kgco2e/kwh":
        if qty_unit == "kwh":
            return quantity, "per_kWh"
        raise ValueError(f"Incompatible unit '{unit}' for ef_unit '{ef_unit}'. Use kWh.")

    raise ValueError(
        f"Unsupported ef_unit '{ef_unit}'. Allowed: kgCO2e/kg, tCO2e/t, kgCO2e/ton_km, kgCO2e/kWh."
    )


def _row_to_kgco2e(r: ItemRow) -> float:
    """Return the emissions of one item in kgCO2e.

    The quantity is first aligned with the factor's unit by _normalize; any
    ValueError it raises propagates to the caller.
    """
    amount, basis = _normalize(r.quantity, r.unit, r.ef_unit)

    # Per-tonne factors give tCO2e, so that product is scaled up to kg.
    if basis == "per_t":
        return (amount * r.ef) * 1000.0
    if basis not in ("per_kg", "per_ton_km", "per_kWh"):
        raise ValueError("Unknown basis")
    # The remaining factors are already stated in kgCO2e.
    return amount * r.ef


def _validate_module_and_name(mod: str, name: str):
    """Raise ValueError unless `mod` is a known module and `name` is one of its options."""
    allowed = MODULE_OPTIONS.get(mod)
    if allowed is None:
        raise ValueError(f"Unsupported module '{mod}'. Use one of {sorted(MODULE_OPTIONS.keys())}.")
    if name in allowed:
        return
    raise ValueError(
        f"Invalid material/activity '{name}' for module {mod}. "
        f"Valid options: {MODULE_OPTIONS[mod]}"
    )


def _basic_sanity_check(df: pd.DataFrame):
    """Reject tables whose quantity or ef column is missing, non-numeric or negative.

    Values are coerced with ``pd.to_numeric`` so numeric strings are checked
    like numbers instead of failing the comparison with a TypeError.

    Args:
        df: Input table; must contain ``quantity`` and ``ef`` columns.

    Raises:
        ValueError: If any value in either column cannot be read as a number
            (including missing values) or is below zero.
    """
    for column, label in (("quantity", "Quantity"), ("ef", "Emission factor (ef)")):
        values = pd.to_numeric(df[column], errors="coerce")
        # NaN compares False against 0, so it has to be rejected explicitly;
        # otherwise the row yields a NaN result that the summary sum skips.
        if values.isna().any():
            raise ValueError(f"{label} must be a number (missing or non-numeric value found).")
        if (values < 0).any():
            raise ValueError(f"{label} must be non-negative.")


def compute_A1A5_emissions(df: pd.DataFrame):
    """Compute per-row and per-module A1–A5 emissions (kgCO2e and tCO2e).

    Args:
        df: Input table with the columns module, name, quantity, unit, ef,
            ef_unit and notes. Extra columns are carried through unchanged.

    Returns:
        ``(rows, summ)``. ``rows`` holds every input row plus ``kgCO2e`` and
        ``tCO2e`` columns, with ``module`` and ``name`` stored in their
        whitespace-stripped form. ``summ`` has one line per module (columns
        module, kgCO2e, tCO2e) followed by a line whose module and index
        label are both "Total". For a table without rows, ``rows`` is empty
        and ``summ`` contains only the zero "Total" line.

    Raises:
        ValueError: On missing columns, negative values, an unknown
            module/name pair, or an incompatible unit / ef_unit pairing.
    """
    required = ["module", "name", "quantity", "unit", "ef", "ef_unit", "notes"]
    miss = [c for c in required if c not in df.columns]
    if miss:
        raise ValueError(f"Missing columns: {miss}")

    if len(df) == 0:
        # With no rows the result frame would have no "module" column and the
        # groupby below would fail with a KeyError; return an explicit zero total.
        result_columns = list(dict.fromkeys([*df.columns, "kgCO2e", "tCO2e"]))
        empty_rows = pd.DataFrame(columns=result_columns)
        empty_summ = pd.DataFrame(
            [{"module": "Total", "kgCO2e": 0.0, "tCO2e": 0.0}], index=["Total"]
        )
        return empty_rows, empty_summ

    _basic_sanity_check(df)

    out = []
    for _, row in df.iterrows():
        mod = str(row["module"]).strip()
        name = str(row["name"]).strip()
        _validate_module_and_name(mod, name)

        item = ItemRow(
            module=mod,
            name=name,
            quantity=float(row["quantity"]),
            unit=str(row["unit"]),
            ef=float(row["ef"]),
            ef_unit=str(row["ef_unit"]),
            notes=str(row.get("notes", "")),
        )
        kg = _row_to_kgco2e(item)
        rec = dict(row)
        # Store the validated (stripped) labels; otherwise " A1" and "A1"
        # would be summed as two different modules below.
        rec["module"] = mod
        rec["name"] = name
        rec["kgCO2e"] = kg
        rec["tCO2e"] = kg / 1000.0
        out.append(rec)

    rows = pd.DataFrame(out)
    summ = rows.groupby("module", as_index=False)[["kgCO2e", "tCO2e"]].sum()
    # Appended after the per-module sums so the totals do not include themselves.
    summ.loc["Total"] = ["Total", float(summ["kgCO2e"].sum()), float(summ["tCO2e"].sum())]
    return rows, summ


# ============ Examples / defaults ============
def default_a1a5_table() -> pd.DataFrame:
    """Return the five-row demo table (two A1 rows and one each for A3, A4, A5)."""
    columns = ("module", "name", "quantity", "unit", "ef", "ef_unit", "notes")
    records = (
        # A1 materials
        ("A1", "Concrete", 10000, "kg", 0.12, "kgCO2e/kg", "demo"),
        ("A1", "Steel", 2000, "kg", 1.9, "tCO2e/t", "demo"),
        # A3 fabrication
        ("A3", "CLT Fabrication", 8, "t", 0.35, "tCO2e/t", "demo"),
        # A4 transport
        ("A4", "On-site Trucking", 800, "ton_km", 0.1, "kgCO2e/ton_km", "demo"),
        # A5 site stage
        ("A5", "Construction Waste", 2, "t", 0.25, "tCO2e/t", "demo"),
    )
    return pd.DataFrame([dict(zip(columns, record)) for record in records])


def example_case_1() -> pd.DataFrame:
    """Return example 1, which is the default demo table."""
    table = default_a1a5_table()
    return table


def example_case_2() -> pd.DataFrame:
    """Return example 2: timber and steel, precast concrete, haulage and construction waste."""
    columns = ("module", "name", "quantity", "unit", "ef", "ef_unit", "notes")
    records = (
        ("A1", "Timber", 3, "t", 0.25, "tCO2e/t", "example2"),
        ("A1", "Steel", 200, "kg", 2.0, "tCO2e/t", "example2"),
        ("A3", "Precast Concrete", 6, "t", 0.35, "tCO2e/t", "example2"),
        ("A4", "Haulage", 600, "ton_km", 0.08, "kgCO2e/ton_km", "example2"),
        ("A5", "Construction Waste", 1.5, "t", 0.25, "tCO2e/t", "example2"),
    )
    return pd.DataFrame([dict(zip(columns, record)) for record in records])


def example_case_3() -> pd.DataFrame:
    """Return example 3: concrete and steel, on-site trucking, site electricity and waste."""
    columns = ("module", "name", "quantity", "unit", "ef", "ef_unit", "notes")
    records = (
        ("A1", "Concrete", 15000, "kg", 0.12, "kgCO2e/kg", "example3"),
        ("A1", "Steel", 3000, "kg", 1.9, "tCO2e/t", "example3"),
        ("A4", "On-site Trucking", 1200, "ton_km", 0.1, "kgCO2e/ton_km", "example3"),
        ("A5", "Site Electricity", 1000, "kWh", 0.45, "kgCO2e/kWh", "example3"),
        ("A5", "Construction Waste", 3, "t", 0.25, "tCO2e/t", "example3"),
    )
    return pd.DataFrame([dict(zip(columns, record)) for record in records])


def empty_table() -> pd.DataFrame:
    """Return a table with the seven input columns and no rows."""
    input_columns = ["module", "name", "quantity", "unit", "ef", "ef_unit", "notes"]
    return pd.DataFrame(columns=input_columns)


# ============ Suggestions ============
def rule_based_suggestions(rows_df: pd.DataFrame, summ_df: pd.DataFrame) -> str:
    """Build threshold-based reduction tips from the by-module summary.

    A tip is emitted when A1–A3 exceed 60% of the total, when A4 exceeds
    0.5 tCO2e, and when A5 exceeds 0.3 tCO2e; if none applies, a single
    general tip is returned. ``rows_df`` is accepted but not read.
    ``summ_df`` must contain a row whose module is "Total".
    """
    modules = summ_df["module"]
    tonnes = summ_df["tCO2e"]

    def stage_sum(labels) -> float:
        # Sum of tCO2e over the summary lines whose module is in `labels`.
        return float(tonnes[modules.isin(labels)].sum())

    grand_total = float(tonnes[modules == "Total"].values[0])
    product_stage = stage_sum(["A1", "A2", "A3"])  # include A2
    transport = stage_sum(["A4"])
    site = stage_sum(["A5"])

    checks = (
        (
            grand_total > 0 and product_stage / grand_total > 0.6,
            "- A1–A3: lower-carbon EPDs, increase recycled content, or optimize mixes/suppliers.",
        ),
        (
            transport > 0.5,
            "- A4: cut ton·km via nearer sourcing, mode shift (rail/ship), improve load factor.",
        ),
        (
            site > 0.3,
            "- A5: reduce site electricity/fuel; tighten waste rates; on-site segregation.",
        ),
    )
    tips = [text for triggered, text in checks if triggered]
    if not tips:
        tips = ["- General: verify quantities & EF sources; target top tCO2e items first."]
    return "Rule-based suggestions:\n" + "\n".join(tips)


def _summarize_for_prompt(rows_df: pd.DataFrame, summ_df: pd.DataFrame) -> str:
    """Render the results as plain text for the LLM prompt.

    The text lists the per-module totals, the overall total and the five
    rows with the highest tCO2e, all formatted to two decimals.
    """
    ranked = rows_df.sort_values("tCO2e", ascending=False)
    leaders = ranked.head(5)[["module", "name", "tCO2e"]]
    contributor_lines = [
        f"- {mod}: {label} — {tonnes:.2f} tCO2e"
        for mod, label, tonnes in zip(leaders["module"], leaders["name"], leaders["tCO2e"])
    ]

    # The "Total" line is reported separately, so it is left out of the module list.
    per_module = summ_df[summ_df["module"] != "Total"]
    module_lines = [
        f"- {mod}: {tonnes:.2f} tCO2e"
        for mod, tonnes in zip(per_module["module"], per_module["tCO2e"])
    ]

    grand_total = float(summ_df.loc[summ_df["module"] == "Total", "tCO2e"].values[0])

    sections = [
        "Module totals:\n" + "\n".join(module_lines),
        f"\n\nTotal: {grand_total:.2f} tCO2e\n\n",
        "Top contributors:\n" + "\n".join(contributor_lines),
    ]
    return "".join(sections)


def llm_feedback(rows_df: pd.DataFrame, summ_df: pd.DataFrame) -> str:
    """Ask the local model for reduction strategies based on the computed results.

    Returns a placeholder message when no model was loaded, and an
    "(LLM error: ...)" message when generation raises. Errors raised while
    building the prompt summary are not caught here.
    """
    if _llm is None:
        return "(LLM unavailable — flan-t5-base not loaded)"

    # Fixed instructions with example actions; the results summary is appended below.
    instructions = (
        "You are a sustainability engineer.\n"
        "Below are EXAMPLES of good embodied carbon reduction actions:\n"
        "- A1: Use low-carbon or recycled concrete.\n"
        "- A1: Increase recycled steel content.\n"
        "- A3: Optimize manufacturing energy efficiency.\n"
        "- A4: Shorten transport distance or shift to rail.\n"
        "- A5: Reduce on-site electricity use and minimize waste.\n\n"
        "Now, based on the embodied carbon summary below, write 4–6 similar practical strategies.\n"
        "Avoid repeating the table values. Write in bullet form, each line ≤ 20 words.\n\n"
    )
    prompt = instructions + _summarize_for_prompt(rows_df, summ_df)

    try:
        generations = _llm(prompt, max_new_tokens=220)
        answer = generations[0]["generated_text"]
        return "LLM suggestions (local):\n" + answer.strip()
    except Exception as e:
        return f"(LLM error: {e})"


# ============ Gradio actions ============
def update_materials(module: str):
    """Return a Gradio update that swaps in the options for `module` and clears the selection."""
    options = MODULE_OPTIONS.get(module, [])
    return gr.update(choices=options, value=None)


def add_row(module, material, quantity, unit, ef, ef_unit, notes, df):
    """Append one validated row to the input table.

    The row is checked before it is added, so that a single bad entry cannot
    make every later computation on the table fail: the module/material pair
    must be a listed option, quantity and EF must be non-negative numbers,
    and the unit must be compatible with the EF unit.

    Args:
        module: Module code selected in the form.
        material: Material/activity selected for that module.
        quantity: Amount, in `unit`.
        unit: Unit of the quantity.
        ef: Emission factor value, in `ef_unit`.
        ef_unit: Unit of the emission factor.
        notes: Optional free text; stored as "" when empty or None.
        df: Current input table.

    Returns:
        ``(table, message)``. On success the table has the new row appended
        and the message is "Row added."; on any validation failure the table
        is returned unchanged and the message starts with "Error:".
    """
    if module not in MODULE_OPTIONS:
        return df, f"Error: Unsupported module '{module}'."
    if material not in MODULE_OPTIONS[module]:
        return df, f"Error: Invalid material '{material}' for {module}. Valid: {MODULE_OPTIONS[module]}"
    if quantity is None or ef is None:
        return df, "Error: Quantity and EF must be provided."

    try:
        qty = float(quantity)
        factor = float(ef)
    except (TypeError, ValueError):
        return df, "Error: Quantity and EF must be numbers."
    # Written as a positive test so that NaN, which fails every comparison,
    # is rejected together with negative values.
    if not (qty >= 0 and factor >= 0):
        return df, "Error: Quantity and EF must be non-negative numbers."

    try:
        # Called only for its unit / ef_unit compatibility check.
        _normalize(qty, unit, ef_unit)
    except ValueError as exc:
        return df, f"Error: {exc}"

    new_row = {
        "module": module,
        "name": material,
        "quantity": qty,
        "unit": unit,
        "ef": factor,
        "ef_unit": ef_unit,
        "notes": notes or "",
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    return df, "Row added."


def _compute_all(df_in):
    """Run the whole calculation for the "Compute & Suggest" button.

    Returns a 6-tuple in the order the UI wires its outputs: per-row results,
    by-module summary, total tCO2e, LLM status line, LLM suggestions and
    rule-based suggestions.

    Never raises. For an empty or missing table the fifth element carries a
    "No data" hint; on any exception the last element carries the error text.
    In both cases the frames are empty and the total is 0.0.
    """
    try:
        if df_in is None or df_in.empty:
            return pd.DataFrame(), pd.DataFrame(), 0.0, "", "No data. Please add rows.", ""
        rows_df, summ_df = compute_A1A5_emissions(df_in)
        total_t = float(summ_df.loc[summ_df["module"] == "Total", "tCO2e"].values[0])
        rule_sugg = rule_based_suggestions(rows_df, summ_df)
        llm_sugg = llm_feedback(rows_df, summ_df)
        # Report the configured model id in both branches so the status
        # always names the model that was actually requested.
        if _llm is not None:
            status = f"LLM (local): {LLM_MODEL_ID}"
        else:
            status = f"LLM unavailable ({LLM_MODEL_ID} not loaded)"
        return rows_df, summ_df, total_t, status, llm_sugg, rule_sugg
    except Exception as e:
        # Boundary with the UI: show the failure instead of crashing the callback.
        return pd.DataFrame(), pd.DataFrame(), 0.0, "", "", f"Error: {e}"


# ============ UI ============
# Layout: the input form and table sit in the left column, results and
# suggestions in the right column. Event handlers are wired next to the
# components they belong to.
with gr.Blocks(title="A1–A5 Carbon Calculator (Local LLM)") as demo:
    gr.Markdown("# 🏗️ A1–A5 Upfront Embodied Carbon Calculator")
    gr.Markdown(
        "Fill rows using the form (module-specific options). Then **Add Row** → **Compute & Suggest**.\n\n"
        "**Units**: Materials/Waste — kg|t with kgCO2e/kg or tCO2e/t; "
        "Transport — ton_km with kgCO2e/ton_km; Site — kWh with kgCO2e/kWh."
    )

    with gr.Row():
        with gr.Column(scale=3):
            # Form inputs
            module_dd = gr.Dropdown(list(MODULE_OPTIONS.keys()), label="Module (A1–A5)", value="A1")
            # Start with the options of the default module ("A1"): the change
            # handler below only runs when the module selection changes, so an
            # empty list here would leave A1 materials unselectable at first.
            material_dd = gr.Dropdown(list(MODULE_OPTIONS["A1"]), label="Material/Activity")
            module_dd.change(update_materials, inputs=module_dd, outputs=material_dd)

            quantity = gr.Number(value=1000, label="Quantity")
            unit = gr.Dropdown(choices=["kg", "t", "ton_km", "kWh"], value="kg", label="Unit")
            ef = gr.Number(value=0.12, label="Emission Factor (EF)")
            ef_unit = gr.Dropdown(
                choices=["kgCO2e/kg", "tCO2e/t", "kgCO2e/ton_km", "kgCO2e/kWh"],
                value="kgCO2e/kg",
                label="EF Unit"
            )
            notes = gr.Textbox(label="Notes", placeholder="optional")

            # Read-only: rows are added through the form or the example buttons.
            df_table = gr.Dataframe(
                headers=["module", "name", "quantity", "unit", "ef", "ef_unit", "notes"],
                value=empty_table(),
                interactive=False,
                label="Input Table",
            )

            add_btn = gr.Button("➕ Add Row")
            add_msg = gr.Markdown()
            add_btn.click(
                add_row,
                inputs=[module_dd, material_dd, quantity, unit, ef, ef_unit, notes, df_table],
                outputs=[df_table, add_msg],
            )

            with gr.Row():
                btn_ex1 = gr.Button("Load Example 1")
                btn_ex2 = gr.Button("Load Example 2")
                btn_ex3 = gr.Button("Load Example 3")
                btn_clr = gr.Button("Clear Table")

            # Each button replaces the whole input table with a fresh frame.
            btn_ex1.click(example_case_1, inputs=None, outputs=df_table, concurrency_limit=1)
            btn_ex2.click(example_case_2, inputs=None, outputs=df_table, concurrency_limit=1)
            btn_ex3.click(example_case_3, inputs=None, outputs=df_table, concurrency_limit=1)
            btn_clr.click(empty_table, inputs=None, outputs=df_table, concurrency_limit=1)

            run_btn = gr.Button("✅ Compute & Suggest")

        with gr.Column(scale=2):
            rows_out = gr.Dataframe(label="Per-row Results", interactive=False)
            summ_out = gr.Dataframe(label="By-module Summary", interactive=False)
            total_out = gr.Number(label="Total upfront (tCO2e)", interactive=False)
            status_md = gr.Markdown(label="LLM Status")
            sugg_llm_md = gr.Markdown(label="LLM Suggestions (local)")
            sugg_rule_md = gr.Markdown(label="Rule-based Suggestions")

    # The output order must match the 6-tuple returned by _compute_all.
    run_btn.click(
        _compute_all,
        inputs=[df_table],
        outputs=[rows_out, summ_out, total_out, status_md, sugg_llm_md, sugg_rule_md],
    )

if __name__ == "__main__":
    demo.launch()