# EricCRX's picture
# Update app.py
# 1a773bc verified
# -*- coding: utf-8 -*-
# A1–A5 Upfront Embodied Carbon Calculator (Local LLM, module-specific options, strict unit checks)
from dataclasses import dataclass
from typing import List, Tuple, Dict
import pandas as pd
import gradio as gr
# ============ Local LLM (no external API) ============
# Identifier of the text2text model handed to the transformers pipeline.
LLM_MODEL_ID = "google/flan-t5-base"

# Best-effort load: if the import or the pipeline construction fails for any
# reason, `_llm` stays None and callers treat the LLM as unavailable.
_llm = None
try:
    from transformers import pipeline as hf_pipeline

    _llm = hf_pipeline("text2text-generation", model=LLM_MODEL_ID)
except Exception:
    _llm = None
# ============ Module-specific selectable options ============
# Selectable material/activity names, keyed by module code (A1–A5).
MODULE_OPTIONS: Dict[str, List[str]] = dict(
    A1=["Concrete", "Steel", "Timber", "Recycled Aggregate"],
    A2=["Truck Transport", "Rail Transport", "Ship Transport"],
    A3=["CLT Fabrication", "Steel Fabrication", "Precast Concrete"],
    A4=["On-site Trucking", "Crane Lifting", "Haulage"],
    A5=["Site Electricity", "Construction Waste", "Diesel Fuel"],
)

# Set of module codes that have an options list.
SUPPORTED = set(MODULE_OPTIONS)
# ============ Deterministic backend ============
@dataclass
class ItemRow:
    """One input line of the calculator.

    Fields:
        module:   module code, A1–A5.
        name:     must be one of MODULE_OPTIONS[module].
        quantity: numeric amount, interpreted together with ``unit``.
        unit:     kg / t / ton_km / kWh (aliases handled).
        ef:       emission factor, interpreted together with ``ef_unit``.
        ef_unit:  kgCO2e/kg, tCO2e/t, kgCO2e/ton_km or kgCO2e/kWh.
        notes:    free text; defaults to an empty string.
    """

    module: str
    name: str
    quantity: float
    unit: str
    ef: float
    ef_unit: str
    notes: str = ""
def _norm_unit(u: str) -> str:
u = str(u).strip().lower().replace(" ", "")
return {"t-km": "ton_km", "t·km": "ton_km", "ton-km": "ton_km"}.get(u, u)
def _normalize(quantity: float, unit: str, ef_unit: str) -> Tuple[float, str]:
    """Convert a quantity to the basis implied by the emission-factor unit.

    Returns:
        ``(normalized_quantity, basis)`` with basis one of ``per_kg``,
        ``per_t``, ``per_ton_km`` or ``per_kWh``.

    Raises:
        ValueError: if ``unit`` cannot be paired with ``ef_unit`` or if
            ``ef_unit`` is not one of the supported factor units. Raising
            here prevents a silent, dimensionally wrong multiplication.
    """
    unit_key = _norm_unit(unit)
    ef_key = str(ef_unit).strip().lower()
    mismatch = f"Incompatible unit '{unit}' for ef_unit '{ef_unit}'. "

    # Mass-based factor per kilogram: tonnes are scaled up to kilograms.
    if ef_key == "kgco2e/kg":
        if unit_key == "kg":
            return quantity, "per_kg"
        if unit_key == "t":
            return quantity * 1000.0, "per_kg"
        raise ValueError(mismatch + "Use kg or t.")

    # Mass-based factor per tonne: kilograms are scaled down to tonnes.
    if ef_key in ("tco2e/t", "tco2e/ton"):
        if unit_key == "t":
            return quantity, "per_t"
        if unit_key == "kg":
            return quantity / 1000.0, "per_t"
        raise ValueError(mismatch + "Use kg or t.")

    # Transport factor: only tonne-kilometres are accepted, unconverted.
    if ef_key == "kgco2e/ton_km":
        if unit_key == "ton_km":
            return quantity, "per_ton_km"
        raise ValueError(mismatch + "Use ton_km " + "(aliases: t-km / t·km / ton-km).")

    # Energy factor: only kilowatt-hours are accepted, unconverted.
    if ef_key == "kgco2e/kwh":
        if unit_key == "kwh":
            return quantity, "per_kWh"
        raise ValueError(mismatch + "Use kWh.")

    raise ValueError(
        f"Unsupported ef_unit '{ef_unit}'. Allowed: kgCO2e/kg, tCO2e/t, kgCO2e/ton_km, kgCO2e/kWh."
    )
def _row_to_kgco2e(r: ItemRow) -> float:
    """Return the emissions of one row in kgCO2e.

    The quantity is first normalized to the factor's basis; the product
    quantity × factor is already in kilograms for every basis except
    ``per_t``, whose factor is in tonnes of CO2e and is scaled by 1000.

    Raises:
        ValueError: propagated from ``_normalize``, or "Unknown basis" if an
            unexpected basis label comes back.
    """
    amount, basis = _normalize(r.quantity, r.unit, r.ef_unit)
    if basis not in ("per_kg", "per_t", "per_ton_km", "per_kWh"):
        raise ValueError("Unknown basis")
    emissions = amount * r.ef
    if basis == "per_t":
        return emissions * 1000.0
    return emissions
def _validate_module_and_name(mod: str, name: str):
    """Ensure ``mod`` is a known module and ``name`` is one of its options.

    Raises:
        ValueError: if the module is not a key of ``MODULE_OPTIONS`` or the
            name is not listed for that module.
    """
    allowed = MODULE_OPTIONS.get(mod)
    if allowed is None:
        raise ValueError(f"Unsupported module '{mod}'. Use one of {sorted(MODULE_OPTIONS.keys())}.")
    if name in allowed:
        return
    raise ValueError(
        f"Invalid material/activity '{name}' for module {mod}. "
        f"Valid options: {MODULE_OPTIONS[mod]}"
    )
def _basic_sanity_check(df: pd.DataFrame):
if (df["quantity"] < 0).any():
raise ValueError("Quantity must be non-negative.")
if (df["ef"] < 0).any():
raise ValueError("Emission factor (ef) must be non-negative.")
def compute_A1A5_emissions(df: pd.DataFrame):
    """Compute per-row and per-module A1–A5 emissions (kgCO2e and tCO2e).

    Args:
        df: input table with the columns ``module``, ``name``, ``quantity``,
            ``unit``, ``ef``, ``ef_unit`` and ``notes``.

    Returns:
        ``(rows, summ)``. ``rows`` repeats every input row with two extra
        columns, ``kgCO2e`` and ``tCO2e``. ``summ`` holds the per-module
        sums of those two columns plus a final row, stored under the index
        label ``"Total"`` with ``module == "Total"``, carrying the grand
        totals. A table without rows yields an empty ``rows`` frame and a
        ``summ`` frame containing only the zero "Total" row.

    Raises:
        ValueError: if a required column is missing, or if a row fails the
            sanity, module/name or unit-compatibility checks.
    """
    required = ["module", "name", "quantity", "unit", "ef", "ef_unit", "notes"]
    miss = [c for c in required if c not in df.columns]
    if miss:
        raise ValueError(f"Missing columns: {miss}")
    _basic_sanity_check(df)

    out = []
    for _, row in df.iterrows():
        mod = str(row["module"]).strip()
        name = str(row["name"]).strip()
        _validate_module_and_name(mod, name)
        item = ItemRow(
            module=mod,
            name=name,
            quantity=float(row["quantity"]),
            unit=str(row["unit"]),
            ef=float(row["ef"]),
            ef_unit=str(row["ef_unit"]),
            notes=str(row.get("notes", "")),
        )
        kg = _row_to_kgco2e(item)
        # Keep the caller's original cell values and append the results.
        rec = dict(row)
        rec["kgCO2e"] = kg
        rec["tCO2e"] = kg / 1000.0
        out.append(rec)

    if not out:
        # With no records the frame would have no "module" column and the
        # groupby below would raise KeyError; return well-formed empties.
        rows = pd.DataFrame(columns=list(df.columns) + ["kgCO2e", "tCO2e"])
        summ = pd.DataFrame(
            [["Total", 0.0, 0.0]],
            columns=["module", "kgCO2e", "tCO2e"],
            index=["Total"],
        )
        return rows, summ

    rows = pd.DataFrame(out)
    summ = rows.groupby("module", as_index=False)[["kgCO2e", "tCO2e"]].sum()
    summ.loc["Total"] = ["Total", float(summ["kgCO2e"].sum()), float(summ["tCO2e"].sum())]
    return rows, summ
# ============ Examples / defaults ============
def default_a1a5_table() -> pd.DataFrame:
    """Return the built-in five-row demo table (A1, A3, A4 and A5 entries)."""
    columns = ["module", "name", "quantity", "unit", "ef", "ef_unit", "notes"]
    records = [
        # A1 materials
        ("A1", "Concrete", 10000, "kg", 0.12, "kgCO2e/kg", "demo"),
        ("A1", "Steel", 2000, "kg", 1.9, "tCO2e/t", "demo"),
        # A3 fabrication energy
        ("A3", "CLT Fabrication", 8, "t", 0.35, "tCO2e/t", "demo"),
        # A4 transport
        ("A4", "On-site Trucking", 800, "ton_km", 0.1, "kgCO2e/ton_km", "demo"),
        # A5 site stage
        ("A5", "Construction Waste", 2, "t", 0.25, "tCO2e/t", "demo"),
    ]
    return pd.DataFrame(records, columns=columns)
def example_case_1() -> pd.DataFrame:
    """Example 1: the same rows as the default demo table."""
    table = default_a1a5_table()
    return table
def example_case_2() -> pd.DataFrame:
    """Example 2: timber and steel, precast fabrication, haulage and site waste."""
    columns = ["module", "name", "quantity", "unit", "ef", "ef_unit", "notes"]
    records = [
        ("A1", "Timber", 3, "t", 0.25, "tCO2e/t", "example2"),
        ("A1", "Steel", 200, "kg", 2.0, "tCO2e/t", "example2"),
        ("A3", "Precast Concrete", 6, "t", 0.35, "tCO2e/t", "example2"),
        ("A4", "Haulage", 600, "ton_km", 0.08, "kgCO2e/ton_km", "example2"),
        ("A5", "Construction Waste", 1.5, "t", 0.25, "tCO2e/t", "example2"),
    ]
    return pd.DataFrame(records, columns=columns)
def example_case_3() -> pd.DataFrame:
    """Example 3: concrete and steel, trucking, site electricity and site waste."""
    columns = ["module", "name", "quantity", "unit", "ef", "ef_unit", "notes"]
    records = [
        ("A1", "Concrete", 15000, "kg", 0.12, "kgCO2e/kg", "example3"),
        ("A1", "Steel", 3000, "kg", 1.9, "tCO2e/t", "example3"),
        ("A4", "On-site Trucking", 1200, "ton_km", 0.1, "kgCO2e/ton_km", "example3"),
        ("A5", "Site Electricity", 1000, "kWh", 0.45, "kgCO2e/kWh", "example3"),
        ("A5", "Construction Waste", 3, "t", 0.25, "tCO2e/t", "example3"),
    ]
    return pd.DataFrame(records, columns=columns)
def empty_table() -> pd.DataFrame:
    """Return a zero-row table that carries the seven input columns."""
    header = "module name quantity unit ef ef_unit notes".split()
    return pd.DataFrame(columns=header)
# ============ Suggestions ============
def rule_based_suggestions(rows_df: pd.DataFrame, summ_df: pd.DataFrame) -> str:
    """Build threshold-driven reduction tips from the per-module summary.

    A tip is emitted when A1–A3 (A2 included) exceed 60 % of the total,
    when A4 exceeds 0.5 tCO2e, and when A5 exceeds 0.3 tCO2e; if none of
    these fire a single generic tip is returned. ``rows_df`` is accepted
    but not read. ``summ_df`` must contain a row whose ``module`` is
    "Total".
    """
    module_col = summ_df["module"]

    def subtotal(codes):
        # Sum of tCO2e over the summary rows whose module is in `codes`.
        return float(summ_df.loc[module_col.isin(codes), "tCO2e"].sum())

    total = float(summ_df.loc[module_col == "Total", "tCO2e"].values[0])
    product_stage = subtotal(["A1", "A2", "A3"])
    transport = subtotal(["A4"])
    site = subtotal(["A5"])

    candidates = [
        (
            total > 0 and product_stage / total > 0.6,
            "- A1–A3: lower-carbon EPDs, increase recycled content, or optimize mixes/suppliers.",
        ),
        (
            transport > 0.5,
            "- A4: cut ton·km via nearer sourcing, mode shift (rail/ship), improve load factor.",
        ),
        (
            site > 0.3,
            "- A5: reduce site electricity/fuel; tighten waste rates; on-site segregation.",
        ),
    ]
    tips: List[str] = [text for fired, text in candidates if fired]
    if not tips:
        tips = ["- General: verify quantities & EF sources; target top tCO2e items first."]
    return "Rule-based suggestions:\n" + "\n".join(tips)
def _summarize_for_prompt(rows_df: pd.DataFrame, summ_df: pd.DataFrame) -> str:
rows_sorted = rows_df.sort_values("tCO2e", ascending=False)
top = rows_sorted.head(5)[["module", "name", "tCO2e"]]
lines = [f"- {r.module}: {r.name}{r.tCO2e:.2f} tCO2e" for r in top.itertuples()]
mod_lines = [f"- {r.module}: {r.tCO2e:.2f} tCO2e" for r in summ_df[summ_df["module"] != "Total"].itertuples()]
total = float(summ_df.loc[summ_df["module"] == "Total", "tCO2e"].values[0])
text = "Module totals:\n" + "\n".join(mod_lines)
text += f"\n\nTotal: {total:.2f} tCO2e\n\n"
text += "Top contributors:\n" + "\n".join(lines)
return text
def llm_feedback(rows_df: pd.DataFrame, summ_df: pd.DataFrame) -> str:
    """Ask the local model for reduction strategies based on the results.

    Returns a placeholder string when the pipeline was not loaded, and an
    "(LLM error: ...)" string when generation fails; a failure while
    building the summary text is not caught here.
    """
    if _llm is None:
        return "(LLM unavailable — flan-t5-base not loaded)"

    # Few-shot style instructions; the numeric summary is appended below.
    instructions = (
        "You are a sustainability engineer.\n"
        "Below are EXAMPLES of good embodied carbon reduction actions:\n"
        "- A1: Use low-carbon or recycled concrete.\n"
        "- A1: Increase recycled steel content.\n"
        "- A3: Optimize manufacturing energy efficiency.\n"
        "- A4: Shorten transport distance or shift to rail.\n"
        "- A5: Reduce on-site electricity use and minimize waste.\n\n"
        "Now, based on the embodied carbon summary below, write 4–6 similar practical strategies.\n"
        "Avoid repeating the table values. Write in bullet form, each line ≤ 20 words.\n\n"
    )
    prompt = instructions + _summarize_for_prompt(rows_df, summ_df)

    try:
        generations = _llm(prompt, max_new_tokens=220)
        answer = generations[0]["generated_text"]
        return "LLM suggestions (local):\n" + answer.strip()
    except Exception as e:
        return f"(LLM error: {e})"
# ============ Gradio actions ============
def update_materials(module: str):
    """Return a Gradio update that swaps in the options of ``module``.

    Unknown modules yield an empty choice list; the selection is cleared.
    """
    options = MODULE_OPTIONS.get(module, [])
    return gr.update(choices=options, value=None)
def add_row(module, material, quantity, unit, ef, ef_unit, notes, df):
    """Append one form entry to the input table.

    Returns ``(table, message)``. On a validation failure the table is
    returned unchanged together with an "Error: ..." message; otherwise a
    new table with the extra row and "Row added." are returned.
    """
    if module not in MODULE_OPTIONS:
        return df, f"Error: Unsupported module '{module}'."

    valid_names = MODULE_OPTIONS[module]
    if material not in valid_names:
        return df, f"Error: Invalid material '{material}' for {module}. Valid: {valid_names}"

    if any(value is None for value in (quantity, ef)):
        return df, "Error: Quantity and EF must be provided."

    record = dict(
        module=module,
        name=material,
        quantity=float(quantity),
        unit=unit,
        ef=float(ef),
        ef_unit=ef_unit,
        notes=notes or "",
    )
    extended = pd.concat([df, pd.DataFrame([record])], ignore_index=True)
    return extended, "Row added."
def _compute_all(df_in):
    """Run the calculation and both suggestion engines for the UI.

    Returns a 6-tuple in the order the compute button's outputs are wired:
    per-row results, per-module summary, total tCO2e, LLM status text, LLM
    suggestions, rule-based suggestions. For an empty or missing table the
    "No data" notice is placed in the LLM-suggestions slot; on any failure
    the error text is placed in the rule-based slot and the other outputs
    are blanked.
    """
    try:
        if df_in is None or df_in.empty:
            return pd.DataFrame(), pd.DataFrame(), 0.0, "", "No data. Please add rows.", ""
        rows_df, summ_df = compute_A1A5_emissions(df_in)
        total_t = float(summ_df.loc[summ_df["module"] == "Total", "tCO2e"].values[0])
        rule_sugg = rule_based_suggestions(rows_df, summ_df)
        llm_sugg = llm_feedback(rows_df, summ_df)
        # Name the model that is actually configured (the old text mentioned
        # distilgpt2) and use the same availability test as llm_feedback.
        if _llm is not None:
            status = f"LLM (local): {LLM_MODEL_ID}"
        else:
            status = f"LLM unavailable ({LLM_MODEL_ID} not loaded)"
        return rows_df, summ_df, total_t, status, llm_sugg, rule_sugg
    except Exception as e:
        # UI boundary: surface the problem as text rather than raising.
        return pd.DataFrame(), pd.DataFrame(), 0.0, "", "", f"Error: {e}"
# ============ UI ============
# Layout: a wide left column with the entry form, the input table and the
# action buttons; a narrower right column with the results and suggestions.
with gr.Blocks(title="A1–A5 Carbon Calculator (Local LLM)") as demo:
    gr.Markdown("# 🏗️ A1–A5 Upfront Embodied Carbon Calculator")
    gr.Markdown(
        "Fill rows using the form (module-specific options). Then **Add Row** → **Compute & Suggest**.\n\n"
        "**Units**: Materials/Waste — kg|t with kgCO2e/kg or tCO2e/t; "
        "Transport — ton_km with kgCO2e/ton_km; Site — kWh with kgCO2e/kWh."
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Form inputs
            module_dd = gr.Dropdown(list(MODULE_OPTIONS.keys()), label="Module (A1–A5)", value="A1")
            # Seed the choices for the preselected module "A1": the change
            # handler below only runs after the module is changed, so an
            # empty initial list would leave nothing to pick for A1.
            material_dd = gr.Dropdown(MODULE_OPTIONS["A1"], label="Material/Activity")
            module_dd.change(update_materials, inputs=module_dd, outputs=material_dd)
            quantity = gr.Number(value=1000, label="Quantity")
            unit = gr.Dropdown(choices=["kg", "t", "ton_km", "kWh"], value="kg", label="Unit")
            ef = gr.Number(value=0.12, label="Emission Factor (EF)")
            ef_unit = gr.Dropdown(
                choices=["kgCO2e/kg", "tCO2e/t", "kgCO2e/ton_km", "kgCO2e/kWh"],
                value="kgCO2e/kg",
                label="EF Unit",
            )
            notes = gr.Textbox(label="Notes", placeholder="optional")

            # Read-only table; rows are added through the form or examples.
            df_table = gr.Dataframe(
                headers=["module", "name", "quantity", "unit", "ef", "ef_unit", "notes"],
                value=empty_table(),
                interactive=False,
                label="Input Table",
            )
            add_btn = gr.Button("➕ Add Row")
            add_msg = gr.Markdown()
            add_btn.click(
                add_row,
                inputs=[module_dd, material_dd, quantity, unit, ef, ef_unit, notes, df_table],
                outputs=[df_table, add_msg],
            )

            with gr.Row():
                btn_ex1 = gr.Button("Load Example 1")
                btn_ex2 = gr.Button("Load Example 2")
                btn_ex3 = gr.Button("Load Example 3")
                btn_clr = gr.Button("Clear Table")
            # Each of these replaces the whole input table.
            btn_ex1.click(lambda: example_case_1(), inputs=None, outputs=df_table, concurrency_limit=1)
            btn_ex2.click(lambda: example_case_2(), inputs=None, outputs=df_table, concurrency_limit=1)
            btn_ex3.click(lambda: example_case_3(), inputs=None, outputs=df_table, concurrency_limit=1)
            btn_clr.click(lambda: empty_table(), inputs=None, outputs=df_table, concurrency_limit=1)
            run_btn = gr.Button("✅ Compute & Suggest")

        with gr.Column(scale=2):
            rows_out = gr.Dataframe(label="Per-row Results", interactive=False)
            summ_out = gr.Dataframe(label="By-module Summary", interactive=False)
            total_out = gr.Number(label="Total upfront (tCO2e)", interactive=False)
            status_md = gr.Markdown(label="LLM Status")
            sugg_llm_md = gr.Markdown(label="LLM Suggestions (local)")
            sugg_rule_md = gr.Markdown(label="Rule-based Suggestions")

    # The output order must match the 6-tuple returned by _compute_all.
    run_btn.click(
        _compute_all,
        inputs=[df_table],
        outputs=[rows_out, summ_out, total_out, status_md, sugg_llm_md, sugg_rule_md],
    )
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()