""" ClimateBERT — Greenwashing Signal Detector (Gradio demo) Runs six specialized ClimateBERT models on a paragraph of text and returns a proxy "cheap talk" greenwashing risk score. Aligned with the EU ECGT Directive (applies 27 September 2026) and the proposed Green Claims Directive. All models are Apache-2.0, from https://huggingface.co/climatebert """ import gradio as gr from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline # CPU-only (HF Spaces free tier) DEVICE = -1 MAX_LEN = 512 # Each entry: (internal_key, model_repo, tokenizer_repo_or_None) MODELS = [ ("detector", "climatebert/distilroberta-base-climate-detector", None), ("env_claims", "climatebert/environmental-claims", None), ("specificity", "climatebert/distilroberta-base-climate-specificity", None), ("commitment", "climatebert/distilroberta-base-climate-commitment", None), ("sentiment", "climatebert/distilroberta-base-climate-sentiment", None), # netzero-reduction does not ship its own tokenizer — use the base LM ("netzero", "climatebert/netzero-reduction", "climatebert/distilroberta-base-climate-f"), ] print("Loading ClimateBERT models (first run downloads ~2 GB)...") PIPES = {} for key, model_repo, tok_repo in MODELS: tok = AutoTokenizer.from_pretrained(tok_repo or model_repo, model_max_length=MAX_LEN) mdl = AutoModelForSequenceClassification.from_pretrained(model_repo) PIPES[key] = pipeline( "text-classification", model=mdl, tokenizer=tok, truncation=True, padding=True, max_length=MAX_LEN, device=DEVICE, ) print(f" loaded {key}: {model_repo}") print("All models loaded.") def _norm(label: str) -> str: return (label or "").strip().lower() def _is_positive(label: str, positive_keywords=("yes", "claim", "climate", "true", "1")) -> bool: label = _norm(label) return any(kw in label for kw in positive_keywords) def _is_non_specific(label: str) -> bool: label = _norm(label) return "non" in label # "non-specific", "nonspecific" def _no_commitment(label: str) -> bool: label = _norm(label) return label in ("no", "none") or "no" == label[:2] or "none" in label def classify(text: str): if not text or not text.strip(): return "Please enter some text to analyze.", {}, "", "" text = text.strip() results = {key: pipe(text)[0] for key, pipe in PIPES.items()} det = results["detector"] is_climate = _is_positive(det["label"]) # Greenwashing risk score (only meaningful if climate-related) # Weights follow the Bingler/Kraus/Leippold/Webersinke "cheap talk" pattern: # environmental claim + non-specific + no commitment + opportunity framing. risk = 0.0 reasons = [] if is_climate: claim = results["env_claims"] spec = results["specificity"] commit = results["commitment"] senti = results["sentiment"] if _is_positive(claim["label"]): risk += 0.40 * claim["score"] reasons.append( "- **Environmental claim detected** — subject to the EU ECGT Directive (from 27 Sep 2026)." ) if _is_non_specific(spec["label"]): risk += 0.30 * spec["score"] reasons.append("- **Non-specific language** — a classic cheap-talk signal.") if _no_commitment(commit["label"]): risk += 0.20 * commit["score"] reasons.append("- **No concrete commitment detected** — claim without follow-through.") if "opportunity" in _norm(senti["label"]): risk += 0.10 * senti["score"] reasons.append("- **Opportunity framing** — positive cherry-picking is common in greenwashing.") risk_pct = round(risk * 100, 1) # Verdict summary if not is_climate: summary = ( f"### Verdict: Not climate-related\n\n" f"Detector confidence: **{det['score']:.1%}**\n\n" f"_Greenwashing scoring is skipped for non-climate text. " f"Other signals below are informational only._" ) else: if risk >= 0.5: badge = "HIGH greenwashing risk" elif risk >= 0.25: badge = "MODERATE greenwashing risk" else: badge = "LOW greenwashing risk" summary = ( f"### Verdict: {badge}\n\n" f"**Risk score: {risk_pct} / 100**\n\n" f"Climate detector confidence: {det['score']:.1%}" ) # Signal breakdown (dict for Gradio JSON component) def fmt(r): return {"label": r["label"], "confidence": round(float(r["score"]), 4)} signals = { "climate_related": fmt(det), "environmental_claim": fmt(results["env_claims"]), "specificity": fmt(results["specificity"]), "commitment": fmt(results["commitment"]), "sentiment": fmt(results["sentiment"]), "netzero_reduction": fmt(results["netzero"]), } explanation = "\n".join(reasons) if reasons else "_No strong greenwashing signals detected._" raw = "\n".join(f"{k}: {v}" for k, v in results.items()) return summary, signals, explanation, raw EXAMPLES = [ [ "We are proud to announce our commitment to become climate neutral by 2040 " "through a combination of renewable energy investments and carbon offsetting." ], [ "In 2024 we reduced our Scope 1 and Scope 2 emissions by 23% year-over-year, " "from 145,000 tCO2e to 111,650 tCO2e, verified by an independent third-party " "auditor and aligned with our SBTi-validated 1.5C pathway." ], [ "Our eco-friendly products are designed with the planet in mind, featuring " "sustainable materials and a greener approach to packaging that customers love." ], [ "The quarterly earnings report showed revenue growth of 12% driven by strong " "performance in our core European markets and improved operational efficiency." ], [ "By 2030 we aim to achieve net-zero emissions across our entire value chain, " "aligned with a 1.5C science-based target validated by SBTi, with interim " "milestones of 50% absolute reduction by 2027 against a 2020 baseline." ], ] INTRO = """ # ClimateBERT — Greenwashing Signal Detector Paste a paragraph from a sustainability report, marketing copy, or corporate disclosure. This demo runs **six specialized ClimateBERT classifiers** in parallel to surface cheap-talk signals relevant to the upcoming EU regulations: - **ECGT Directive** — applies 27 September 2026, bans vague green claims and "climate neutral via offsetting" statements. - **Green Claims Directive** (proposed) — pre-verification of environmental claims. - **CSRD / ESRS** — the source of text that will be scrutinized. **Models** (all from [climatebert on Hugging Face](https://huggingface.co/climatebert), Apache-2.0): `distilroberta-base-climate-detector`, `environmental-claims`, `distilroberta-base-climate-specificity`, `distilroberta-base-climate-commitment`, `distilroberta-base-climate-sentiment`, `netzero-reduction`. > **Caveats.** Models are trained on **paragraphs** (not single sentences) and on > **English** only. Outputs are proxy signals, not a legal verdict. Ground-truth > greenwashing labels do not exist in any public dataset — every detector > operationalizes proxies (specificity, commitment gap, cheap talk). """ with gr.Blocks(title="ClimateBERT — Greenwashing Signal Detector") as demo: gr.Markdown(INTRO) with gr.Row(): with gr.Column(scale=2): text_in = gr.Textbox( label="Text to analyze (a paragraph works best)", lines=8, placeholder="Paste a paragraph from a sustainability report, press release, or marketing page...", ) analyze_btn = gr.Button("Analyze", variant="primary") gr.Examples(examples=EXAMPLES, inputs=text_in, label="Try an example") with gr.Column(scale=3): summary_out = gr.Markdown(label="Verdict") explain_out = gr.Markdown(label="Why this score") signals_out = gr.JSON(label="Per-model signal breakdown") with gr.Accordion("Raw model outputs", open=False): raw_out = gr.Textbox(label="Raw", lines=8) gr.Markdown( "---\n" "Built on [ClimateBERT](https://huggingface.co/climatebert) by Webersinke, " "Kraus, Bingler & Leippold. Scoring heuristic inspired by Bingler et al., " "*Cheap talk and cherry-picking: What ClimateBERT has to say on corporate " "climate risk disclosures*, Finance Research Letters (2022)." ) analyze_btn.click( classify, inputs=text_in, outputs=[summary_out, signals_out, explain_out, raw_out], ) text_in.submit( classify, inputs=text_in, outputs=[summary_out, signals_out, explain_out, raw_out], ) if __name__ == "__main__": demo.launch()