Spaces:
Running
Running
| """ | |
| ClimateBERT — Greenwashing Signal Detector (Gradio demo) | |
| Runs six specialized ClimateBERT models on a paragraph of text and returns | |
| a proxy "cheap talk" greenwashing risk score. Aligned with the EU ECGT | |
| Directive (applies 27 September 2026) and the proposed Green Claims Directive. | |
| All models are Apache-2.0, from https://huggingface.co/climatebert | |
| """ | |
| import gradio as gr | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline | |
# Value handed to every pipeline as `device`; -1 keeps inference on the CPU
# (the Space is noted as running on the HF Spaces free tier).
DEVICE = -1

# Token budget used both as the tokenizer's model_max_length and as the
# pipeline's truncation length.
MAX_LEN = 512

# One row per classifier: (internal_key, model_repo, tokenizer_repo_or_None).
# A tokenizer repo of None means "load the tokenizer from model_repo".
MODELS = [
    (
        "detector",
        "climatebert/distilroberta-base-climate-detector",
        None,
    ),
    (
        "env_claims",
        "climatebert/environmental-claims",
        None,
    ),
    (
        "specificity",
        "climatebert/distilroberta-base-climate-specificity",
        None,
    ),
    (
        "commitment",
        "climatebert/distilroberta-base-climate-commitment",
        None,
    ),
    (
        "sentiment",
        "climatebert/distilroberta-base-climate-sentiment",
        None,
    ),
    (
        # netzero-reduction does not ship its own tokenizer, so the base
        # language model's tokenizer is used instead.
        "netzero",
        "climatebert/netzero-reduction",
        "climatebert/distilroberta-base-climate-f",
    ),
]
# Build one text-classification pipeline per MODELS row at import time and
# keep them in PIPES, keyed by the row's internal key.
print("Loading ClimateBERT models (first run downloads ~2 GB)...")

# Options shared by every pipeline: truncate/pad to MAX_LEN, run on DEVICE.
_PIPELINE_OPTIONS = {
    "truncation": True,
    "padding": True,
    "max_length": MAX_LEN,
    "device": DEVICE,
}

PIPES = {}
for key, model_repo, tok_repo in MODELS:
    # Fall back to the model's own repo when no separate tokenizer is listed.
    tokenizer_source = tok_repo if tok_repo else model_repo
    tok = AutoTokenizer.from_pretrained(tokenizer_source, model_max_length=MAX_LEN)
    mdl = AutoModelForSequenceClassification.from_pretrained(model_repo)
    PIPES[key] = pipeline(
        "text-classification",
        model=mdl,
        tokenizer=tok,
        **_PIPELINE_OPTIONS,
    )
    print(f" loaded {key}: {model_repo}")
print("All models loaded.")
| def _norm(label: str) -> str: | |
| return (label or "").strip().lower() | |
| def _is_positive(label: str, positive_keywords=("yes", "claim", "climate", "true", "1")) -> bool: | |
| label = _norm(label) | |
| return any(kw in label for kw in positive_keywords) | |
def _is_non_specific(label: str) -> bool:
    """Return True when the normalised label contains the substring "non"."""
    # Substring test so both "non-specific" and "nonspecific" spellings match.
    return "non" in _norm(label)
def _no_commitment(label: str) -> bool:
    """Return True when the normalised label starts with "no" or contains "none"."""
    normalized = _norm(label)
    # The prefix test already covers the exact labels "no" and "none".
    if normalized.startswith("no"):
        return True
    return "none" in normalized
def classify(text: str):
    """Run every loaded classifier on *text* and build the four UI outputs.

    Returns a 4-tuple ``(summary, signals, explanation, raw)``:

    * ``summary`` - markdown verdict (risk badge and score, or a
      "not climate-related" notice).
    * ``signals`` - dict mapping a display name to ``{"label", "confidence"}``
      for each of the six classifiers.
    * ``explanation`` - markdown bullet list of the signals that added to the
      risk score, or a placeholder sentence when none fired.
    * ``raw`` - one ``key: result`` line per pipeline.

    Blank or missing input short-circuits with a prompt message and empty
    values for the other three outputs.
    """
    cleaned = text.strip() if text else ""
    if not cleaned:
        return "Please enter some text to analyze.", {}, "", ""

    # Top prediction of every pipeline, keyed like PIPES.
    outputs = {}
    for name, pipe in PIPES.items():
        outputs[name] = pipe(cleaned)[0]

    detector = outputs["detector"]
    climate_related = _is_positive(detector["label"])

    # Greenwashing risk is only accumulated for climate-related text.  Each
    # rule is (fired?, weight, model result, explanation bullet); a fired rule
    # adds weight * model confidence.  The weights follow the "cheap talk"
    # pattern: environmental claim + non-specific + no commitment +
    # opportunity framing.
    score = 0.0
    findings = []
    if climate_related:
        claim = outputs["env_claims"]
        spec = outputs["specificity"]
        commit = outputs["commitment"]
        senti = outputs["sentiment"]
        rules = (
            (
                _is_positive(claim["label"]),
                0.40,
                claim,
                "- **Environmental claim detected** β subject to the EU ECGT Directive (from 27 Sep 2026).",
            ),
            (
                _is_non_specific(spec["label"]),
                0.30,
                spec,
                "- **Non-specific language** β a classic cheap-talk signal.",
            ),
            (
                _no_commitment(commit["label"]),
                0.20,
                commit,
                "- **No concrete commitment detected** β claim without follow-through.",
            ),
            (
                "opportunity" in _norm(senti["label"]),
                0.10,
                senti,
                "- **Opportunity framing** β positive cherry-picking is common in greenwashing.",
            ),
        )
        for fired, weight, result, bullet in rules:
            if fired:
                score += weight * result["score"]
                findings.append(bullet)

    # Verdict summary.
    if climate_related:
        score_pct = round(score * 100, 1)
        if score >= 0.5:
            badge = "HIGH greenwashing risk"
        elif score >= 0.25:
            badge = "MODERATE greenwashing risk"
        else:
            badge = "LOW greenwashing risk"
        summary = (
            f"### Verdict: {badge}\n\n"
            f"**Risk score: {score_pct} / 100**\n\n"
            f"Climate detector confidence: {detector['score']:.1%}"
        )
    else:
        summary = (
            "### Verdict: Not climate-related\n\n"
            f"Detector confidence: **{detector['score']:.1%}**\n\n"
            "_Greenwashing scoring is skipped for non-climate text. "
            "Other signals below are informational only._"
        )

    # Per-model breakdown for the JSON component: display name -> PIPES key.
    # The netzero result is reported here but does not feed the risk score.
    display_order = (
        ("climate_related", "detector"),
        ("environmental_claim", "env_claims"),
        ("specificity", "specificity"),
        ("commitment", "commitment"),
        ("sentiment", "sentiment"),
        ("netzero_reduction", "netzero"),
    )
    signals = {
        shown: {
            "label": outputs[source]["label"],
            "confidence": round(float(outputs[source]["score"]), 4),
        }
        for shown, source in display_order
    }

    if findings:
        explanation = "\n".join(findings)
    else:
        explanation = "_No strong greenwashing signals detected._"

    raw_lines = [f"{name}: {result}" for name, result in outputs.items()]
    raw = "\n".join(raw_lines)

    return summary, signals, explanation, raw
# Sample paragraphs offered in the UI.  Each one is wrapped in its own
# single-element list below, one row per example.
_EXAMPLE_PARAGRAPHS = (
    (
        "We are proud to announce our commitment to become climate neutral by 2040 "
        "through a combination of renewable energy investments and carbon offsetting."
    ),
    (
        "In 2024 we reduced our Scope 1 and Scope 2 emissions by 23% year-over-year, "
        "from 145,000 tCO2e to 111,650 tCO2e, verified by an independent third-party "
        "auditor and aligned with our SBTi-validated 1.5C pathway."
    ),
    (
        "Our eco-friendly products are designed with the planet in mind, featuring "
        "sustainable materials and a greener approach to packaging that customers love."
    ),
    (
        "The quarterly earnings report showed revenue growth of 12% driven by strong "
        "performance in our core European markets and improved operational efficiency."
    ),
    (
        "By 2030 we aim to achieve net-zero emissions across our entire value chain, "
        "aligned with a 1.5C science-based target validated by SBTi, with interim "
        "milestones of 50% absolute reduction by 2027 against a 2020 baseline."
    ),
)

EXAMPLES = [[paragraph] for paragraph in _EXAMPLE_PARAGRAPHS]
# Markdown shown at the top of the page (passed to gr.Markdown when the UI is
# built).  It introduces the demo, lists the EU rules it relates to, names the
# six ClimateBERT models and states the caveats.
# NOTE(review): the "β" characters look like a mis-decoded dash, and blank
# lines between markdown blocks may have been lost in this copy -- confirm
# against the deployed file before changing the literal.
INTRO = """
# ClimateBERT β Greenwashing Signal Detector
Paste a paragraph from a sustainability report, marketing copy, or corporate
disclosure. This demo runs **six specialized ClimateBERT classifiers** in parallel
to surface cheap-talk signals relevant to the upcoming EU regulations:
- **ECGT Directive** β applies 27 September 2026, bans vague green claims
and "climate neutral via offsetting" statements.
- **Green Claims Directive** (proposed) β pre-verification of environmental claims.
- **CSRD / ESRS** β the source of text that will be scrutinized.
**Models** (all from [climatebert on Hugging Face](https://huggingface.co/climatebert), Apache-2.0):
`distilroberta-base-climate-detector`, `environmental-claims`,
`distilroberta-base-climate-specificity`, `distilroberta-base-climate-commitment`,
`distilroberta-base-climate-sentiment`, `netzero-reduction`.
> **Caveats.** Models are trained on **paragraphs** (not single sentences) and on
> **English** only. Outputs are proxy signals, not a legal verdict. Ground-truth
> greenwashing labels do not exist in any public dataset β every detector
> operationalizes proxies (specificity, commitment gap, cheap talk).
"""
# Page layout: intro text, then an input column (textbox, button, examples)
# next to a results column (verdict, explanation, JSON breakdown, raw output),
# followed by a credits footer.
# NOTE(review): the nesting of components is reconstructed from context --
# confirm it against the deployed layout.
with gr.Blocks(title="ClimateBERT β Greenwashing Signal Detector") as demo:
    gr.Markdown(INTRO)

    with gr.Row():
        # Left: user input.
        with gr.Column(scale=2):
            text_in = gr.Textbox(
                label="Text to analyze (a paragraph works best)",
                lines=8,
                placeholder="Paste a paragraph from a sustainability report, press release, or marketing page...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(examples=EXAMPLES, inputs=text_in, label="Try an example")

        # Right: results.
        with gr.Column(scale=3):
            summary_out = gr.Markdown(label="Verdict")
            explain_out = gr.Markdown(label="Why this score")
            signals_out = gr.JSON(label="Per-model signal breakdown")
            with gr.Accordion("Raw model outputs", open=False):
                raw_out = gr.Textbox(label="Raw", lines=8)

    footer_md = (
        "---\n"
        "Built on [ClimateBERT](https://huggingface.co/climatebert) by Webersinke, "
        "Kraus, Bingler & Leippold. Scoring heuristic inspired by Bingler et al., "
        "*Cheap talk and cherry-picking: What ClimateBERT has to say on corporate "
        "climate risk disclosures*, Finance Research Letters (2022)."
    )
    gr.Markdown(footer_md)

    # Button click and textbox submit both run classify(); the output order
    # matches classify()'s return tuple (summary, signals, explanation, raw).
    result_slots = [summary_out, signals_out, explain_out, raw_out]
    for register in (analyze_btn.click, text_in.submit):
        register(classify, inputs=text_in, outputs=result_slots)

if __name__ == "__main__":
    demo.launch()