Spaces:

narcis2007
/

ClimateBERT

Running

App Files Files Community

ClimateBERT / app.py

narcis2007

Fix Textbox init: remove unsupported show_copy_button kwarg

6030204 about 1 month ago

raw

history blame contribute delete

9.13 kB

	"""
	ClimateBERT — Greenwashing Signal Detector (Gradio demo)

	Runs six specialized ClimateBERT models on a paragraph of text and returns
	a proxy "cheap talk" greenwashing risk score. Aligned with the EU ECGT
	Directive (applies 27 September 2026) and the proposed Green Claims Directive.

	All models are Apache-2.0, from https://huggingface.co/climatebert
	"""

	import gradio as gr
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

	# Device index handed to every pipeline: -1 means CPU-only (HF Spaces free tier).
	DEVICE = -1
	# Token limit given to each tokenizer (model_max_length) and pipeline (max_length).
	MAX_LEN = 512

	# One (internal_key, model_repo, tokenizer_repo_or_None) triple per classifier.
	# A tokenizer entry of None means the tokenizer is loaded from model_repo itself.
	MODELS = [
	    ("detector", "climatebert/distilroberta-base-climate-detector", None),
	    ("env_claims", "climatebert/environmental-claims", None),
	    ("specificity", "climatebert/distilroberta-base-climate-specificity", None),
	    ("commitment", "climatebert/distilroberta-base-climate-commitment", None),
	    ("sentiment", "climatebert/distilroberta-base-climate-sentiment", None),
	    # netzero-reduction does not ship its own tokenizer — use the base LM
	    (
	        "netzero",
	        "climatebert/netzero-reduction",
	        "climatebert/distilroberta-base-climate-f",
	    ),
	]

	# Build every text-classification pipeline once at module load; classify()
	# looks them up in PIPES by their internal key.
	print("Loading ClimateBERT models (first run downloads ~2 GB)...")
	PIPES = {}
	for key, model_repo, tok_repo in MODELS:
	    # Use the model's own repo when no separate tokenizer repo is listed.
	    tokenizer_source = tok_repo if tok_repo else model_repo
	    tokenizer = AutoTokenizer.from_pretrained(tokenizer_source, model_max_length=MAX_LEN)
	    classifier = AutoModelForSequenceClassification.from_pretrained(model_repo)
	    pipeline_options = {
	        "model": classifier,
	        "tokenizer": tokenizer,
	        "truncation": True,
	        "padding": True,
	        "max_length": MAX_LEN,
	        "device": DEVICE,
	    }
	    PIPES[key] = pipeline("text-classification", **pipeline_options)
	    print(f" loaded {key}: {model_repo}")
	print("All models loaded.")


	def _norm(label: str) -> str:
	    """Return *label* stripped and lower-cased; a falsy label becomes ""."""
	    return (label or "").strip().lower()


	# A normalized label that starts with one of these is a negation ("no",
	# "none", "non-climate", "not climate", "false", ...) and is never positive,
	# even when it happens to contain a positive keyword such as "climate".
	_NEGATIVE_PREFIXES = ("no", "false")


	def _is_positive(label: str, positive_keywords=("yes", "claim", "climate", "true", "1")) -> bool:
	    """Decide whether a classifier label denotes the positive class.

	    The label is normalized with ``_norm`` and then checked in three steps:

	    1. A label equal to one of *positive_keywords* is positive.
	    2. A label starting with a negation prefix ("no...", "false...") is
	       negative. Plain substring matching alone would wrongly accept labels
	       such as "not climate" or "no claim", which contain a keyword.
	    3. Otherwise the label is positive if it contains any keyword
	       (e.g. "label_1" contains "1").

	    Args:
	        label: Raw label string from a pipeline result; may be None or empty.
	        positive_keywords: Substrings that mark a label as positive.

	    Returns:
	        True for a positive label, False otherwise (including empty input).
	    """
	    label = _norm(label)
	    if any(label == kw for kw in positive_keywords):
	        return True
	    if label.startswith(_NEGATIVE_PREFIXES):
	        return False
	    return any(kw in label for kw in positive_keywords)


	def _is_non_specific(label: str) -> bool:
	    """Report whether the normalized *label* contains the substring "non".

	    This matches spellings such as "non-specific" and "nonspecific".
	    """
	    normalized = _norm(label)
	    return normalized.find("non") != -1


	def _no_commitment(label: str) -> bool:
	    """Report whether the normalized *label* signals a missing commitment.

	    True when the label begins with "no" or contains "none" anywhere.
	    """
	    normalized = _norm(label)
	    # "no" and "none" both begin with "no", so the prefix test also covers
	    # the exact-match cases.
	    if normalized.startswith("no"):
	        return True
	    return "none" in normalized


	def _score_risk(results: dict) -> tuple:
	    """Return ``(risk, reasons)`` for text already judged climate-related.

	    Weights follow the Bingler/Kraus/Leippold/Webersinke "cheap talk" pattern:
	    environmental claim + non-specific + no commitment + opportunity framing.
	    Every signal that fires adds ``weight * model confidence`` to the risk and
	    one bullet to the reasons. The weights sum to 1.0.
	    """
	    # (result key, predicate on the label, weight, explanation bullet)
	    checks = (
	        (
	            "env_claims",
	            _is_positive,
	            0.40,
	            "- Environmental claim detected — subject to the EU ECGT Directive (from 27 Sep 2026).",
	        ),
	        (
	            "specificity",
	            _is_non_specific,
	            0.30,
	            "- Non-specific language — a classic cheap-talk signal.",
	        ),
	        (
	            "commitment",
	            _no_commitment,
	            0.20,
	            "- No concrete commitment detected — claim without follow-through.",
	        ),
	        (
	            "sentiment",
	            lambda label: "opportunity" in _norm(label),
	            0.10,
	            "- Opportunity framing — positive cherry-picking is common in greenwashing.",
	        ),
	    )
	    risk = 0.0
	    reasons = []
	    for key, fires, weight, reason in checks:
	        outcome = results[key]
	        if fires(outcome["label"]):
	            risk += weight * outcome["score"]
	            reasons.append(reason)
	    return risk, reasons


	def _verdict_markdown(det: dict, is_climate: bool, risk_pct: float) -> str:
	    """Build the markdown verdict shown at the top of the results column."""
	    if not is_climate:
	        return (
	            "### Verdict: Not climate-related\n\n"
	            f"Detector confidence: {det['score']:.1%}\n\n"
	            "_Greenwashing scoring is skipped for non-climate text. "
	            "Other signals below are informational only._"
	        )
	    # Thresholds are applied to the rounded percentage that is displayed, so
	    # the badge can never contradict the printed score (a raw risk of 0.4996
	    # prints as "50.0 / 100" and is therefore HIGH, not MODERATE).
	    if risk_pct >= 50.0:
	        badge = "HIGH greenwashing risk"
	    elif risk_pct >= 25.0:
	        badge = "MODERATE greenwashing risk"
	    else:
	        badge = "LOW greenwashing risk"
	    return (
	        f"### Verdict: {badge}\n\n"
	        f"Risk score: {risk_pct} / 100\n\n"
	        f"Climate detector confidence: {det['score']:.1%}"
	    )


	def _fmt_signal(result: dict) -> dict:
	    """Reduce one pipeline result to a JSON-friendly label/confidence pair."""
	    return {"label": result["label"], "confidence": round(float(result["score"]), 4)}


	def classify(text: str):
	    """Run every loaded classifier on *text* and summarize the outcome.

	    Args:
	        text: Paragraph to analyze. None, empty or whitespace-only input
	            short-circuits with a prompt and runs no model.

	    Returns:
	        A 4-tuple ``(summary, signals, explanation, raw)``:
	        summary is the markdown verdict, signals maps each signal name to its
	        label and confidence (for the JSON component), explanation is the
	        markdown list of reasons behind the score, and raw is one line per
	        model with its unprocessed top result.
	    """
	    if not text or not text.strip():
	        return "Please enter some text to analyze.", {}, "", ""

	    text = text.strip()

	    # Keep only the top prediction of each pipeline.
	    results = {key: pipe(text)[0] for key, pipe in PIPES.items()}

	    det = results["detector"]
	    is_climate = _is_positive(det["label"])

	    # The greenwashing score is only meaningful for climate-related text.
	    risk, reasons = _score_risk(results) if is_climate else (0.0, [])
	    risk_pct = round(risk * 100, 1)

	    summary = _verdict_markdown(det, is_climate, risk_pct)

	    # Signal breakdown (dict for Gradio JSON component)
	    signals = {
	        "climate_related": _fmt_signal(det),
	        "environmental_claim": _fmt_signal(results["env_claims"]),
	        "specificity": _fmt_signal(results["specificity"]),
	        "commitment": _fmt_signal(results["commitment"]),
	        "sentiment": _fmt_signal(results["sentiment"]),
	        "netzero_reduction": _fmt_signal(results["netzero"]),
	    }

	    explanation = "\n".join(reasons) if reasons else "_No strong greenwashing signals detected._"

	    raw = "\n".join(f"{k}: {v}" for k, v in results.items())

	    return summary, signals, explanation, raw


	# Sample paragraphs offered in the UI. Each row is a one-element list
	# holding a single paragraph of text.
	EXAMPLES = [
	    [paragraph]
	    for paragraph in (
	        "We are proud to announce our commitment to become climate neutral by 2040 "
	        "through a combination of renewable energy investments and carbon offsetting.",
	        "In 2024 we reduced our Scope 1 and Scope 2 emissions by 23% year-over-year, "
	        "from 145,000 tCO2e to 111,650 tCO2e, verified by an independent third-party "
	        "auditor and aligned with our SBTi-validated 1.5C pathway.",
	        "Our eco-friendly products are designed with the planet in mind, featuring "
	        "sustainable materials and a greener approach to packaging that customers love.",
	        "The quarterly earnings report showed revenue growth of 12% driven by strong "
	        "performance in our core European markets and improved operational efficiency.",
	        "By 2030 we aim to achieve net-zero emissions across our entire value chain, "
	        "aligned with a 1.5C science-based target validated by SBTi, with interim "
	        "milestones of 50% absolute reduction by 2027 against a 2020 baseline.",
	    )
	]


	# Markdown introduction for the page; it is passed to gr.Markdown(INTRO) when
	# the Blocks layout is built. The text is runtime content — edit with care.
	INTRO = """
	# ClimateBERT — Greenwashing Signal Detector

	Paste a paragraph from a sustainability report, marketing copy, or corporate
	disclosure. This demo runs six specialized ClimateBERT classifiers in parallel
	to surface cheap-talk signals relevant to the upcoming EU regulations:

	- ECGT Directive — applies 27 September 2026, bans vague green claims
	and "climate neutral via offsetting" statements.
	- Green Claims Directive (proposed) — pre-verification of environmental claims.
	- CSRD / ESRS — the source of text that will be scrutinized.

	Models (all from [climatebert on Hugging Face](https://huggingface.co/climatebert), Apache-2.0):
	`distilroberta-base-climate-detector`, `environmental-claims`,
	`distilroberta-base-climate-specificity`, `distilroberta-base-climate-commitment`,
	`distilroberta-base-climate-sentiment`, `netzero-reduction`.

	> Caveats. Models are trained on paragraphs (not single sentences) and on
	> English only. Outputs are proxy signals, not a legal verdict. Ground-truth
	> greenwashing labels do not exist in any public dataset — every detector
	> operationalizes proxies (specificity, commitment gap, cheap talk).
	"""

	# Page layout: intro, then an input column (text, button, examples) next to a
	# results column (verdict, reasons, per-model JSON, collapsible raw output),
	# followed by an attribution footer.
	with gr.Blocks(title="ClimateBERT — Greenwashing Signal Detector") as demo:
	    gr.Markdown(INTRO)

	    with gr.Row():
	        with gr.Column(scale=2):
	            text_in = gr.Textbox(
	                label="Text to analyze (a paragraph works best)",
	                lines=8,
	                placeholder="Paste a paragraph from a sustainability report, press release, or marketing page...",
	            )
	            analyze_btn = gr.Button("Analyze", variant="primary")
	            gr.Examples(examples=EXAMPLES, inputs=text_in, label="Try an example")
	        with gr.Column(scale=3):
	            summary_out = gr.Markdown(label="Verdict")
	            explain_out = gr.Markdown(label="Why this score")
	            signals_out = gr.JSON(label="Per-model signal breakdown")
	            with gr.Accordion("Raw model outputs", open=False):
	                raw_out = gr.Textbox(label="Raw", lines=8)

	    gr.Markdown(
	        "---\n"
	        "Built on [ClimateBERT](https://huggingface.co/climatebert) by Webersinke, "
	        "Kraus, Bingler & Leippold. Scoring heuristic inspired by Bingler et al., "
	        "*Cheap talk and cherry-picking: What ClimateBERT has to say on corporate "
	        "climate risk disclosures*, Finance Research Letters (2022)."
	    )

	    # The button's click event and the textbox's submit event run the same
	    # analysis and fill the same four output components, in classify()'s
	    # return order: summary, signals, explanation, raw.
	    for register_event in (analyze_btn.click, text_in.submit):
	        register_event(
	            classify,
	            inputs=text_in,
	            outputs=[summary_out, signals_out, explain_out, raw_out],
	        )

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()