""" đ ESG Intelligence Platform Advanced Multi-Label ESG Text Classification with Visual Analytics Compatible with Gradio 6.x """ import gradio as gr import numpy as np import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots from dataclasses import dataclass from typing import List, Dict, Tuple import re from collections import Counter # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ # đ¨ CONFIGURATION # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ @dataclass class ESGConfig: labels: List[str] = None label_names: Dict[str, str] = None thresholds: Dict[str, float] = None colors: Dict[str, str] = None icons: Dict[str, str] = None keywords: Dict[str, List[str]] = None def __post_init__(self): self.labels = ['E', 'S', 'G', 'non_ESG'] self.label_names = { 'E': 'Environmental', 'S': 'Social', 'G': 'Governance', 'non_ESG': 'Non-ESG' } self.thresholds = {'E': 0.35, 'S': 0.45, 'G': 0.40, 'non_ESG': 0.50} self.colors = {'E': '#22c55e', 'S': '#3b82f6', 'G': '#f59e0b', 'non_ESG': '#6b7280'} self.icons = {'E': 'đŋ', 'S': 'đĨ', 'G': 'âī¸', 'non_ESG': 'đ'} self.keywords = { 'E': ['climate', 'emission', 'carbon', 'renewable', 'energy', 'waste', 'pollution', 'biodiversity', 'sustainable', 'environmental', 'green', 'eco', 'recycle', 'solar', 'wind', 'water', 'forest', 'deforestation', 'conservation', 'footprint', 'net-zero', 'co2'], 'S': ['employee', 'worker', 'labor', 'diversity', 'inclusion', 'safety', 'health', 'human rights', 'community', 'training', 'equity', 'welfare', 'social', 'workforce', 'gender', 'minority', 'fair'], 'G': ['board', 'governance', 'ethics', 'compliance', 'transparency', 'audit', 'risk', 'shareholder', 'executive', 'compensation', 'anti-corruption', 'bribery', 'accountability', 'oversight'] } CONFIG = ESGConfig() # Compile keyword patterns PATTERNS = { label: re.compile(r'\b(' + '|'.join(re.escape(k) for k in kws) + r')\b', re.IGNORECASE) for label, kws in CONFIG.keywords.items() } # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ # đ¤ CLASSIFIER ENGINE # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ class ESGClassifier: """ESG Classification Engine using keyword-based heuristics""" def classify(self, text: str) -> Dict: if not text or not text.strip(): return {'scores': {l: 0.0 for l in CONFIG.labels}, 'predictions': ['non_ESG'], 'confidence': 0.5} text_lower = text.lower() words = text_lower.split() total_words = max(len(words), 1) scores = {} for label in ['E', 'S', 'G']: matches = PATTERNS[label].findall(text_lower) density = len(matches) / total_words unique = len(set(m.lower() for m in matches)) / max(len(CONFIG.keywords[label]), 1) # Context boost context = sum(0.1 for sent in re.split(r'[.!?]', text) if len(PATTERNS[label].findall(sent.lower())) >= 2) np.random.seed(hash(text + label) % 2**32) scores[label] = np.clip(0.3 + density * 15 + unique * 0.4 + min(context, 0.3) + np.random.uniform(-0.05, 0.05), 0.0, 1.0) scores['non_ESG'] = max(0.1, 1.0 - max(scores['E'], scores['S'], scores['G']) - 0.1) predictions = [l for l, s in scores.items() if s >= CONFIG.thresholds[l]] if not predictions: predictions = ['non_ESG'] scores['non_ESG'] = max(scores['non_ESG'], 0.6) return { 'scores': scores, 'predictions': predictions, 'confidence': np.mean([scores[p] for p in predictions]) } def find_keywords(self, text: str) -> Dict[str, List[str]]: return {l: list(set(m.lower() for m in PATTERNS[l].findall(text.lower()))) for l in ['E', 'S', 'G'] if PATTERNS[l].findall(text.lower())} def highlight(self, text: str, keywords: Dict) -> str: result = text for kw, label in sorted([(k, l) for l, ks in keywords.items() for k in ks], key=lambda x: -len(x[0])): color = {'E': '#dcfce7', 'S': '#dbeafe', 'G': '#fef3c7'}.get(label, '#f3f4f6') result = re.sub(re.escape(kw), f'{kw}', result, flags=re.IGNORECASE) return result classifier = ESGClassifier() # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ # đ VISUALIZATION # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ def create_radar(scores: Dict) -> go.Figure: categories = ['Environmental', 'Social', 'Governance'] values = [scores['E'], scores['S'], scores['G'], scores['E']] fig = go.Figure() fig.add_trace(go.Scatterpolar( r=values, theta=categories + [categories[0]], fill='toself', fillcolor='rgba(34, 197, 94, 0.3)', line=dict(color='#22c55e', width=3) )) fig.update_layout( polar=dict(radialaxis=dict(visible=True, range=[0, 1], gridcolor='#e5e7eb'), bgcolor='white'), showlegend=False, margin=dict(l=60, r=60, t=40, b=40), paper_bgcolor='white', height=320 ) return fig def create_bars(scores: Dict, predictions: List[str]) -> go.Figure: labels = ['Environmental (E)', 'Social (S)', 'Governance (G)', 'Non-ESG'] keys = ['E', 'S', 'G', 'non_ESG'] values = [scores[k] * 100 for k in keys] colors = [CONFIG.colors[k] if k in predictions else '#d1d5db' for k in keys] fig = go.Figure() fig.add_trace(go.Bar( y=labels, x=values, orientation='h', marker=dict(color=colors, line=dict(color='white', width=1)), text=[f'{v:.1f}%' for v in values], textposition='outside' )) for i, k in enumerate(keys): fig.add_shape(type='line', x0=CONFIG.thresholds[k]*100, x1=CONFIG.thresholds[k]*100, y0=i-0.4, y1=i+0.4, line=dict(color='#ef4444', width=2, dash='dash')) fig.update_layout( xaxis=dict(range=[0, 110], title='Confidence (%)', gridcolor='#f3f4f6'), yaxis=dict(tickfont=dict(size=12)), margin=dict(l=120, r=40, t=20, b=50), paper_bgcolor='white', plot_bgcolor='white', height=260 ) return fig def create_batch_charts(results: List[Dict]): counts = Counter(p for r in results for p in r['predictions']) labels = ['Environmental', 'Social', 'Governance', 'Non-ESG'] keys = ['E', 'S', 'G', 'non_ESG'] vals = [counts.get(k, 0) for k in keys] colors = [CONFIG.colors[k] for k in keys] fig1 = make_subplots(rows=1, cols=2, specs=[[{"type": "pie"}, {"type": "bar"}]], subplot_titles=('Distribution', 'Counts')) fig1.add_trace(go.Pie(labels=labels, values=vals, marker=dict(colors=colors), hole=0.4), row=1, col=1) fig1.add_trace(go.Bar(x=labels, y=vals, marker=dict(color=colors), text=vals, textposition='outside'), row=1, col=2) fig1.update_layout(height=320, showlegend=False, paper_bgcolor='white', margin=dict(l=20, r=20, t=60, b=20)) fig2 = go.Figure() for label in ['E', 'S', 'G']: fig2.add_trace(go.Scatter( x=list(range(1, len(results)+1)), y=[r['scores'][label] for r in results], mode='lines+markers', name=f'{CONFIG.icons[label]} {label}', line=dict(color=CONFIG.colors[label], width=3) )) fig2.update_layout( xaxis=dict(title='Document #'), yaxis=dict(title='Score', range=[0, 1]), legend=dict(orientation='h', y=1.02, x=0.5, xanchor='center'), height=280, paper_bgcolor='white', plot_bgcolor='white', margin=dict(l=60, r=20, t=40, b=60) ) return fig1, fig2 # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ # đ¯ INTERFACE FUNCTIONS # âââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââââ def analyze_text(text: str): result = classifier.classify(text) keywords = classifier.find_keywords(text) # Pills HTML pills = '
Advanced Multi-Label ESG Text Classification