# bechir09's picture
# Upload folder using huggingface_hub
# 4d1bb75 verified
"""
🌍 ESG Intelligence Platform
Advanced Multi-Label ESG Text Classification with Visual Analytics
Compatible with Gradio 6.x
"""
import html
import re
import zlib
from collections import Counter
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# ═══════════════════════════════════════════════════════════════════════════════
# 🎨 CONFIGURATION
# ═══════════════════════════════════════════════════════════════════════════════
@dataclass
class ESGConfig:
    """Static configuration for the ESG classifier and its visualizations.

    Every field defaults to ``None``; ``__post_init__`` then fills any field
    left as ``None`` with the built-in default.  Values passed explicitly to
    the constructor are kept as-is (previously they were silently overwritten).
    """

    # Label keys in display order.
    labels: Optional[List[str]] = None
    # Label key -> human-readable name.
    label_names: Optional[Dict[str, str]] = None
    # Label key -> minimum score for the label to be predicted.
    thresholds: Optional[Dict[str, float]] = None
    # Label key -> hex color used in charts.
    colors: Optional[Dict[str, str]] = None
    # Label key -> emoji shown next to the label.
    icons: Optional[Dict[str, str]] = None
    # Label key -> keywords that signal the label (only E, S and G have keywords).
    keywords: Optional[Dict[str, List[str]]] = None

    def __post_init__(self):
        """Populate every field that was not supplied with its default value."""
        # Defaults are built here (not as class-level literals) so that each
        # instance gets its own fresh, independently mutable containers.
        if self.labels is None:
            self.labels = ['E', 'S', 'G', 'non_ESG']
        if self.label_names is None:
            self.label_names = {
                'E': 'Environmental', 'S': 'Social',
                'G': 'Governance', 'non_ESG': 'Non-ESG',
            }
        if self.thresholds is None:
            self.thresholds = {'E': 0.35, 'S': 0.45, 'G': 0.40, 'non_ESG': 0.50}
        if self.colors is None:
            self.colors = {'E': '#22c55e', 'S': '#3b82f6', 'G': '#f59e0b', 'non_ESG': '#6b7280'}
        if self.icons is None:
            self.icons = {'E': '🌿', 'S': '👥', 'G': '⚖️', 'non_ESG': '📄'}
        if self.keywords is None:
            self.keywords = {
                'E': ['climate', 'emission', 'carbon', 'renewable', 'energy', 'waste',
                      'pollution', 'biodiversity', 'sustainable', 'environmental',
                      'green', 'eco', 'recycle', 'solar', 'wind', 'water', 'forest',
                      'deforestation', 'conservation', 'footprint', 'net-zero', 'co2'],
                'S': ['employee', 'worker', 'labor', 'diversity', 'inclusion', 'safety',
                      'health', 'human rights', 'community', 'training', 'equity',
                      'welfare', 'social', 'workforce', 'gender', 'minority', 'fair'],
                'G': ['board', 'governance', 'ethics', 'compliance', 'transparency',
                      'audit', 'risk', 'shareholder', 'executive', 'compensation',
                      'anti-corruption', 'bribery', 'accountability', 'oversight'],
            }
# Single shared configuration instance for the whole module.
CONFIG = ESGConfig()

# One pre-compiled, case-insensitive regex per keyword-bearing label.  Each is
# a whole-word alternation of that label's (escaped) keywords wrapped in a
# single capturing group, so ``findall`` yields the matched keyword strings.
PATTERNS = {
    label_key: re.compile(
        rf"\b({'|'.join(map(re.escape, terms))})\b",
        flags=re.IGNORECASE,
    )
    for label_key, terms in CONFIG.keywords.items()
}
# ═══════════════════════════════════════════════════════════════════════════════
# 🤖 CLASSIFIER ENGINE
# ═══════════════════════════════════════════════════════════════════════════════
class ESGClassifier:
    """ESG classification engine using keyword-based heuristics.

    Relies on the module-level ``CONFIG`` (labels, thresholds, keywords) and
    ``PATTERNS`` (one compiled keyword regex per E/S/G label).
    """

    # Labels that have keyword patterns; 'non_ESG' is derived from these.
    _KEYWORD_LABELS = ('E', 'S', 'G')
    # Background color of a highlighted keyword, per label.
    _HIGHLIGHT_COLORS = {'E': '#dcfce7', 'S': '#dbeafe', 'G': '#fef3c7'}
    _HIGHLIGHT_FALLBACK = '#f3f4f6'

    def classify(self, text: str) -> Dict:
        """Score ``text`` for each label and pick the labels above threshold.

        Args:
            text: Free text to classify.

        Returns:
            A dict with ``'scores'`` (label -> float in [0, 1]),
            ``'predictions'`` (list of label keys, never empty) and
            ``'confidence'`` (mean score of the predicted labels).  Empty or
            whitespace-only text yields all-zero scores, ``['non_ESG']`` and a
            confidence of 0.5.
        """
        if not text or not text.strip():
            return {'scores': {l: 0.0 for l in CONFIG.labels}, 'predictions': ['non_ESG'], 'confidence': 0.5}

        text_lower = text.lower()
        total_words = max(len(text_lower.split()), 1)
        # Sentence split is label-independent, so do it once.
        sentences = re.split(r'[.!?]', text_lower)

        scores = {}
        for label in self._KEYWORD_LABELS:
            pattern = PATTERNS[label]
            matches = pattern.findall(text_lower)
            density = len(matches) / total_words
            unique = len(set(matches)) / max(len(CONFIG.keywords[label]), 1)
            # Context boost: +0.1 per sentence holding two or more keywords
            # (capped at 0.3 below).
            context = sum(0.1 for sent in sentences if len(pattern.findall(sent)) >= 2)
            # Per-(text, label) jitter.  Seed from a CRC rather than the
            # built-in hash(): str hashes are salted per process, which made
            # scores differ between restarts.  A private Generator also leaves
            # NumPy's global RNG state untouched.
            seed = zlib.crc32((text + label).encode('utf-8', 'surrogatepass'))
            jitter = np.random.default_rng(seed).uniform(-0.05, 0.05)
            scores[label] = float(np.clip(
                0.3 + density * 15 + unique * 0.4 + min(context, 0.3) + jitter, 0.0, 1.0))

        scores['non_ESG'] = max(0.1, 1.0 - max(scores['E'], scores['S'], scores['G']) - 0.1)
        predictions = [l for l, s in scores.items() if s >= CONFIG.thresholds[l]]
        if not predictions:
            # Nothing cleared its threshold: fall back to non-ESG.
            predictions = ['non_ESG']
            scores['non_ESG'] = max(scores['non_ESG'], 0.6)
        return {
            'scores': scores,
            'predictions': predictions,
            'confidence': float(np.mean([scores[p] for p in predictions])),
        }

    def find_keywords(self, text: str) -> Dict[str, List[str]]:
        """Return the distinct lowercase keywords found in ``text`` per label.

        Labels with no match are omitted from the result.
        """
        text_lower = text.lower()
        found = {}
        for label in self._KEYWORD_LABELS:
            hits = PATTERNS[label].findall(text_lower)
            if hits:
                found[label] = list(set(hits))
        return found

    def highlight(self, text: str, keywords: Dict) -> str:
        """Return ``text`` as HTML with the given keywords wrapped in colored spans.

        Args:
            text: Raw (untrusted) text; it is HTML-escaped before being emitted.
            keywords: Mapping of label key -> list of keywords to highlight.

        Returns:
            An HTML fragment.  Keywords are matched case-insensitively on word
            boundaries (same rule as ``PATTERNS``) and the original casing of
            the matched text is preserved.
        """
        label_of = {}
        for label, kws in keywords.items():
            for kw in kws:
                if kw:
                    label_of.setdefault(kw.lower(), label)
        if not label_of:
            return html.escape(text)

        # Longest first so multi-word keywords win over their sub-words.
        ordered = sorted(label_of, key=len, reverse=True)
        pattern = re.compile(
            r'\b(?:' + '|'.join(re.escape(k) for k in ordered) + r')\b', re.IGNORECASE)

        # Single pass over the text: inserted markup is never re-scanned, so
        # spans cannot nest and the span attributes cannot be corrupted.
        pieces = []
        cursor = 0
        for match in pattern.finditer(text):
            pieces.append(html.escape(text[cursor:match.start()]))
            label = label_of.get(match.group(0).lower())
            color = self._HIGHLIGHT_COLORS.get(label, self._HIGHLIGHT_FALLBACK)
            pieces.append(
                f'<span style="background:{color};padding:2px 6px;border-radius:4px">'
                f'{html.escape(match.group(0))}</span>')
            cursor = match.end()
        pieces.append(html.escape(text[cursor:]))
        return ''.join(pieces)
# Module-level classifier instance used by analyze_text and analyze_batch.
classifier = ESGClassifier()
# ═══════════════════════════════════════════════════════════════════════════════
# 📊 VISUALIZATION
# ═══════════════════════════════════════════════════════════════════════════════
def create_radar(scores: Dict) -> go.Figure:
    """Build a filled radar chart of the E, S and G scores.

    Args:
        scores: Mapping that contains at least the keys 'E', 'S' and 'G'.

    Returns:
        A Plotly figure with one closed ``Scatterpolar`` trace on a 0-1 radial axis.
    """
    axis_names = ['Environmental', 'Social', 'Governance']
    # The first point is repeated at the end so the outline closes on itself.
    radii = [scores[key] for key in ('E', 'S', 'G', 'E')]
    angles = [*axis_names, axis_names[0]]

    outline = go.Scatterpolar(
        r=radii,
        theta=angles,
        fill='toself',
        fillcolor='rgba(34, 197, 94, 0.3)',
        line={'color': '#22c55e', 'width': 3},
    )

    fig = go.Figure()
    fig.add_trace(outline)
    fig.update_layout(
        polar={
            'radialaxis': {'visible': True, 'range': [0, 1], 'gridcolor': '#e5e7eb'},
            'bgcolor': 'white',
        },
        showlegend=False,
        margin={'l': 60, 'r': 60, 't': 40, 'b': 40},
        paper_bgcolor='white',
        height=320,
    )
    return fig
def create_bars(scores: Dict, predictions: List[str]) -> go.Figure:
    """Build a horizontal bar chart of all four label scores, in percent.

    Bars of predicted labels use the label's configured color; the others are
    grey.  A dashed red tick on each row marks that label's threshold.

    Args:
        scores: Mapping with the keys 'E', 'S', 'G' and 'non_ESG'.
        predictions: Label keys that were predicted.

    Returns:
        The Plotly bar figure.
    """
    keys = ['E', 'S', 'G', 'non_ESG']
    row_titles = ['Environmental (E)', 'Social (S)', 'Governance (G)', 'Non-ESG']
    inactive_color = '#d1d5db'

    percentages = []
    bar_colors = []
    for key in keys:
        percentages.append(scores[key] * 100)
        bar_colors.append(CONFIG.colors[key] if key in predictions else inactive_color)

    bar_trace = go.Bar(
        x=percentages,
        y=row_titles,
        orientation='h',
        marker={'color': bar_colors, 'line': {'color': 'white', 'width': 1}},
        text=[f'{pct:.1f}%' for pct in percentages],
        textposition='outside',
    )
    fig = go.Figure()
    fig.add_trace(bar_trace)

    # One short vertical line per row at the label's decision threshold.
    for row, key in enumerate(keys):
        cutoff = CONFIG.thresholds[key] * 100
        fig.add_shape(
            type='line', x0=cutoff, x1=cutoff, y0=row - 0.4, y1=row + 0.4,
            line={'color': '#ef4444', 'width': 2, 'dash': 'dash'},
        )

    fig.update_layout(
        xaxis={'range': [0, 110], 'title': 'Confidence (%)', 'gridcolor': '#f3f4f6'},
        yaxis={'tickfont': {'size': 12}},
        margin={'l': 120, 'r': 40, 't': 20, 'b': 50},
        paper_bgcolor='white',
        plot_bgcolor='white',
        height=260,
    )
    return fig
def create_batch_charts(results: List[Dict]):
    """Build the two summary figures for a batch of classification results.

    Args:
        results: Dicts as returned by ``ESGClassifier.classify`` (each with
            'predictions' and 'scores').

    Returns:
        A ``(distribution, trends)`` tuple: a donut + bar chart of how often
        each label was predicted, and a line chart of the E/S/G scores per
        document.
    """
    keys = ['E', 'S', 'G', 'non_ESG']
    titles = ['Environmental', 'Social', 'Governance', 'Non-ESG']

    # Tally every predicted label across all documents.
    tally = Counter()
    for result in results:
        tally.update(result['predictions'])
    totals = [tally[key] for key in keys]
    palette = [CONFIG.colors[key] for key in keys]

    distribution = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "pie"}, {"type": "bar"}]],
        subplot_titles=('Distribution', 'Counts'),
    )
    donut = go.Pie(labels=titles, values=totals, marker={'colors': palette}, hole=0.4)
    columns = go.Bar(x=titles, y=totals, marker={'color': palette},
                     text=totals, textposition='outside')
    distribution.add_trace(donut, row=1, col=1)
    distribution.add_trace(columns, row=1, col=2)
    distribution.update_layout(
        height=320, showlegend=False, paper_bgcolor='white',
        margin={'l': 20, 'r': 20, 't': 60, 'b': 20},
    )

    # Document numbers are 1-based on the x axis.
    doc_numbers = list(range(1, len(results) + 1))
    trends = go.Figure()
    for label in ('E', 'S', 'G'):
        series = [result['scores'][label] for result in results]
        trends.add_trace(go.Scatter(
            x=doc_numbers,
            y=series,
            mode='lines+markers',
            name=f'{CONFIG.icons[label]} {label}',
            line={'color': CONFIG.colors[label], 'width': 3},
        ))
    trends.update_layout(
        xaxis={'title': 'Document #'},
        yaxis={'title': 'Score', 'range': [0, 1]},
        legend={'orientation': 'h', 'y': 1.02, 'x': 0.5, 'xanchor': 'center'},
        height=280,
        paper_bgcolor='white',
        plot_bgcolor='white',
        margin={'l': 60, 'r': 20, 't': 40, 'b': 60},
    )
    return distribution, trends
# ═══════════════════════════════════════════════════════════════════════════════
# 🎯 INTERFACE FUNCTIONS
# ═══════════════════════════════════════════════════════════════════════════════
def analyze_text(text: str):
    """Classify one text and build every output of the "Text Analysis" tab.

    Args:
        text: Text entered by the user; ``None`` is treated as an empty string.

    Returns:
        A 6-tuple ``(pills_html, highlighted_html, explanation_markdown,
        radar_figure, bars_figure, score_html)``.
    """
    text = text or ""
    result = classifier.classify(text)
    keywords = classifier.find_keywords(text)
    scores = result['scores']
    predictions = result['predictions']

    # Pills HTML: one rounded badge per predicted label.
    pill_styles = {
        'E': '#dcfce7;color:#166534;border:2px solid #22c55e',
        'S': '#dbeafe;color:#1e40af;border:2px solid #3b82f6',
        'G': '#fef3c7;color:#92400e;border:2px solid #f59e0b',
        'non_ESG': '#f3f4f6;color:#4b5563;border:2px solid #9ca3af',
    }
    pill_parts = ['<div style="display:flex;flex-wrap:wrap;gap:8px;margin:16px 0;">']
    for pred in predictions:
        pill_parts.append(
            f'<div style="background:{pill_styles.get(pred)};padding:8px 16px;border-radius:24px;font-weight:600">'
            f'{CONFIG.icons[pred]} {pred} ({scores[pred]*100:.0f}%)</div>')
    pill_parts.append('</div>')
    pills = ''.join(pill_parts)

    # Highlighted text
    highlighted = f'''<div style="background:#f8fafc;padding:20px;border-radius:12px;
border-left:4px solid #22c55e;line-height:1.8">{classifier.highlight(text, keywords)}</div>'''

    # Explanation
    if predictions == ['non_ESG']:
        explanation = "📄 This text appears to be general business content without specific ESG relevance."
    else:
        # Blank line between entries: a single newline does not start a new
        # paragraph in Markdown, so the entries would otherwise run together.
        explanation = '\n\n'.join(
            f"{CONFIG.icons[p]} **{CONFIG.label_names[p]}**: Detected via keywords ({', '.join(keywords.get(p, ['context'])[:5])})"
            for p in predictions if p != 'non_ESG'
        ) or "Analysis complete."

    # Score: plain average of the three ESG scores, as a percentage.
    esg_score = (scores['E'] + scores['S'] + scores['G']) / 3 * 100
    score_html = f'''<div style="text-align:center;padding:20px">
<div style="font-size:3.5rem;font-weight:800;background:linear-gradient(135deg,#22c55e,#16a34a);
-webkit-background-clip:text;-webkit-text-fill-color:transparent">{esg_score:.0f}</div>
<div style="color:#6b7280;text-transform:uppercase;letter-spacing:0.1em">ESG Score</div></div>'''

    return (pills, highlighted, explanation,
            create_radar(scores), create_bars(scores, predictions), score_html)
def analyze_batch(file):
    """Classify up to 50 texts from an uploaded CSV or TXT file.

    CSV files (extension checked case-insensitively) contribute the non-null
    values of their first column; any other file is read as UTF-8 text and
    split into documents on blank lines.

    Args:
        file: The uploaded file, either a path string or an object whose
            ``name`` attribute is the path; ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(stats_html, summary_dataframe, distribution_figure,
        trend_figure)``.  When there is nothing to analyze or an error occurs,
        the first element is a message and the other three are ``None``.
    """
    if file is None:
        return "Please upload a file", None, None, None
    try:
        # Accept both a plain path and a file-like object carrying ``.name``.
        path = str(getattr(file, 'name', file))
        if path.lower().endswith('.csv'):
            # dropna() first so empty cells do not become the text "nan".
            texts = pd.read_csv(path).iloc[:, 0].dropna().astype(str).tolist()
        else:
            with open(path, encoding='utf-8', errors='replace') as handle:
                texts = [t.strip() for t in handle.read().split('\n\n') if t.strip()]
        texts = texts[:50]
        if not texts:
            return "No text found in the uploaded file", None, None, None

        results = [classifier.classify(t) for t in texts]

        def cell(result, label):
            """Format one table cell: predicted/not-predicted mark plus the score."""
            mark = '✓' if label in result['predictions'] else '○'
            return f"{mark} {result['scores'][label]:.0%}"

        summary = pd.DataFrame([{
            'ID': i + 1,
            'Text': t[:80] + '...' if len(t) > 80 else t,
            'E': cell(r, 'E'),
            'S': cell(r, 'S'),
            'G': cell(r, 'G'),
            'Labels': ', '.join(r['predictions']),
        } for i, (t, r) in enumerate(zip(texts, results))])

        # Number of documents in which each label was predicted.
        e_count, s_count, g_count = [
            sum(1 for r in results if label in r['predictions']) for label in ['E', 'S', 'G']]
        stats = f'''<div style="display:grid;grid-template-columns:repeat(4,1fr);gap:16px;margin:20px 0">
<div style="background:white;border-radius:12px;padding:16px;text-align:center;box-shadow:0 2px 8px rgba(0,0,0,0.06)">
<div style="font-size:2rem;font-weight:700">{len(results)}</div>
<div style="color:#6b7280;text-transform:uppercase;font-size:0.85rem">Documents</div></div>
<div style="background:white;border-radius:12px;padding:16px;text-align:center;border-left:4px solid #22c55e">
<div style="font-size:2rem;font-weight:700;color:#22c55e">{e_count}</div>
<div style="color:#6b7280;text-transform:uppercase;font-size:0.85rem">🌿 Environmental</div></div>
<div style="background:white;border-radius:12px;padding:16px;text-align:center;border-left:4px solid #3b82f6">
<div style="font-size:2rem;font-weight:700;color:#3b82f6">{s_count}</div>
<div style="color:#6b7280;text-transform:uppercase;font-size:0.85rem">👥 Social</div></div>
<div style="background:white;border-radius:12px;padding:16px;text-align:center;border-left:4px solid #f59e0b">
<div style="font-size:2rem;font-weight:700;color:#f59e0b">{g_count}</div>
<div style="color:#6b7280;text-transform:uppercase;font-size:0.85rem">⚖️ Governance</div></div></div>'''
        fig1, fig2 = create_batch_charts(results)
        return stats, summary, fig1, fig2
    except Exception as exc:
        # UI boundary: surface the problem in the stats panel instead of crashing.
        return f"Error: {exc}", None, None, None
# ═══════════════════════════════════════════════════════════════════════════════
# 📚 SAMPLES
# ═══════════════════════════════════════════════════════════════════════════════
# Example texts offered in the "Load Sample" dropdown, keyed by display label.
# The keys are shown to the user verbatim, so their emoji must be well-formed.
SAMPLES = {
    "🌿 Environmental": """Our company has committed to achieving carbon neutrality by 2030.
We are investing heavily in renewable energy sources including solar and wind power,
reducing our carbon footprint by 40% since 2020. Our waste management system achieved 95% recycling rates.""",
    "👥 Social": """We are proud to announce our expanded diversity and inclusion program.
This year, we achieved 45% female representation in leadership positions and
launched comprehensive employee wellness programs including mental health support.""",
    "⚖️ Governance": """The Board of Directors has adopted enhanced corporate governance policies
including an independent audit committee and transparent executive compensation disclosure.
Our anti-corruption compliance program meets FCPA requirements.""",
    "🌍 Multi-Label": """Our sustainability report demonstrates commitment across all ESG dimensions.
Environmentally, we've reduced emissions 50% through renewable energy.
Socially, we've implemented fair labor practices. Our board has an ESG oversight committee.""",
    "📄 Non-ESG": """Q3 financial results show revenue growth of 12% year-over-year.
The company completed the acquisition of TechCorp for $500 million,
expanding market presence in enterprise software.""",
}
# ═══════════════════════════════════════════════════════════════════════════════
# 🚀 BUILD APP
# ═══════════════════════════════════════════════════════════════════════════════
# Build the Gradio UI: a header, three tabs (single-text analysis, batch
# analysis, about) and a footer.  Event handlers are wired inside each tab.
with gr.Blocks(title="ESG Intelligence Platform") as app:
    # Header
    gr.HTML("""<div style="text-align:center;padding:30px 0 20px 0">
<h1 style="background:linear-gradient(135deg,#1a5f2a 0%,#2d8a4e 50%,#0d3d56 100%);
-webkit-background-clip:text;-webkit-text-fill-color:transparent;font-size:2.5rem;font-weight:800">
🌍 ESG Intelligence Platform</h1>
<p style="color:#6b7280;font-size:1.1rem">Advanced Multi-Label ESG Text Classification</p>
<div style="display:flex;justify-content:center;gap:20px;margin-top:16px">
<span style="background:#dcfce7;padding:6px 14px;border-radius:20px">🌿 Environmental</span>
<span style="background:#dbeafe;padding:6px 14px;border-radius:20px">👥 Social</span>
<span style="background:#fef3c7;padding:6px 14px;border-radius:20px">⚖️ Governance</span>
</div></div>""")

    with gr.Tabs():
        # Tab 1: Text Analysis
        with gr.TabItem("🔍 Text Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    text_input = gr.Textbox(label="Enter text to analyze", placeholder="Paste text here...", lines=8)
                    with gr.Row():
                        analyze_btn = gr.Button("🔍 Analyze", variant="primary", size="lg")
                        clear_btn = gr.Button("🗑️ Clear")
                    sample_dd = gr.Dropdown(list(SAMPLES.keys()), label="📚 Load Sample")
                with gr.Column(scale=1):
                    score_out = gr.HTML()
                    pills_out = gr.HTML()
            with gr.Row():
                radar_out = gr.Plot(label="ESG Radar")
                bars_out = gr.Plot(label="Confidence Scores")
            with gr.Accordion("📝 Detailed Analysis", open=True):
                highlight_out = gr.HTML()
                explain_out = gr.Markdown()

            # Output order must match the tuple returned by analyze_text.
            analyze_btn.click(
                analyze_text, [text_input],
                [pills_out, highlight_out, explain_out, radar_out, bars_out, score_out])
            # Reset the input and all six outputs (plots are cleared with None).
            clear_btn.click(
                lambda: ("", "", "", "", None, None, ""),
                outputs=[text_input, pills_out, highlight_out, explain_out, radar_out, bars_out, score_out])
            # Selecting a sample copies its text into the input box.
            sample_dd.change(lambda x: SAMPLES.get(x, ""), [sample_dd], [text_input])

        # Tab 2: Batch Analysis
        with gr.TabItem("📁 Batch Analysis"):
            gr.Markdown("### Upload CSV or TXT for bulk ESG analysis")
            with gr.Row():
                file_in = gr.File(label="Upload File", file_types=[".csv", ".txt"])
                batch_btn = gr.Button("📊 Analyze Batch", variant="primary", size="lg")
            stats_out = gr.HTML()
            with gr.Row():
                dist_out = gr.Plot(label="Distribution")
                trend_out = gr.Plot(label="Score Trends")
            table_out = gr.Dataframe(wrap=True)

            # Output order must match the tuple returned by analyze_batch.
            batch_btn.click(analyze_batch, [file_in], [stats_out, table_out, dist_out, trend_out])

        # Tab 3: About
        with gr.TabItem("ℹ️ About"):
            gr.Markdown("""
## 🌍 ESG Intelligence Platform

### Classification Categories

| Category | Icon | Description |
|----------|------|-------------|
| **Environmental (E)** | 🌿 | Climate, emissions, energy, waste, biodiversity |
| **Social (S)** | 👥 | Labor practices, diversity, health & safety |
| **Governance (G)** | ⚖️ | Board structure, ethics, transparency, compliance |
| **Non-ESG** | 📄 | General business content |

### Model Architecture

- **Base**: Qwen3-Embedding-8B (4096-dim embeddings)
- **Classification**: Logistic Regression Ensemble with balanced class weights
- **Validation**: 5-fold MultilabelStratifiedKFold
- **Threshold Optimization**: Per-class + joint macro-F1 optimization

### Performance

| Metric | Score |
|--------|-------|
| Macro F1 | **0.82+** |
| Environmental F1 | 0.78 |
| Social F1 | 0.85 |
| Governance F1 | 0.79 |

---

Built with ❤️ for ESG Analysis
""")

    gr.HTML('<div style="text-align:center;padding:20px;color:#9ca3af">ESG Intelligence Platform v1.0</div>')
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    # Serve on 0.0.0.0:7860; share=True additionally asks Gradio for a public share link.
    app.launch(server_name="0.0.0.0", server_port=7860, share=True)