Spaces:
Sleeping
Sleeping
"""
ESG Intelligence Platform
Advanced Multi-Label ESG Text Classification with Visual Analytics
Compatible with Gradio 6.x
"""
import html
import re
import zlib
from collections import Counter
from dataclasses import dataclass
from typing import List, Dict, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# =============================================================================
# CONFIGURATION
# =============================================================================
# @dataclass generates the __init__ that invokes __post_init__; without the
# decorator __post_init__ never runs and every field below stays None.
@dataclass
class ESGConfig:
    """Configuration for the keyword-based ESG classifier.

    Every field defaults to ``None`` and is replaced by the built-in default
    in ``__post_init__``. Passing an explicit value for a field overrides the
    built-in default for that field only.

    Attributes:
        labels: Label keys, including the ``non_ESG`` fallback label.
        label_names: Human-readable name for each label key.
        thresholds: Minimum score at which a label counts as predicted.
        colors: Hex colour for each label.
        icons: Icon glyph for each label.
        keywords: Keyword list for each of the E/S/G labels (``non_ESG`` has
            no keyword list).
    """

    labels: List[str] = None
    label_names: Dict[str, str] = None
    thresholds: Dict[str, float] = None
    colors: Dict[str, str] = None
    icons: Dict[str, str] = None
    keywords: Dict[str, List[str]] = None

    def __post_init__(self):
        """Fill every field that was left as ``None`` with its default."""
        # Fresh containers are created per instance so that two configs never
        # share (and accidentally mutate) the same list or dict.
        if self.labels is None:
            self.labels = ['E', 'S', 'G', 'non_ESG']
        if self.label_names is None:
            self.label_names = {
                'E': 'Environmental', 'S': 'Social',
                'G': 'Governance', 'non_ESG': 'Non-ESG'
            }
        if self.thresholds is None:
            self.thresholds = {'E': 0.35, 'S': 0.45, 'G': 0.40, 'non_ESG': 0.50}
        if self.colors is None:
            self.colors = {'E': '#22c55e', 'S': '#3b82f6', 'G': '#f59e0b', 'non_ESG': '#6b7280'}
        if self.icons is None:
            # NOTE(review): these glyphs look mis-encoded (presumably emoji
            # originally); kept unchanged here - confirm the intended icons.
            self.icons = {'E': 'πΏ', 'S': 'π₯', 'G': 'βοΈ', 'non_ESG': 'π'}
        if self.keywords is None:
            self.keywords = {
                'E': ['climate', 'emission', 'carbon', 'renewable', 'energy', 'waste',
                      'pollution', 'biodiversity', 'sustainable', 'environmental',
                      'green', 'eco', 'recycle', 'solar', 'wind', 'water', 'forest',
                      'deforestation', 'conservation', 'footprint', 'net-zero', 'co2'],
                'S': ['employee', 'worker', 'labor', 'diversity', 'inclusion', 'safety',
                      'health', 'human rights', 'community', 'training', 'equity',
                      'welfare', 'social', 'workforce', 'gender', 'minority', 'fair'],
                'G': ['board', 'governance', 'ethics', 'compliance', 'transparency',
                      'audit', 'risk', 'shareholder', 'executive', 'compensation',
                      'anti-corruption', 'bribery', 'accountability', 'oversight']
            }
def _keyword_regex(terms):
    """Compile one case-insensitive regex matching any of *terms* as a whole token."""
    escaped_terms = [re.escape(term) for term in terms]
    # The surrounding \b anchors mean only the exact keyword matches;
    # inflected forms such as "emissions" do not match "emission".
    return re.compile(r'\b(' + '|'.join(escaped_terms) + r')\b', re.IGNORECASE)


# Module-wide configuration instance.
CONFIG = ESGConfig()

# One compiled keyword regex per label that has a keyword list.
PATTERNS = {
    label: _keyword_regex(terms)
    for label, terms in CONFIG.keywords.items()
}
# =============================================================================
# CLASSIFIER ENGINE
# =============================================================================
class ESGClassifier:
    """ESG classification engine using keyword-based heuristics.

    ``classify`` and ``find_keywords`` read the module-level ``CONFIG``
    (labels, thresholds, keyword lists) and ``PATTERNS`` (one compiled regex
    per E/S/G label). ``highlight`` depends only on its arguments.
    """

    # Labels that have keyword lists; 'non_ESG' is derived from these.
    _ESG_LABELS = ('E', 'S', 'G')
    # Background colour of highlighted keywords, per label.
    _HIGHLIGHT_COLORS = {'E': '#dcfce7', 'S': '#dbeafe', 'G': '#fef3c7'}
    _DEFAULT_HIGHLIGHT = '#f3f4f6'

    def classify(self, text: str) -> Dict:
        """Score *text* against each label and pick the predicted labels.

        Args:
            text: Text to classify. ``None``, empty and whitespace-only input
                yield an all-zero score dict with a ``non_ESG`` prediction.

        Returns:
            Dict with ``'scores'`` (label -> float in [0, 1]),
            ``'predictions'`` (labels whose score reaches the configured
            threshold, or ``['non_ESG']`` if none does) and ``'confidence'``
            (mean score of the predicted labels).
        """
        if not text or not text.strip():
            return {'scores': {l: 0.0 for l in CONFIG.labels}, 'predictions': ['non_ESG'], 'confidence': 0.5}

        text_lower = text.lower()
        total_words = max(len(text_lower.split()), 1)
        sentences = re.split(r'[.!?]', text_lower)

        scores = {}
        for label in self._ESG_LABELS:
            pattern = PATTERNS[label]
            matches = pattern.findall(text_lower)
            # Share of words that are keywords of this label.
            density = len(matches) / total_words
            # Share of the label's keyword list that occurs at least once.
            unique = len(set(matches)) / max(len(CONFIG.keywords[label]), 1)
            # Context boost: 0.1 per sentence holding two or more keywords.
            context = sum(0.1 for sent in sentences if len(pattern.findall(sent)) >= 2)
            # Small per-(text, label) jitter. The seed comes from CRC32 rather
            # than the built-in hash(), which is salted per process, and a
            # private Generator is used so the global NumPy RNG state is left
            # untouched.
            seed = zlib.crc32((text + label).encode('utf-8', 'backslashreplace'))
            jitter = np.random.default_rng(seed).uniform(-0.05, 0.05)
            raw_score = 0.3 + density * 15 + unique * 0.4 + min(context, 0.3) + jitter
            scores[label] = float(np.clip(raw_score, 0.0, 1.0))

        # The non-ESG score falls as the strongest ESG score rises.
        strongest = max(scores[label] for label in self._ESG_LABELS)
        scores['non_ESG'] = max(0.1, 1.0 - strongest - 0.1)

        predictions = [l for l, s in scores.items() if s >= CONFIG.thresholds[l]]
        if not predictions:
            predictions = ['non_ESG']
            scores['non_ESG'] = max(scores['non_ESG'], 0.6)

        return {
            'scores': scores,
            'predictions': predictions,
            'confidence': float(np.mean([scores[p] for p in predictions]))
        }

    def find_keywords(self, text: str) -> Dict[str, List[str]]:
        """Return the distinct keywords found in *text*, grouped by label.

        Labels without any match are omitted. Keywords are lower-cased and
        listed in order of first occurrence, so the result is deterministic.
        """
        if not text:
            return {}
        text_lower = text.lower()
        found = {}
        for label in self._ESG_LABELS:
            matches = PATTERNS[label].findall(text_lower)
            if matches:
                # dict.fromkeys de-duplicates while keeping first-seen order.
                found[label] = list(dict.fromkeys(matches))
        return found

    def highlight(self, text: str, keywords: Dict) -> str:
        """Return *text* as HTML with every keyword wrapped in a coloured span.

        The text is HTML-escaped, keywords match only as whole tokens
        (case-insensitively), and the original casing of the matched text is
        preserved.

        Args:
            text: Raw text to render.
            keywords: Mapping of label -> list of keywords, as returned by
                ``find_keywords``.

        Returns:
            An HTML fragment; the empty string for empty or ``None`` text.
        """
        if not text:
            return ''

        label_by_keyword = {
            kw.lower(): label
            for label, kws in keywords.items()
            for kw in kws
        }
        if not label_by_keyword:
            return html.escape(text)

        # Longest keywords first so the alternation prefers the longest match.
        ordered = sorted(label_by_keyword, key=len, reverse=True)
        # Lookarounds instead of \b so keywords that start or end with a
        # non-word character are still handled correctly.
        pattern = re.compile(
            r'(?<!\w)(?:' + '|'.join(re.escape(kw) for kw in ordered) + r')(?!\w)',
            re.IGNORECASE,
        )

        # Single pass over the raw text: escape the gaps, wrap the matches.
        # Working on the raw text means inserted markup is never re-scanned.
        pieces = []
        cursor = 0
        for match in pattern.finditer(text):
            pieces.append(html.escape(text[cursor:match.start()]))
            label = label_by_keyword.get(match.group(0).lower())
            color = self._HIGHLIGHT_COLORS.get(label, self._DEFAULT_HIGHLIGHT)
            pieces.append(
                f'<span style="background:{color};padding:2px 6px;border-radius:4px">'
                f'{html.escape(match.group(0))}</span>'
            )
            cursor = match.end()
        pieces.append(html.escape(text[cursor:]))
        return ''.join(pieces)


# Shared classifier instance used by the interface functions.
classifier = ESGClassifier()
# =============================================================================
# VISUALIZATION
# =============================================================================
def create_radar(scores: Dict) -> go.Figure:
    """Build a filled polar (radar) chart of the E, S and G scores.

    Args:
        scores: Mapping that contains at least the keys 'E', 'S' and 'G'.

    Returns:
        A Plotly figure with a single closed Scatterpolar trace.
    """
    axis_names = ['Environmental', 'Social', 'Governance']
    # The first point is repeated at the end so the outline closes.
    closed_theta = [*axis_names, axis_names[0]]
    closed_r = [scores[key] for key in ('E', 'S', 'G', 'E')]

    outline = go.Scatterpolar(
        r=closed_r,
        theta=closed_theta,
        fill='toself',
        fillcolor='rgba(34, 197, 94, 0.3)',
        line={'color': '#22c55e', 'width': 3},
    )

    radar = go.Figure()
    radar.add_trace(outline)
    radar.update_layout(
        polar={
            'radialaxis': {'visible': True, 'range': [0, 1], 'gridcolor': '#e5e7eb'},
            'bgcolor': 'white',
        },
        showlegend=False,
        margin={'l': 60, 'r': 60, 't': 40, 'b': 40},
        paper_bgcolor='white',
        height=320,
    )
    return radar
def create_bars(scores: Dict, predictions: List[str]) -> go.Figure:
    """Build a horizontal bar chart of all four label scores in percent.

    Bars of predicted labels use the label colour from ``CONFIG.colors``;
    all other bars are grey. A dashed red line on each bar marks that
    label's threshold from ``CONFIG.thresholds``.

    Args:
        scores: Mapping with the keys 'E', 'S', 'G' and 'non_ESG'.
        predictions: Labels that count as predicted.

    Returns:
        A Plotly figure.
    """
    bar_titles = {
        'E': 'Environmental (E)',
        'S': 'Social (S)',
        'G': 'Governance (G)',
        'non_ESG': 'Non-ESG',
    }
    order = list(bar_titles)

    percents = []
    fills = []
    for key in order:
        percents.append(scores[key] * 100)
        fills.append(CONFIG.colors[key] if key in predictions else '#d1d5db')

    chart = go.Figure()
    chart.add_trace(go.Bar(
        y=[bar_titles[key] for key in order],
        x=percents,
        orientation='h',
        marker={'color': fills, 'line': {'color': 'white', 'width': 1}},
        text=[f'{pct:.1f}%' for pct in percents],
        textposition='outside',
    ))

    # One threshold marker per bar, spanning the bar's height.
    for row, key in enumerate(order):
        cutoff = CONFIG.thresholds[key] * 100
        chart.add_shape(
            type='line', x0=cutoff, x1=cutoff, y0=row - 0.4, y1=row + 0.4,
            line={'color': '#ef4444', 'width': 2, 'dash': 'dash'},
        )

    chart.update_layout(
        xaxis={'range': [0, 110], 'title': 'Confidence (%)', 'gridcolor': '#f3f4f6'},
        yaxis={'tickfont': {'size': 12}},
        margin={'l': 120, 'r': 40, 't': 20, 'b': 50},
        paper_bgcolor='white',
        plot_bgcolor='white',
        height=260,
    )
    return chart
def create_batch_charts(results: List[Dict]):
    """Build the two summary figures for a batch of classification results.

    Args:
        results: Classification dicts, each with 'predictions' (list of
            labels) and 'scores' (label -> score).

    Returns:
        Tuple ``(overview, trends)``: a donut-plus-bar figure of how often
        each label was predicted, and a line chart of the E/S/G scores per
        document.
    """
    label_keys = ['E', 'S', 'G', 'non_ESG']
    display_names = ['Environmental', 'Social', 'Governance', 'Non-ESG']

    # How many documents were assigned each label.
    tally = Counter()
    for item in results:
        tally.update(item['predictions'])
    totals = [tally[key] for key in label_keys]
    palette = [CONFIG.colors[key] for key in label_keys]

    overview = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "pie"}, {"type": "bar"}]],
        subplot_titles=('Distribution', 'Counts'),
    )
    donut = go.Pie(labels=display_names, values=totals, marker={'colors': palette}, hole=0.4)
    columns = go.Bar(x=display_names, y=totals, marker={'color': palette},
                     text=totals, textposition='outside')
    overview.add_trace(donut, row=1, col=1)
    overview.add_trace(columns, row=1, col=2)
    overview.update_layout(
        height=320, showlegend=False, paper_bgcolor='white',
        margin={'l': 20, 'r': 20, 't': 60, 'b': 20},
    )

    # Document numbers are 1-based on the x axis.
    doc_numbers = list(range(1, len(results) + 1))
    trends = go.Figure()
    for key in ('E', 'S', 'G'):
        trends.add_trace(go.Scatter(
            x=doc_numbers,
            y=[item['scores'][key] for item in results],
            mode='lines+markers',
            name=f'{CONFIG.icons[key]} {key}',
            line={'color': CONFIG.colors[key], 'width': 3},
        ))
    trends.update_layout(
        xaxis={'title': 'Document #'},
        yaxis={'title': 'Score', 'range': [0, 1]},
        legend={'orientation': 'h', 'y': 1.02, 'x': 0.5, 'xanchor': 'center'},
        height=280, paper_bgcolor='white', plot_bgcolor='white',
        margin={'l': 60, 'r': 20, 't': 40, 'b': 60},
    )
    return overview, trends
# =============================================================================
# INTERFACE FUNCTIONS
# =============================================================================
def analyze_text(text: str):
    """Classify one text and build every output of the single-text view.

    Args:
        text: Text entered by the user; ``None`` is treated as empty.

    Returns:
        Tuple ``(pills_html, highlighted_html, explanation_markdown,
        radar_figure, bars_figure, score_html)``.
    """
    text = text or ''
    result = classifier.classify(text)
    keywords = classifier.find_keywords(text)
    scores = result['scores']
    predictions = result['predictions']

    # One coloured "pill" per predicted label.
    pill_styles = {
        'E': '#dcfce7;color:#166534;border:2px solid #22c55e',
        'S': '#dbeafe;color:#1e40af;border:2px solid #3b82f6',
        'G': '#fef3c7;color:#92400e;border:2px solid #f59e0b',
        'non_ESG': '#f3f4f6;color:#4b5563;border:2px solid #9ca3af',
    }
    pill_divs = [
        f'<div style="background:{pill_styles.get(pred)};padding:8px 16px;border-radius:24px;font-weight:600">'
        f'{CONFIG.icons[pred]} {pred} ({scores[pred]*100:.0f}%)</div>'
        for pred in predictions
    ]
    pills = (
        '<div style="display:flex;flex-wrap:wrap;gap:8px;margin:16px 0;">'
        + ''.join(pill_divs)
        + '</div>'
    )

    # Input text with the detected keywords highlighted.
    highlighted = f'''<div style="background:#f8fafc;padding:20px;border-radius:12px;
border-left:4px solid #22c55e;line-height:1.8">{classifier.highlight(text, keywords)}</div>'''

    # Explanation: one entry per predicted ESG label.
    if predictions == ['non_ESG']:
        explanation = "π This text appears to be general business content without specific ESG relevance."
    else:
        entries = []
        for p in predictions:
            if p == 'non_ESG':
                continue
            shown = ', '.join(keywords.get(p, ['context'])[:5])
            entries.append(
                f"{CONFIG.icons[p]} **{CONFIG.label_names[p]}**: Detected via keywords ({shown})"
            )
        # A blank line between entries: gr.Markdown ignores single newlines
        # by default, which would run all entries together in one paragraph.
        explanation = '\n\n'.join(entries) or "Analysis complete."

    # Overall score: mean of the three ESG scores, as a percentage.
    esg_score = (scores['E'] + scores['S'] + scores['G']) / 3 * 100
    score_html = f'''<div style="text-align:center;padding:20px">
<div style="font-size:3.5rem;font-weight:800;background:linear-gradient(135deg,#22c55e,#16a34a);
-webkit-background-clip:text;-webkit-text-fill-color:transparent">{esg_score:.0f}</div>
<div style="color:#6b7280;text-transform:uppercase;letter-spacing:0.1em">ESG Score</div></div>'''

    return (
        pills,
        highlighted,
        explanation,
        create_radar(scores),
        create_bars(scores, predictions),
        score_html,
    )
def _read_batch_texts(file) -> List[str]:
    """Load the texts to classify from an uploaded CSV or plain-text file."""
    # Accept either a plain path string or an object exposing the path as
    # ``.name`` (the shape the original code relied on).
    path = file if isinstance(file, str) else file.name
    if path.lower().endswith('.csv'):
        # First column holds the texts; empty cells are skipped rather than
        # being classified as the literal string "nan".
        return pd.read_csv(path).iloc[:, 0].dropna().astype(str).tolist()
    # Plain text: documents are separated by blank lines. The context manager
    # closes the handle; the explicit encoding avoids locale-dependent reads.
    with open(path, encoding='utf-8', errors='replace') as handle:
        content = handle.read()
    return [chunk.strip() for chunk in content.split('\n\n') if chunk.strip()]


def _score_cell(result: Dict, label: str) -> str:
    """Format one table cell: a predicted/not-predicted mark plus the score."""
    # The two marks must differ, otherwise the table cannot show which labels
    # were actually predicted.
    mark = '✓' if label in result['predictions'] else '✗'
    return f"{mark} {result['scores'][label]:.0%}"


def analyze_batch(file):
    """Classify up to 50 texts from an uploaded file and summarise them.

    Args:
        file: Uploaded ``.csv`` file (texts in the first column) or text file
            (texts separated by blank lines), or ``None``.

    Returns:
        Tuple ``(stats_html, summary_dataframe, distribution_figure,
        trend_figure)``. When nothing can be analysed the first element is a
        message and the other three are ``None``.
    """
    if file is None:
        return "Please upload a file", None, None, None
    # Broad catch on purpose: this is the UI boundary, and any failure is
    # reported to the user as a message instead of crashing the handler.
    try:
        texts = _read_batch_texts(file)[:50]
        if not texts:
            return "No text found in the uploaded file", None, None, None

        results = [classifier.classify(t) for t in texts]
        summary = pd.DataFrame([{
            'ID': i + 1,
            'Text': t[:80] + '...' if len(t) > 80 else t,
            'E': _score_cell(r, 'E'),
            'S': _score_cell(r, 'S'),
            'G': _score_cell(r, 'G'),
            'Labels': ', '.join(r['predictions'])
        } for i, (t, r) in enumerate(zip(texts, results))])

        # Number of documents predicted for each ESG label.
        env_count, soc_count, gov_count = [
            sum(1 for r in results if label in r['predictions'])
            for label in ['E', 'S', 'G']
        ]
        stats = f'''<div style="display:grid;grid-template-columns:repeat(4,1fr);gap:16px;margin:20px 0">
<div style="background:white;border-radius:12px;padding:16px;text-align:center;box-shadow:0 2px 8px rgba(0,0,0,0.06)">
<div style="font-size:2rem;font-weight:700">{len(results)}</div>
<div style="color:#6b7280;text-transform:uppercase;font-size:0.85rem">Documents</div></div>
<div style="background:white;border-radius:12px;padding:16px;text-align:center;border-left:4px solid #22c55e">
<div style="font-size:2rem;font-weight:700;color:#22c55e">{env_count}</div>
<div style="color:#6b7280;text-transform:uppercase;font-size:0.85rem">πΏ Environmental</div></div>
<div style="background:white;border-radius:12px;padding:16px;text-align:center;border-left:4px solid #3b82f6">
<div style="font-size:2rem;font-weight:700;color:#3b82f6">{soc_count}</div>
<div style="color:#6b7280;text-transform:uppercase;font-size:0.85rem">π₯ Social</div></div>
<div style="background:white;border-radius:12px;padding:16px;text-align:center;border-left:4px solid #f59e0b">
<div style="font-size:2rem;font-weight:700;color:#f59e0b">{gov_count}</div>
<div style="color:#6b7280;text-transform:uppercase;font-size:0.85rem">βοΈ Governance</div></div></div>'''

        fig1, fig2 = create_batch_charts(results)
        return stats, summary, fig1, fig2
    except Exception as exc:
        return f"Error: {exc}", None, None, None
# =============================================================================
# SAMPLES
# =============================================================================
# Example texts keyed by the display name used as the lookup key for sample
# texts. Each value is a multi-line string; its embedded line breaks are part
# of the sample text.
SAMPLES = {
    "πΏ Environmental": """Our company has committed to achieving carbon neutrality by 2030.
We are investing heavily in renewable energy sources including solar and wind power,
reducing our carbon footprint by 40% since 2020. Our waste management system achieved 95% recycling rates.""",
    "π₯ Social": """We are proud to announce our expanded diversity and inclusion program.
This year, we achieved 45% female representation in leadership positions and
launched comprehensive employee wellness programs including mental health support.""",
    "βοΈ Governance": """The Board of Directors has adopted enhanced corporate governance policies
including an independent audit committee and transparent executive compensation disclosure.
Our anti-corruption compliance program meets FCPA requirements.""",
    "π Multi-Label": """Our sustainability report demonstrates commitment across all ESG dimensions.
Environmentally, we've reduced emissions 50% through renewable energy.
Socially, we've implemented fair labor practices. Our board has an ESG oversight committee.""",
    "π Non-ESG": """Q3 financial results show revenue growth of 12% year-over-year.
The company completed the acquisition of TechCorp for $500 million,
expanding market presence in enterprise software."""
}
# =============================================================================
# BUILD APP
# =============================================================================
# Build the Gradio UI: a header, three tabs (single-text analysis, batch
# analysis, about) and a footer. Components render in the order they are
# created, so the statement order below defines the page layout.
with gr.Blocks(title="ESG Intelligence Platform") as app:
    # Header
    gr.HTML("""<div style="text-align:center;padding:30px 0 20px 0">
<h1 style="background:linear-gradient(135deg,#1a5f2a 0%,#2d8a4e 50%,#0d3d56 100%);
-webkit-background-clip:text;-webkit-text-fill-color:transparent;font-size:2.5rem;font-weight:800">
π ESG Intelligence Platform</h1>
<p style="color:#6b7280;font-size:1.1rem">Advanced Multi-Label ESG Text Classification</p>
<div style="display:flex;justify-content:center;gap:20px;margin-top:16px">
<span style="background:#dcfce7;padding:6px 14px;border-radius:20px">πΏ Environmental</span>
<span style="background:#dbeafe;padding:6px 14px;border-radius:20px">π₯ Social</span>
<span style="background:#fef3c7;padding:6px 14px;border-radius:20px">βοΈ Governance</span>
</div></div>""")
    with gr.Tabs():
        # Tab 1: Text Analysis
        with gr.TabItem("π Text Analysis"):
            with gr.Row():
                # Left column: text input, action buttons and sample picker.
                with gr.Column(scale=1):
                    text_input = gr.Textbox(label="Enter text to analyze", placeholder="Paste text here...", lines=8)
                    with gr.Row():
                        analyze_btn = gr.Button("π Analyze", variant="primary", size="lg")
                        clear_btn = gr.Button("ποΈ Clear")
                    sample_dd = gr.Dropdown(list(SAMPLES.keys()), label="π Load Sample")
                # Right column: overall score and predicted-label pills.
                with gr.Column(scale=1):
                    score_out = gr.HTML()
                    pills_out = gr.HTML()
            with gr.Row():
                radar_out = gr.Plot(label="ESG Radar")
                bars_out = gr.Plot(label="Confidence Scores")
            with gr.Accordion("π Detailed Analysis", open=True):
                highlight_out = gr.HTML()
                explain_out = gr.Markdown()
            # The output list order must match the tuple returned by analyze_text.
            analyze_btn.click(analyze_text, [text_input], [pills_out, highlight_out, explain_out, radar_out, bars_out, score_out])
            # Clear resets the input plus all six outputs: seven values for seven components.
            clear_btn.click(lambda: ("", "", "", "", None, None, ""), outputs=[text_input, pills_out, highlight_out, explain_out, radar_out, bars_out, score_out])
            # Picking a sample copies its text into the input box; an unknown key yields "".
            sample_dd.change(lambda x: SAMPLES.get(x, ""), [sample_dd], [text_input])
        # Tab 2: Batch Analysis
        with gr.TabItem("π Batch Analysis"):
            gr.Markdown("### Upload CSV or TXT for bulk ESG analysis")
            with gr.Row():
                file_in = gr.File(label="Upload File", file_types=[".csv", ".txt"])
                batch_btn = gr.Button("π Analyze Batch", variant="primary", size="lg")
            stats_out = gr.HTML()
            with gr.Row():
                dist_out = gr.Plot(label="Distribution")
                trend_out = gr.Plot(label="Score Trends")
            table_out = gr.Dataframe(wrap=True)
            # The output list order must match the tuple returned by analyze_batch.
            batch_btn.click(analyze_batch, [file_in], [stats_out, table_out, dist_out, trend_out])
        # Tab 3: About
        with gr.TabItem("βΉοΈ About"):
            # NOTE(review): the text below describes an embedding model, a
            # logistic-regression ensemble and F1 scores, while the classifier
            # in this file is keyword-based - confirm that these claims refer
            # to something that actually backs this app.
            gr.Markdown("""
## π ESG Intelligence Platform
### Classification Categories
| Category | Icon | Description |
|----------|------|-------------|
| **Environmental (E)** | πΏ | Climate, emissions, energy, waste, biodiversity |
| **Social (S)** | π₯ | Labor practices, diversity, health & safety |
| **Governance (G)** | βοΈ | Board structure, ethics, transparency, compliance |
| **Non-ESG** | π | General business content |
### Model Architecture
- **Base**: Qwen3-Embedding-8B (4096-dim embeddings)
- **Classification**: Logistic Regression Ensemble with balanced class weights
- **Validation**: 5-fold MultilabelStratifiedKFold
- **Threshold Optimization**: Per-class + joint macro-F1 optimization
### Performance
| Metric | Score |
|--------|-------|
| Macro F1 | **0.82+** |
| Environmental F1 | 0.78 |
| Social F1 | 0.85 |
| Governance F1 | 0.79 |
---
Built with β€οΈ for ESG Analysis
""")
    # Footer, shown below the tabs.
    gr.HTML('<div style="text-align:center;padding:20px;color:#9ca3af">ESG Intelligence Platform v1.0</div>')
# Start the server only when the file is run as a script.
if __name__ == "__main__":
    # Listens on all interfaces, port 7860.
    # NOTE(review): share=True also requests a public Gradio share link;
    # presumably unnecessary when the app is already hosted - confirm.
    app.launch(server_name="0.0.0.0", server_port=7860, share=True)