Spaces:

bechir09
/

ESG_Intelligence_Platform

Sleeping

App Files Community

bechir09 committed on Feb 15

Commit

ed6580f

verified ·

1 Parent(s): 4d1bb75

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

__pycache__/app.cpython-313.pyc +0 -0
requirements.txt +0 -7
test_app.py +360 -0
test_batch.py +77 -0
test_data.csv +11 -0

__pycache__/app.cpython-313.pyc ADDED Viewed

Binary file (27.7 kB). View file

requirements.txt CHANGED Viewed

@@ -1,11 +1,4 @@
-# ESG Intelligence Platform
-# Required packages
 gradio>=4.0.0
 plotly>=5.18.0
 pandas>=2.0.0
 numpy>=1.24.0
-torch>=2.0.0
-scikit-learn>=1.3.0
-transformers>=4.51.0
-accelerate>=0.25.0

 gradio>=4.0.0
 plotly>=5.18.0
 pandas>=2.0.0
 numpy>=1.24.0

test_app.py ADDED Viewed

	@@ -0,0 +1,360 @@

+"""
+🧪 ESG Intelligence Platform - Comprehensive Test Suite
+Tests all functionality: classification, visualization, edge cases
+"""
+import sys
+sys.path.insert(0, '/home/bechirdardouri/Downloads/esg_app')
+import numpy as np
+import pandas as pd
+from collections import Counter
+# Import the app components
+from app import (
+    ESGClassifier, CONFIG, PATTERNS, SAMPLES,
+    create_radar, create_bars, create_batch_charts,
+    analyze_text, analyze_batch
+)
+class TestResults:
+    """Tally pass/fail outcomes of the script's checks and print a final report.
+    Attributes:
+        passed: Number of checks whose condition was truthy.
+        failed: Number of checks whose condition was falsy.
+        errors: One "<test_name>: <details>" entry per failed check.
+    """
+    def __init__(self):
+        self.passed = 0
+        self.failed = 0
+        self.errors = []
+    def check(self, condition, test_name, details=""):
+        """Record one check and print a ✅ or ❌ line for it.
+        Args:
+            condition: Outcome of the check; any truthy value counts as a pass.
+            test_name: Label printed for the check and stored when it fails.
+            details: Extra context that is printed and stored only on failure.
+        """
+        if condition:
+            self.passed += 1
+            print(f"  ✅ {test_name}")
+        else:
+            self.failed += 1
+            self.errors.append(f"{test_name}: {details}")
+            print(f"  ❌ {test_name} - {details}")
+    def summary(self):
+        """Print totals plus every recorded failure.
+        Returns:
+            True when no check failed (including when no check was run).
+        """
+        total = self.passed + self.failed
+        # Guard the percentage: with zero recorded checks the division would
+        # raise ZeroDivisionError before anything is reported.
+        pass_rate = 100 * self.passed / total if total else 0.0
+        print(f"\n{'='*60}")
+        print(f"📊 TEST SUMMARY: {self.passed}/{total} passed ({pass_rate:.1f}%)")
+        if self.errors:
+            print("\n❌ FAILURES:")
+            for e in self.errors:
+                print(f"   - {e}")
+        print('='*60)
+        return self.failed == 0
+results = TestResults()
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 1: Configuration Validation")
+print("="*60)
+# List equality pins both the label set and its order.
+results.check(CONFIG.labels == ['E', 'S', 'G', 'non_ESG'],
+              "Labels defined correctly")
+# Each per-label mapping on CONFIG must contain every configured label.
+for mapping_name, description in (
+    ('thresholds', "Thresholds defined for all labels"),
+    ('colors', "Colors defined for all labels"),
+    ('icons', "Icons defined for all labels"),
+):
+    results.check(all(lbl in getattr(CONFIG, mapping_name) for lbl in CONFIG.labels),
+                  description)
+# Each ESG category needs more than 10 keywords to count as populated.
+for category, description in (
+    ('E', "Environmental keywords list is populated"),
+    ('S', "Social keywords list is populated"),
+    ('G', "Governance keywords list is populated"),
+):
+    results.check(len(CONFIG.keywords[category]) > 10, description)
+# Every threshold must lie strictly between 0 and 1.
+for label, thresh in CONFIG.thresholds.items():
+    in_open_unit_interval = 0.0 < thresh < 1.0
+    results.check(in_open_unit_interval,
+                  f"Threshold for {label} is valid ({thresh})")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 2: Classifier Basic Functionality")
+print("="*60)
+classifier = ESGClassifier()
+# Empty string: expected to fall back to the single non_ESG label.
+empty_outcome = classifier.classify("")
+results.check(empty_outcome['predictions'] == ['non_ESG'],
+              "Empty text returns non_ESG")
+results.check(empty_outcome['confidence'] > 0,
+              "Empty text has valid confidence")
+# Whitespace-only string (note: this is not None, just blank text).
+blank_outcome = classifier.classify("   ")
+results.check(blank_outcome['predictions'] == ['non_ESG'],
+              "Whitespace-only text returns non_ESG")
+# Shape of the result for an arbitrary non-empty text.
+generic_outcome = classifier.classify("test text")
+score_map = generic_outcome['scores']
+results.check(all(lbl in score_map for lbl in CONFIG.labels),
+              "All labels have scores")
+results.check(all(0 <= score <= 1 for score in score_map.values()),
+              "All scores are in [0, 1] range")
+results.check(all(key in generic_outcome for key in ('predictions', 'confidence')),
+              "Result has predictions and confidence")
+print("\n" + "="*60)
+print("🧪 TEST 3: Environmental Classification")
+print("="*60)
+env_texts = [
+    "We are committed to reducing carbon emissions by 50% by 2030.",
+    "Our solar and wind renewable energy investments totaled $100 million.",
+    "The company achieved carbon neutrality through sustainable practices.",
+    "Deforestation in our supply chain has been reduced through conservation efforts.",
+    "Our waste management and recycling program diverted 90% from landfills.",
+]
+for i, text in enumerate(env_texts):
+    result = classifier.classify(text)
+    has_E = 'E' in result['predictions']
+    results.check(has_E, f"Environmental text {i+1} detected as E",
+                  f"Got: {result['predictions']}, Score: {result['scores']['E']:.3f}")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 4: Social Classification")
+print("="*60)
+social_texts = [
+    "Our diversity and inclusion initiatives increased female leadership to 40%.",
+    "Employee health and safety remains our top priority.",
+    "We invested in workforce training and community development programs.",
+    "The company supports human rights throughout our supply chain.",
+    "Worker welfare and fair labor practices are central to our operations.",
+]
+for i, text in enumerate(social_texts):
+    result = classifier.classify(text)
+    has_S = 'S' in result['predictions']
+    results.check(has_S, f"Social text {i+1} detected as S",
+                  f"Got: {result['predictions']}, Score: {result['scores']['S']:.3f}")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 5: Governance Classification")
+print("="*60)
+gov_texts = [
+    "The Board of Directors approved new governance policies.",
+    "Our anti-corruption and ethics compliance program was enhanced.",
+    "Executive compensation is now tied to transparency metrics.",
+    "Independent audit committee oversight was strengthened.",
+    "Shareholder accountability mechanisms were improved.",
+]
+for i, text in enumerate(gov_texts):
+    result = classifier.classify(text)
+    has_G = 'G' in result['predictions']
+    results.check(has_G, f"Governance text {i+1} detected as G",
+                  f"Got: {result['predictions']}, Score: {result['scores']['G']:.3f}")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 6: Non-ESG Classification")
+print("="*60)
+non_esg_texts = [
+    "Q3 revenue increased by 15% compared to last year.",
+    "The company acquired TechCorp for $500 million.",
+    "Our new product launch exceeded sales expectations.",
+    "Operating margins improved due to cost optimization.",
+    "The merger will create significant synergies.",
+]
+for position, sentence in enumerate(non_esg_texts, start=1):
+    predicted = classifier.classify(sentence)['predictions']
+    flagged_non_esg = 'non_ESG' in predicted
+    flagged_esg = any(lbl in predicted for lbl in ['E', 'S', 'G'])
+    # Fails only when an ESG label is present without non_ESG alongside it.
+    results.check(not (flagged_esg and not flagged_non_esg),
+                  f"Non-ESG text {position} correctly classified",
+                  f"Got: {predicted}")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 7: Multi-Label Classification")
+print("="*60)
+multi_texts = [
+    ("Our sustainability report covers environmental emissions and board governance oversight.",
+     ['E', 'G']),
+    ("Employee diversity programs and carbon reduction targets were achieved.",
+     ['E', 'S']),
+    ("The board approved new worker safety and environmental policies.",
+     ['E', 'S', 'G']),
+]
+for sentence, expected in multi_texts:
+    predicted = classifier.classify(sentence)['predictions']
+    detected = [lbl for lbl in ['E', 'S', 'G'] if lbl in predicted]
+    # Lenient on purpose: one shared label is enough, not the full expected set.
+    shares_a_label = bool(set(detected).intersection(expected))
+    results.check(shares_a_label,
+                  f"Multi-label: detected {detected}",
+                  f"Expected some of {expected}")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 8: Keyword Finding")
+print("="*60)
+env_sentence = "Our carbon emissions were reduced through renewable energy and solar power investments."
+env_hits = classifier.find_keywords(env_sentence)
+results.check('E' in env_hits, "Environmental keywords found")
+# At least one of these terms must be reported under 'E'.
+accepted_terms = ['carbon', 'emission', 'renewable', 'solar', 'energy']
+results.check(any(term in env_hits.get('E', []) for term in accepted_terms),
+              "Correct E keywords identified")
+social_sentence = "Employee diversity and workforce training programs expanded."
+social_hits = classifier.find_keywords(social_sentence)
+results.check('S' in social_hits, "Social keywords found")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 9: Text Highlighting")
+print("="*60)
+highlighted = classifier.highlight(
+    "Carbon emissions and renewable energy",
+    {'E': ['carbon', 'renewable', 'energy']},
+)
+# The markup is checked only by substring: a span tag plus two style properties.
+for fragment, description in (
+    ('<span', "Highlighting adds span tags"),
+    ('background:', "Highlighting includes background color"),
+    ('border-radius', "Highlighting includes styling"),
+):
+    results.check(fragment in highlighted, description)
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 10: Visualization Functions")
+print("="*60)
+test_scores = {'E': 0.7, 'S': 0.5, 'G': 0.3, 'non_ESG': 0.2}
+test_preds = ['E', 'S']
+# Radar chart: any exception is turned into a recorded failure rather than a crash.
+try:
+    radar_fig = create_radar(test_scores)
+    results.check(radar_fig is not None, "Radar chart created successfully")
+    results.check(hasattr(radar_fig, 'data'), "Radar chart has data attribute")
+except Exception as exc:
+    results.check(False, "Radar chart creation", str(exc))
+# Bar chart: same treatment.
+try:
+    bars_fig = create_bars(test_scores, test_preds)
+    results.check(bars_fig is not None, "Bar chart created successfully")
+    results.check(hasattr(bars_fig, 'data'), "Bar chart has data attribute")
+except Exception as exc:
+    results.check(False, "Bar chart creation", str(exc))
+# Batch charts: fed three hand-written classification results.
+try:
+    sample_batch = [
+        {'scores': {'E': 0.8, 'S': 0.3, 'G': 0.2, 'non_ESG': 0.1}, 'predictions': ['E']},
+        {'scores': {'E': 0.2, 'S': 0.7, 'G': 0.4, 'non_ESG': 0.2}, 'predictions': ['S', 'G']},
+        {'scores': {'E': 0.1, 'S': 0.1, 'G': 0.1, 'non_ESG': 0.8}, 'predictions': ['non_ESG']},
+    ]
+    first_fig, second_fig = create_batch_charts(sample_batch)
+    results.check(all(fig is not None for fig in (first_fig, second_fig)),
+                  "Batch charts created successfully")
+except Exception as exc:
+    results.check(False, "Batch charts creation", str(exc))
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 11: analyze_text Function")
+print("="*60)
+try:
+    outputs = analyze_text("Carbon emissions reduction through renewable energy.")
+    results.check(len(outputs) == 6, "analyze_text returns 6 outputs")
+    # Index access (not unpacking) so a wrong length is reported as a failed check first.
+    for slot, description in (
+        (0, "Pills output is string (HTML)"),
+        (1, "Highlighted text is string (HTML)"),
+        (2, "Explanation is string"),
+    ):
+        results.check(isinstance(outputs[slot], str), description)
+    for slot, description in (
+        (3, "Radar chart is not None"),
+        (4, "Bar chart is not None"),
+    ):
+        results.check(outputs[slot] is not None, description)
+    results.check(isinstance(outputs[5], str), "Score HTML is string")
+except Exception as exc:
+    results.check(False, "analyze_text execution", str(exc))
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 12: Sample Texts")
+print("="*60)
+# Every bundled sample must yield at least one label and a positive confidence.
+for sample_name, sample_text in SAMPLES.items():
+    outcome = classifier.classify(sample_text)
+    results.check(len(outcome['predictions']) > 0,
+                  f"Sample '{sample_name}' produces predictions")
+    results.check(outcome['confidence'] > 0,
+                  f"Sample '{sample_name}' has valid confidence")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 13: Edge Cases")
+print("="*60)
+# Two-character input.
+short_outcome = classifier.classify("Hi")
+results.check('predictions' in short_outcome, "Very short text handled")
+# The same sentence repeated 100 times.
+long_text = "Carbon emissions reduction. " * 100
+long_outcome = classifier.classify(long_text)
+results.check('predictions' in long_outcome, "Very long text handled")
+results.check('E' in long_outcome['predictions'], "Long environmental text detected")
+# Punctuation, parentheses and an emoji.
+special_text = "Carbon emissions (CO2) - renewable energy! 🌿"
+special_outcome = classifier.classify(special_text)
+results.check('predictions' in special_outcome, "Special characters handled")
+# Digits and a percent sign.
+num_text = "We reduced carbon emissions by 50% in 2024."
+numeric_outcome = classifier.classify(num_text)
+results.check('predictions' in numeric_outcome, "Numbers in text handled")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 14: Score Consistency")
+print("="*60)
+# Classifying the same text twice must give identical scores and labels.
+repeated_text = "Carbon emissions and renewable energy investments."
+first_run = classifier.classify(repeated_text)
+second_run = classifier.classify(repeated_text)
+results.check(first_run['scores'] == second_run['scores'],
+              "Same input produces consistent scores")
+results.check(first_run['predictions'] == second_run['predictions'],
+              "Same input produces consistent predictions")
+# ═══════════════════════════════════════════════════════════════════════════════
+print("\n" + "="*60)
+print("🧪 TEST 15: Threshold Behavior")
+print("="*60)
+# Test that predictions respect thresholds.
+# Walk every sample exactly once instead of drawing 10 at random: the unseeded
+# random draw made runs non-reproducible and could leave some samples unchecked.
+for name, text in SAMPLES.items():
+    result = classifier.classify(text)
+    for label in CONFIG.labels:
+        if label in result['predictions']:
+            # If predicted, score should be >= threshold (with a 5% tolerance).
+            results.check(result['scores'][label] >= CONFIG.thresholds[label] * 0.95,
+                          f"Threshold respected for {label} (sample '{name}')",
+                          f"Score {result['scores'][label]:.3f} < threshold {CONFIG.thresholds[label]}")
+            break  # Just test one per sample
+# ═══════════════════════════════════════════════════════════════════════════════
+# FINAL SUMMARY
+# ═══════════════════════════════════════════════════════════════════════════════
+# Exit status mirrors the outcome so callers (shell, CI) can detect failures.
+success = results.summary()
+sys.exit(0 if success else 1)

test_batch.py ADDED Viewed

	@@ -0,0 +1,77 @@

+"""
+🧪 ESG App - Batch Processing Test
+"""
+import sys
+sys.path.insert(0, '/home/bechirdardouri/Downloads/esg_app')
+import pandas as pd
+from app import analyze_batch
+from unittest.mock import MagicMock
+class MockFile:
+    """Minimal stand-in for a file object: it only carries a path in ``name``."""
+    def __init__(self, path):
+        # ``name`` is the only attribute set; presumably analyze_batch reads the
+        # path from it -- verify against app.py.
+        self.name = path
+print("="*60)
+print("🧪 BATCH PROCESSING TEST")
+print("="*60)
+# Test with CSV file.
+# Resolve the fixture next to this script rather than through an absolute path
+# into one user's home directory, so the test runs from any checkout.
+from pathlib import Path
+file = MockFile(str(Path(__file__).resolve().parent / 'test_data.csv'))
+stats, table, fig1, fig2 = analyze_batch(file)
+print("\n📊 Stats HTML generated:", "✅" if stats and '<div' in stats else "❌")
+print("📋 Table generated:", "✅" if table is not None else "❌")
+print("📈 Distribution chart:", "✅" if fig1 is not None else "❌")
+print("📉 Trend chart:", "✅" if fig2 is not None else "❌")
+# Everything below reads the table, so it all sits behind the same None guard;
+# previously only the preview was guarded and a missing table crashed the script.
+if table is not None:
+    import re
+    total = len(table)
+    print(f"\n📋 Processed {total} documents")
+    print("\nResults Preview:")
+    print(table.to_string(index=False))
+    # Count predictions
+    e_count = table['E'].str.contains('✓').sum()
+    s_count = table['S'].str.contains('✓').sum()
+    g_count = table['G'].str.contains('✓').sum()
+    print(f"\n📊 Classification Summary:")
+    print(f"  🌿 Environmental: {e_count}/{total}")
+    print(f"  👥 Social: {s_count}/{total}")
+    print(f"  ⚖️ Governance: {g_count}/{total}")
+    # Verify expected classifications (keyed by the table's ID column).
+    expected = {
+        1: 'E',  # carbon emissions, renewable energy
+        2: 'S',  # diversity, employee
+        3: 'G',  # Board, governance, anti-corruption
+        4: 'non_ESG',  # revenue, product sales
+        5: 'E',  # waste, water conservation
+        6: 'S',  # Worker safety, community, training
+        7: 'G',  # Executive compensation, audit
+        8: 'non_ESG',  # merger, synergies
+        9: 'E',  # Solar, wind, energy
+        10: 'S',  # Human rights, labor
+    }
+    # 'non_ESG' is matched first so its letters are never counted as E, S or G.
+    label_token = re.compile(r'non_ESG|[ESG]')
+    print("\n🔍 Detailed Verification:")
+    correct = 0
+    for idx, row in table.iterrows():
+        doc_id = row['ID']
+        labels = row['Labels']
+        exp = expected.get(doc_id, 'non_ESG')
+        # The exact format of the Labels cell is not visible here (TODO confirm).
+        # For a string, compare against whole label tokens: a plain substring
+        # test would report 'E' as present in "non_ESG".
+        candidates = label_token.findall(labels) if isinstance(labels, str) else labels
+        match = exp in candidates
+        status = "✅" if match else "⚠️"
+        correct += 1 if match else 0
+        print(f"  Doc {doc_id}: Expected {exp}, Got {labels} {status}")
+    # Use the real row count as denominator; guard the empty-table case.
+    accuracy = 100 * correct / total if total else 0
+    print(f"\n📊 Accuracy: {correct}/{total} ({accuracy:.0f}%)")
+else:
+    print("\n⚠️ No results table returned; skipping classification checks")
+# Test None file
+print("\n🧪 Testing None file input...")
+result = analyze_batch(None)
+print("  None file handled:", "✅" if "upload" in result[0].lower() else "❌")
+print("\n" + "="*60)
+print("✅ BATCH PROCESSING TESTS COMPLETE")
+print("="*60)

test_data.csv ADDED Viewed

	@@ -0,0 +1,11 @@

+text
+Our company reduced carbon emissions by 40% through renewable energy investments.
+We expanded our diversity and inclusion programs for employee wellbeing.
+The Board approved new governance policies and anti-corruption measures.
+Q3 revenue grew 15% year-over-year driven by product sales.
+Sustainability efforts include waste reduction and water conservation.
+Worker safety and community training programs were enhanced.
+Executive compensation transparency and audit compliance improved.
+The merger created synergies and expanded market presence.
+Solar and wind power now provide 60% of our energy needs.
+Human rights and fair labor practices guide our supply chain.