bechir09 committed on
Commit
ed6580f
·
verified ·
1 Parent(s): 4d1bb75

Upload folder using huggingface_hub

Browse files
__pycache__/app.cpython-313.pyc ADDED
Binary file (27.7 kB). View file
 
requirements.txt CHANGED
@@ -1,11 +1,4 @@
1
- # ESG Intelligence Platform
2
- # Required packages
3
-
4
  gradio>=4.0.0
5
  plotly>=5.18.0
6
  pandas>=2.0.0
7
  numpy>=1.24.0
8
- torch>=2.0.0
9
- scikit-learn>=1.3.0
10
- transformers>=4.51.0
11
- accelerate>=0.25.0
 
 
 
 
1
  gradio>=4.0.0
2
  plotly>=5.18.0
3
  pandas>=2.0.0
4
  numpy>=1.24.0
 
 
 
 
test_app.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🧪 ESG Intelligence Platform - Comprehensive Test Suite
3
+ Tests all functionality: classification, visualization, edge cases
4
+ """
5
+
6
import sys
from pathlib import Path

# Make the ``app`` module importable regardless of the current working
# directory or the machine the tests run on.  A hard-coded absolute path into
# one developer's home directory only works on that developer's machine.
# NOTE(review): assumes app.py lives in the same directory as this test file.
sys.path.insert(0, str(Path(__file__).resolve().parent))
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ from collections import Counter
12
+
13
+ # Import the app components
14
+ from app import (
15
+ ESGClassifier, CONFIG, PATTERNS, SAMPLES,
16
+ create_radar, create_bars, create_batch_charts,
17
+ analyze_text, analyze_batch
18
+ )
19
+
20
class TestResults:
    """Tally of pass/fail outcomes for the script-style checks in this file.

    Each call to :meth:`check` prints one status line immediately and updates
    the counters; :meth:`summary` prints the totals, replays every recorded
    failure and reports overall success.
    """

    # The class name starts with "Test", so pytest would try to collect this
    # helper as a test class (and warn about its __init__) if the file is ever
    # picked up by pytest.  Opt out explicitly.
    __test__ = False

    def __init__(self) -> None:
        self.passed: int = 0          # checks whose condition was truthy
        self.failed: int = 0          # checks whose condition was falsy
        self.errors: list = []        # "name: details" for every failed check

    def check(self, condition, test_name: str, details: str = "") -> None:
        """Record one check and print its outcome.

        Args:
            condition: Truthy for a pass, falsy for a failure.
            test_name: Label printed next to the pass/fail marker.
            details: Extra context; printed and stored only on failure.
        """
        if condition:
            self.passed += 1
            print(f" ✅ {test_name}")
            return
        self.failed += 1
        self.errors.append(f"{test_name}: {details}")
        print(f" ❌ {test_name} - {details}")

    def summary(self) -> bool:
        """Print totals and all recorded failures.

        Returns:
            True when no check failed (including when no check ran at all).
        """
        total = self.passed + self.failed
        # With zero recorded checks the pass rate is undefined; report 0.0%
        # instead of raising ZeroDivisionError.
        pass_rate = 100 * self.passed / total if total else 0.0
        print(f"\n{'='*60}")
        print(f"📊 TEST SUMMARY: {self.passed}/{total} passed ({pass_rate:.1f}%)")
        if self.errors:
            print("\n❌ FAILURES:")
            for e in self.errors:
                print(f" - {e}")
        print('='*60)
        return self.failed == 0
45
+
46
# Single shared tally; every section below reports into it.
results = TestResults()

# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 1: Configuration Validation")
print("="*60)

# CONFIG must expose exactly these four labels, in this order.
results.check(CONFIG.labels == ['E', 'S', 'G', 'non_ESG'],
              "Labels defined correctly")

# Each per-label lookup table needs an entry for every label.
for attr, title in (("thresholds", "Thresholds"),
                    ("colors", "Colors"),
                    ("icons", "Icons")):
    table = getattr(CONFIG, attr)
    results.check(all(label in table for label in CONFIG.labels),
                  f"{title} defined for all labels")

# Each ESG pillar is expected to ship with more than ten keywords.
for pillar, title in (('E', "Environmental"),
                      ('S', "Social"),
                      ('G', "Governance")):
    results.check(len(CONFIG.keywords[pillar]) > 10,
                  f"{title} keywords list is populated")

# Thresholds must lie strictly between 0 and 1.
for label, thresh in CONFIG.thresholds.items():
    results.check(0.0 < thresh < 1.0,
                  f"Threshold for {label} is valid ({thresh})")
73
+
74
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 2: Classifier Basic Functionality")
print("="*60)

# Shared classifier instance, bound at module level.
classifier = ESGClassifier()

# Empty input should fall back to non_ESG with a positive confidence.
result = classifier.classify("")
results.check(result['predictions'] == ['non_ESG'],
              "Empty text returns non_ESG")
results.check(result['confidence'] > 0,
              "Empty text has valid confidence")

# Whitespace-only input should behave like empty input.
result = classifier.classify(" ")
results.check(result['predictions'] == ['non_ESG'],
              "Whitespace-only text returns non_ESG")

# Shape of the result dict for an arbitrary short text.
result = classifier.classify("test text")
results.check(all(label in result['scores'] for label in CONFIG.labels),
              "All labels have scores")
results.check(all(0 <= score <= 1 for score in result['scores'].values()),
              "All scores are in [0, 1] range")
results.check('predictions' in result and 'confidence' in result,
              "Result has predictions and confidence")
101
+
102
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 3: Environmental Classification")
print("="*60)

# Sentences that should each be tagged with the 'E' label.
env_texts = [
    "We are committed to reducing carbon emissions by 50% by 2030.",
    "Our solar and wind renewable energy investments totaled $100 million.",
    "The company achieved carbon neutrality through sustainable practices.",
    "Deforestation in our supply chain has been reduced through conservation efforts.",
    "Our waste management and recycling program diverted 90% from landfills.",
]

for number, text in enumerate(env_texts, start=1):
    result = classifier.classify(text)
    has_E = 'E' in result['predictions']
    # On failure, report what was predicted and how close the E score came.
    results.check(has_E, f"Environmental text {number} detected as E",
                  f"Got: {result['predictions']}, Score: {result['scores']['E']:.3f}")
120
+
121
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 4: Social Classification")
print("="*60)

# Sentences that should each be tagged with the 'S' label.
social_texts = [
    "Our diversity and inclusion initiatives increased female leadership to 40%.",
    "Employee health and safety remains our top priority.",
    "We invested in workforce training and community development programs.",
    "The company supports human rights throughout our supply chain.",
    "Worker welfare and fair labor practices are central to our operations.",
]

for number, text in enumerate(social_texts, start=1):
    result = classifier.classify(text)
    has_S = 'S' in result['predictions']
    # On failure, report what was predicted and how close the S score came.
    results.check(has_S, f"Social text {number} detected as S",
                  f"Got: {result['predictions']}, Score: {result['scores']['S']:.3f}")
139
+
140
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 5: Governance Classification")
print("="*60)

# Sentences that should each be tagged with the 'G' label.
gov_texts = [
    "The Board of Directors approved new governance policies.",
    "Our anti-corruption and ethics compliance program was enhanced.",
    "Executive compensation is now tied to transparency metrics.",
    "Independent audit committee oversight was strengthened.",
    "Shareholder accountability mechanisms were improved.",
]

for number, text in enumerate(gov_texts, start=1):
    result = classifier.classify(text)
    has_G = 'G' in result['predictions']
    # On failure, report what was predicted and how close the G score came.
    results.check(has_G, f"Governance text {number} detected as G",
                  f"Got: {result['predictions']}, Score: {result['scores']['G']:.3f}")
158
+
159
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 6: Non-ESG Classification")
print("="*60)

# Purely financial/commercial sentences with no ESG content.
non_esg_texts = [
    "Q3 revenue increased by 15% compared to last year.",
    "The company acquired TechCorp for $500 million.",
    "Our new product launch exceeded sales expectations.",
    "Operating margins improved due to cost optimization.",
    "The merger will create significant synergies.",
]

for number, text in enumerate(non_esg_texts, start=1):
    result = classifier.classify(text)
    has_non_esg = 'non_ESG' in result['predictions']
    esg_detected = any(label in result['predictions'] for label in ['E', 'S', 'G'])
    # Passes when non_ESG is predicted, or when no E/S/G label is predicted.
    # Note this is lenient: a prediction containing both non_ESG and an ESG
    # label still counts as a pass.
    results.check(has_non_esg or not esg_detected,
                  f"Non-ESG text {number} correctly classified",
                  f"Got: {result['predictions']}")
179
+
180
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 7: Multi-Label Classification")
print("="*60)

# (text, labels the text is expected to touch) pairs.
multi_texts = [
    ("Our sustainability report covers environmental emissions and board governance oversight.",
     ['E', 'G']),
    ("Employee diversity programs and carbon reduction targets were achieved.",
     ['E', 'S']),
    ("The board approved new worker safety and environmental policies.",
     ['E', 'S', 'G']),
]

for text, expected in multi_texts:
    result = classifier.classify(text)
    detected = [label for label in ['E', 'S', 'G'] if label in result['predictions']]
    # Deliberately lenient: one expected label among the predictions is enough.
    overlap = set(detected) & set(expected)
    results.check(len(overlap) > 0,
                  f"Multi-label: detected {detected}",
                  f"Expected some of {expected}")
202
+
203
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 8: Keyword Finding")
print("="*60)

# find_keywords is used here as a mapping from label to matched keywords.
text = "Our carbon emissions were reduced through renewable energy and solar power investments."
keywords = classifier.find_keywords(text)

results.check('E' in keywords, "Environmental keywords found")
# At least one of the obvious environmental terms should be reported under 'E'.
results.check(any(k in keywords.get('E', []) for k in ['carbon', 'emission', 'renewable', 'solar', 'energy']),
              "Correct E keywords identified")

text2 = "Employee diversity and workforce training programs expanded."
keywords2 = classifier.find_keywords(text2)
results.check('S' in keywords2, "Social keywords found")
218
+
219
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 9: Text Highlighting")
print("="*60)

text = "Carbon emissions and renewable energy"
keywords = {'E': ['carbon', 'renewable', 'energy']}
highlighted = classifier.highlight(text, keywords)

# The highlighted output should be HTML with inline-styled <span> wrappers.
results.check('<span' in highlighted, "Highlighting adds span tags")
results.check('background:' in highlighted, "Highlighting includes background color")
results.check('border-radius' in highlighted, "Highlighting includes styling")
231
+
232
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 10: Visualization Functions")
print("="*60)

test_scores = {'E': 0.7, 'S': 0.5, 'G': 0.3, 'non_ESG': 0.2}
test_preds = ['E', 'S']

# Each chart builder runs in its own try block so that an exception in one is
# recorded as a failed check instead of aborting the remaining sections.

# Test radar chart
try:
    radar = create_radar(test_scores)
    results.check(radar is not None, "Radar chart created successfully")
    results.check(hasattr(radar, 'data'), "Radar chart has data attribute")
except Exception as e:
    results.check(False, "Radar chart creation", str(e))

# Test bar chart
try:
    bars = create_bars(test_scores, test_preds)
    results.check(bars is not None, "Bar chart created successfully")
    results.check(hasattr(bars, 'data'), "Bar chart has data attribute")
except Exception as e:
    results.check(False, "Bar chart creation", str(e))

# Test batch charts
try:
    test_results = [
        {'scores': {'E': 0.8, 'S': 0.3, 'G': 0.2, 'non_ESG': 0.1}, 'predictions': ['E']},
        {'scores': {'E': 0.2, 'S': 0.7, 'G': 0.4, 'non_ESG': 0.2}, 'predictions': ['S', 'G']},
        {'scores': {'E': 0.1, 'S': 0.1, 'G': 0.1, 'non_ESG': 0.8}, 'predictions': ['non_ESG']},
    ]
    fig1, fig2 = create_batch_charts(test_results)
    results.check(fig1 is not None and fig2 is not None, "Batch charts created successfully")
except Exception as e:
    results.check(False, "Batch charts creation", str(e))
267
+
268
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 11: analyze_text Function")
print("="*60)

# analyze_text is expected to return six values: three strings, two chart
# objects, then one more string.  Any exception is recorded as one failure.
try:
    output = analyze_text("Carbon emissions reduction through renewable energy.")
    results.check(len(output) == 6, "analyze_text returns 6 outputs")
    results.check(isinstance(output[0], str), "Pills output is string (HTML)")
    results.check(isinstance(output[1], str), "Highlighted text is string (HTML)")
    results.check(isinstance(output[2], str), "Explanation is string")
    results.check(output[3] is not None, "Radar chart is not None")
    results.check(output[4] is not None, "Bar chart is not None")
    results.check(isinstance(output[5], str), "Score HTML is string")
except Exception as e:
    results.check(False, "analyze_text execution", str(e))
284
+
285
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 12: Sample Texts")
print("="*60)

# Every bundled sample must yield at least one prediction and a positive
# confidence value.
for name, text in SAMPLES.items():
    result = classifier.classify(text)
    results.check(len(result['predictions']) > 0,
                  f"Sample '{name}' produces predictions")
    results.check(result['confidence'] > 0,
                  f"Sample '{name}' has valid confidence")
296
+
297
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 13: Edge Cases")
print("="*60)

# Very short text
result = classifier.classify("Hi")
results.check('predictions' in result, "Very short text handled")

# Very long text: the same environmental sentence repeated 100 times.
long_text = "Carbon emissions reduction. " * 100
result = classifier.classify(long_text)
results.check('predictions' in result, "Very long text handled")
results.check('E' in result['predictions'], "Long environmental text detected")

# Special characters: punctuation, parentheses and a non-BMP emoji.
special_text = "Carbon emissions (CO2) - renewable energy! 🌿"
result = classifier.classify(special_text)
results.check('predictions' in result, "Special characters handled")

# Numbers in text
num_text = "We reduced carbon emissions by 50% in 2024."
result = classifier.classify(num_text)
results.check('predictions' in result, "Numbers in text handled")
321
+
322
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 14: Score Consistency")
print("="*60)

# Classifying the same text twice must give identical scores and predictions
# (i.e. the classifier is expected to be deterministic).
text = "Carbon emissions and renewable energy investments."
result1 = classifier.classify(text)
result2 = classifier.classify(text)

results.check(result1['scores'] == result2['scores'],
              "Same input produces consistent scores")
results.check(result1['predictions'] == result2['predictions'],
              "Same input produces consistent predictions")
336
+
337
# ═══════════════════════════════════════════════════════════════════════════════
print("\n" + "="*60)
print("🧪 TEST 15: Threshold Behavior")
print("="*60)

# Test that predictions respect thresholds.
# Every bundled sample is checked once, in SAMPLES order.  Drawing ten samples
# with an unseeded random generator would make runs non-reproducible and could
# test some samples repeatedly while never touching others.
for name, text in SAMPLES.items():
    result = classifier.classify(text)

    # Only the first predicted label (in CONFIG.labels order) is checked for
    # each sample.
    label = next((l for l in CONFIG.labels if l in result['predictions']), None)
    if label is None:
        continue

    score = result['scores'][label]
    threshold = CONFIG.thresholds[label]
    # If predicted, score should be >= threshold (with a small 5% tolerance).
    results.check(score >= threshold * 0.95,
                  f"Threshold respected for {label}",
                  f"Sample '{name}': Score {score:.3f} < threshold {threshold}")
354
+
355
# ═══════════════════════════════════════════════════════════════════════════════
# FINAL SUMMARY
# ═══════════════════════════════════════════════════════════════════════════════

# Print the totals, then terminate with status 0 when every check passed and
# status 1 otherwise, so callers can detect failures from the exit code.
success = results.summary()
raise SystemExit(0 if success else 1)
test_batch.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🧪 ESG App - Batch Processing Test
3
+ """
4
import sys
from pathlib import Path

# Make the ``app`` module importable regardless of the current working
# directory or the machine the script runs on.  A hard-coded absolute path
# into one developer's home directory only works on that developer's machine.
# NOTE(review): assumes app.py lives in the same directory as this script.
sys.path.insert(0, str(Path(__file__).resolve().parent))
6
+
7
+ import pandas as pd
8
+ from app import analyze_batch
9
+ from unittest.mock import MagicMock
10
+
11
from pathlib import Path


class MockFile:
    """Minimal file-like stand-in exposing only a ``name`` attribute.

    ``analyze_batch`` is called with this object below; presumably it reads
    the path from ``.name`` the way it would for an uploaded file (confirm
    against ``app.analyze_batch``).
    """

    def __init__(self, path):
        self.name = path


def _has_label(labels, wanted):
    """Return True if *wanted* is one of the labels in a table row.

    The exact format of the ``Labels`` column is not visible from this script.
    When it is a string, a plain substring test would report 'E', 'S' and 'G'
    as present in any row labelled "non_ESG" (that word contains all three
    letters), so "non_ESG" is stripped out before looking for a pillar label.
    """
    if not isinstance(labels, str):
        return wanted in labels
    if wanted == 'non_ESG':
        return 'non_ESG' in labels
    return wanted in labels.replace('non_ESG', '')


print("="*60)
print("🧪 BATCH PROCESSING TEST")
print("="*60)

# Test with CSV file.  The fixture is resolved relative to this script so the
# test does not depend on one particular machine's directory layout.
csv_path = Path(__file__).resolve().parent / 'test_data.csv'
file = MockFile(str(csv_path))
stats, table, fig1, fig2 = analyze_batch(file)

print("\n📊 Stats HTML generated:", "✅" if stats and '<div' in stats else "❌")
print("📋 Table generated:", "✅" if table is not None else "❌")
print("📈 Distribution chart:", "✅" if fig1 is not None else "❌")
print("📉 Trend chart:", "✅" if fig2 is not None else "❌")

if table is not None:
    # Use the real row count as the denominator rather than assuming 10.
    total = len(table)

    print(f"\n📋 Processed {total} documents")
    print("\nResults Preview:")
    print(table.to_string(index=False))

    # Count predictions: a pillar column counts when it contains a check mark.
    e_count = table['E'].str.contains('✓').sum()
    s_count = table['S'].str.contains('✓').sum()
    g_count = table['G'].str.contains('✓').sum()

    print("\n📊 Classification Summary:")
    print(f" 🌿 Environmental: {e_count}/{total}")
    print(f" 👥 Social: {s_count}/{total}")
    print(f" ⚖️ Governance: {g_count}/{total}")

    # Verify expected classifications (keyed by the table's ID column).
    expected = {
        1: 'E',         # carbon emissions, renewable energy
        2: 'S',         # diversity, employee
        3: 'G',         # Board, governance, anti-corruption
        4: 'non_ESG',   # revenue, product sales
        5: 'E',         # waste, water conservation
        6: 'S',         # Worker safety, community, training
        7: 'G',         # Executive compensation, audit
        8: 'non_ESG',   # merger, synergies
        9: 'E',         # Solar, wind, energy
        10: 'S',        # Human rights, labor
    }

    print("\n🔍 Detailed Verification:")
    correct = 0
    for _, row in table.iterrows():
        doc_id = row['ID']
        labels = row['Labels']
        # IDs without an entry above are expected to be non-ESG.
        exp = expected.get(doc_id, 'non_ESG')
        match = _has_label(labels, exp)
        status = "✅" if match else "⚠️"
        correct += 1 if match else 0
        print(f" Doc {doc_id}: Expected {exp}, Got {labels} {status}")

    # Guard against an empty table so the percentage cannot divide by zero.
    accuracy = 100 * correct / total if total else 0.0
    print(f"\n📊 Accuracy: {correct}/{total} ({accuracy:.0f}%)")

# Test None file: the first returned element is expected to be a message
# asking the user to upload a file.
print("\n🧪 Testing None file input...")
result = analyze_batch(None)
print(" None file handled:", "✅" if "upload" in result[0].lower() else "❌")

print("\n" + "="*60)
print("✅ BATCH PROCESSING TESTS COMPLETE")
print("="*60)
test_data.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text
2
+ Our company reduced carbon emissions by 40% through renewable energy investments.
3
+ We expanded our diversity and inclusion programs for employee wellbeing.
4
+ The Board approved new governance policies and anti-corruption measures.
5
+ Q3 revenue grew 15% year-over-year driven by product sales.
6
+ Sustainability efforts include waste reduction and water conservation.
7
+ Worker safety and community training programs were enhanced.
8
+ Executive compensation transparency and audit compliance improved.
9
+ The merger created synergies and expanded market presence.
10
+ Solar and wind power now provide 60% of our energy needs.
11
+ Human rights and fair labor practices guide our supply chain.