GirishaBuilds01 committed
Commit e4c527f · verified · 1 Parent(s): 1c87fd2

Update app.py

Files changed (1)
  app.py +275 -158
app.py CHANGED
@@ -1,209 +1,326 @@
  """
- ESG Document Intelligence Prototype
- Qdrant vector search + Discourse Graph
  """

  import gradio as gr
  import re
  import json
  from pathlib import Path

- # ── lazy imports ──────────────────────────────────────────────────────────────
- def get_qdrant():
-     from qdrant_client import QdrantClient
-     from qdrant_client.models import Distance, VectorParams, PointStruct
-     return QdrantClient, Distance, VectorParams, PointStruct
-
- def get_embedder():
-     from sentence_transformers import SentenceTransformer
-     return SentenceTransformer("all-MiniLM-L6-v2")
-
- def get_pdfplumber():
-     import pdfplumber
-     return pdfplumber
-
- # ── Config ────────────────────────────────────────────────────────────────────
- COLLECTION = "esg"
- DIM = 384

  GREENWASHING_KW = [
      "carbon neutral", "net-zero", "net zero", "zero emissions",
      "100% renewable", "carbon offset", "zero waste", "eco-friendly",
-     "fully sustainable", "nature positive", "carbon negative"
  ]

- ESG_KW = {
-     "🌿 Environmental": ["carbon","emission","climate","renewable","energy","water","waste","pollution","solar","biodiversity"],
-     "👥 Social": ["employee","diversity","inclusion","health","safety","human rights","labor","gender","community"],
-     "🏛️ Governance": ["board","audit","compliance","ethics","transparency","corruption","disclosure","regulation","policy"]
  }

- # ── State ─────────────────────────────────────────────────────────────────────
- state = {"client": None, "embedder": None, "chunks": [], "name": "", "ready": False}
-
- # ── Init ──────────────────────────────────────────────────────────────────────
- def init():
-     if state["embedder"] is None:
-         state["embedder"] = get_embedder()
-     if state["client"] is None:
-         QdrantClient, Distance, VectorParams, _ = get_qdrant()
-         c = QdrantClient(":memory:")
-         c.recreate_collection(COLLECTION, vectors_config=VectorParams(size=DIM, distance=Distance.COSINE))
-         state["client"] = c
-
- # ── PDF + chunking ────────────────────────────────────────────────────────────
- def load_pdf(path):
-     pdfplumber = get_pdfplumber()
      pages = []
      with pdfplumber.open(path) as pdf:
          for i, p in enumerate(pdf.pages):
              t = (p.extract_text() or "").strip()
              if t:
-                 pages.append({"page": i+1, "text": t})
      return pages

- def chunk(pages, size=250):
-     out = []
      for pg in pages:
-         words = pg["text"].split()
-         for s in range(0, len(words), size):
-             t = " ".join(words[s:s+size])
-             if len(t) > 30:
-                 out.append({"page": pg["page"], "text": t})
      return out

- # ── Discourse role ────────────────────────────────────────────────────────────
- def role(text):
      t = text.lower()
-     if any(k in t for k in GREENWASHING_KW): return "claim"
-     if any(k in t for k in ["%","tonnes","kwh","mwh"]): return "evidence"
-     if any(k in t for k in ["target","goal","by 2030","by 2050","we will","commitment"]): return "policy"
-     if any(k in t for k in ["kpi","metric","indicator"]): return "metric"
      return "context"

- # ── Handlers ──────────────────────────────────────────────────────────────────
- def process(pdf):
-     if pdf is None: return "⚠️ Upload a PDF first."
      try:
-         init()
-         _, _, _, PointStruct = get_qdrant()
-         pages = load_pdf(pdf.name)
-         chunks = chunk(pages)
-         embeds = state["embedder"].encode([c["text"] for c in chunks], batch_size=32, normalize_embeddings=True)
-         state["client"].recreate_collection(COLLECTION,
-             vectors_config=__import__("qdrant_client").models.VectorParams(size=DIM,
-             distance=__import__("qdrant_client").models.Distance.COSINE))
-         pts = [PointStruct(id=i, vector=e.tolist(), payload={"page": c["page"], "text": c["text"]})
-                for i,(c,e) in enumerate(zip(chunks, embeds))]
-         state["client"].upsert(COLLECTION, pts)
-         state["chunks"] = chunks
-         state["name"] = Path(pdf.name).name
-         state["ready"] = True
-         roles = {}
-         for c in chunks:
-             r = role(c["text"]); roles[r] = roles.get(r,0)+1
-         return (f"✅ **{state['name']}** processed\n\n"
-                 f"- Pages: **{len(pages)}** | Chunks: **{len(chunks)}**\n"
-                 f"- Discourse nodes: `{json.dumps(roles)}`\n\n"
-                 "Explore the tabs →")
      except Exception as e:
-         return f"❌ {e}"
-
- def ask(q):
-     if not state["ready"]: return "⚠️ Upload a document first.", ""
-     if not q.strip(): return "⚠️ Enter a question.", ""
-     vec = state["embedder"].encode([q], normalize_embeddings=True)[0].tolist()
-     hits = state["client"].search(COLLECTION, vec, limit=4, with_payload=True)
-     ans = f"**Results from {state['name']}:**\n\n"
-     evid = "### 📎 Retrieved Evidence\n\n"
      for i, h in enumerate(hits, 1):
-         pg, txt = h.payload["page"], h.payload["text"]
-         r = role(txt)
-         ans += f"📄 **Page {pg}:** {txt[:280]}…\n\n"
-         evid += f"**[{i}] Page {pg} | score {h.score:.3f} | role `{r}`**\n> {txt[:220]}…\n\n"
-     return ans, evid
-
- def esg_scores():
-     if not state["ready"]: return "⚠️ Upload a document first."
-     text = " ".join(c["text"] for c in state["chunks"]).lower()
-     counts = {k: sum(text.count(w) for w in ws) for k,ws in ESG_KW.items()}
-     total = sum(counts.values()) or 1
-     scores = {k: round(v/total*100,1) for k,v in counts.items()}
-     overall = round(sum(scores.values())/3, 1)
-     def bar(v): return "█"*int(v/5) + "░"*(20-int(v/5))
-     rows = "\n".join(f"| {k} | {v}% | `{bar(v)}` |" for k,v in scores.items())
-     return (f"## 📊 ESG Scores — *{state['name']}*\n\n"
-             f"| Pillar | Score | Bar |\n|--------|-------|-----|\n{rows}\n"
-             f"| ⭐ Overall | **{overall}%** | `{bar(overall)}` |\n\n"
-             "> Keyword-density proxy scores.")
-
- def greenwashing():
-     if not state["ready"]: return "⚠️ Upload a document first."
-     flags, seen = [], set()
-     for c in state["chunks"]:
-         t = c["text"].lower()
-         matched = [k for k in GREENWASHING_KW if k in t]
-         if matched:
-             key = (c["page"], matched[0])
-             if key not in seen:
-                 seen.add(key)
-                 verified = any(w in t for w in ["certified","verified","audited","third party","sbti"])
-                 flags.append({"page":c["page"],"kws":matched,"snip":c["text"][:200],"ok":verified})
-     if not flags: return "✅ No greenwashing keywords found."
      bad = [f for f in flags if not f["ok"]]
      good = [f for f in flags if f["ok"]]
-     out = [f"## 🚨 Greenwashing — *{state['name']}*\n",
-            f"{len(bad)} unverified ⚠️ | {len(good)} with evidence ✅\n\n---\n"]
      if bad:
-         out.append("### ⚠️ Unverified\n")
          for f in bad:
-             out.append(f"📍 **Page {f['page']}** — `{'`, `'.join(f['kws'])}`\n> {f['snip']}…\n")
      if good:
-         out.append("\n### ✅ Evidenced\n")
          for f in good:
-             out.append(f"📍 **Page {f['page']}** — `{'`, `'.join(f['kws'])}`\n> {f['snip']}…\n")
      return "\n".join(out)

- def graph():
-     if not state["ready"]: return "⚠️ Upload a document first."
-     roles = {}
-     for c in state["chunks"]:
-         r = role(c["text"]); roles[r] = roles.get(r,0)+1
-     rows = "\n".join(f"| `{r}` | {n} |" for r,n in sorted(roles.items(), key=lambda x:-x[1]))
-     return (f"## 🕸️ Discourse Graph — *{state['name']}*\n\n"
-             f"| Role | Chunks |\n|------|--------|\n{rows}\n\n"
-             "**Edges modelled:**\n"
-             "- `follows` — sequential chunks\n"
-             "- `supported_by` — claim → evidence\n"
-             "- `measured_by` — policy → metric\n\n"
-             "> Graph expansion enables multi-hop HyperRAG retrieval.")
-
- # ── UI ────────────────────────────────────────────────────────────────────────
- with gr.Blocks(title="ESG Intelligence") as demo:
-     gr.Markdown("# 🌿 ESG Document Intelligence\n*Qdrant semantic search · Discourse graph reasoning*")

      with gr.Tab("📤 Upload"):
-         f = gr.File(label="ESG Report PDF", file_types=[".pdf"])
-         btn = gr.Button("Process", variant="primary")
-         out = gr.Markdown("Upload a PDF and click Process.")
-         btn.click(process, f, out)

      with gr.Tab("💬 Q&A"):
-         q = gr.Textbox(label="Question", placeholder="What are the carbon reduction targets?")
-         btn2 = gr.Button("Ask", variant="primary")
-         ans = gr.Markdown()
-         ev = gr.Markdown()
-         gr.Examples([["What are Scope 1 and 2 emissions?"],["What diversity initiatives exist?"],
-                      ["What are the renewable energy targets?"],["What governance policies are in place?"]], q)
-         btn2.click(ask, q, [ans, ev])

      with gr.Tab("📊 ESG Scores"):
-         gr.Button("Compute", variant="primary").click(esg_scores, outputs=gr.Markdown())

      with gr.Tab("🚨 Greenwashing"):
-         gr.Button("Detect", variant="primary").click(greenwashing, outputs=gr.Markdown())

-     with gr.Tab("🕸️ Discourse Graph"):
-         gr.Button("Show", variant="primary").click(graph, outputs=gr.Markdown())

  demo.launch()
 
  """
+ ESG Report Analyser — working prototype for HuggingFace Spaces
+ No ML models. No vector DB. Just pdfplumber + Gradio. Fully functional.
  """

  import gradio as gr
  import re
  import json
  from pathlib import Path
+ from collections import Counter

+ # ─────────────────────────────────────────────────────────────────────────────
+ # CONFIG
+ # ─────────────────────────────────────────────────────────────────────────────

  GREENWASHING_KW = [
      "carbon neutral", "net-zero", "net zero", "zero emissions",
      "100% renewable", "carbon offset", "zero waste", "eco-friendly",
+     "fully sustainable", "nature positive", "carbon negative",
+     "climate positive", "green certified", "biodegradable"
  ]

+ ESG = {
+     "Environmental": ["carbon","emission","climate","renewable","energy","water",
+                       "waste","pollution","solar","wind","biodiversity","greenhouse",
+                       "deforestation","recycl","fossil"],
+     "Social": ["employee","diversity","inclusion","health","safety",
+                "human rights","labour","labor","gender","community",
+                "training","wellbeing","wage","stakeholder"],
+     "Governance": ["board","audit","compliance","ethics","transparent",
+                    "corruption","disclosure","regulation","policy",
+                    "shareholder","executive","accountability","risk"]
+ }
+
+ SECTORS = {
+     "Energy & Utilities": ["oil","gas","electricity","utility","power plant"],
+     "Finance & Banking": ["bank","investment","portfolio","loan","insurance"],
+     "Technology": ["software","data center","cloud","semiconductor"],
+     "Manufacturing": ["factory","manufacturing","production","supply chain"],
+     "Consumer Goods": ["retail","consumer","packaging","brand","fmcg"],
+     "Healthcare": ["pharmaceutical","medical","hospital","clinical"],
+     "Agriculture & Food": ["agriculture","food","farming","crop","livestock"],
+     "Transportation": ["aviation","shipping","fleet","transport","logistics"],
  }

+ # ─────────────────────────────────────────────────────────────────────────────
+ # STATE
+ # ─────────────────────────────────────────────────────────────────────────────
+
+ doc = {"pages": [], "text": "", "name": ""}  # always reset on new upload
+
+ # ─────────────────────────────────────────────────────────────────────────────
+ # PDF PARSING
+ # ─────────────────────────────────────────────────────────────────────────────
+
+ def parse_pdf(path):
+     import pdfplumber
      pages = []
      with pdfplumber.open(path) as pdf:
          for i, p in enumerate(pdf.pages):
              t = (p.extract_text() or "").strip()
              if t:
+                 pages.append({"page": i + 1, "text": t})
      return pages

+ # ─────────────────────────────────────────────────────────────────────────────
+ # SEARCH (simple sentence-level keyword ranking — no model needed)
+ # ─────────────────────────────────────────────────────────────────────────────
+
+ def search(query, pages, top_k=5):
+     """Split every page into sentences, score by query word overlap, return best."""
+     q_words = set(re.sub(r"[^\w\s]", "", query.lower()).split())
+     candidates = []
      for pg in pages:
+         # split on period / newline
+         sentences = re.split(r"(?<=[.!?])\s+|\n", pg["text"])
+         for sent in sentences:
+             if len(sent.split()) < 5:
+                 continue
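+             # relevance score: total occurrences of query words in this sentence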
+             score = sum(sent.lower().count(w) for w in q_words)
+             if score > 0:
+                 candidates.append({"page": pg["page"], "text": sent.strip(), "score": score})
+     candidates.sort(key=lambda x: -x["score"])
+     # deduplicate by first 60 chars
+     seen, out = set(), []
+     for c in candidates:
+         key = c["text"][:60]
+         if key not in seen:
+             seen.add(key)
+             out.append(c)
+             if len(out) == top_k:
+                 break
      return out

+ # ─────────────────────────────────────────────────────────────────────────────
+ # ANALYSIS HELPERS
+ # ─────────────────────────────────────────────────────────────────────────────
+
+ def esg_scores(text):
      t = text.lower()
+     raw = {k: sum(t.count(w) for w in ws) for k, ws in ESG.items()}
+     total = sum(raw.values()) or 1
+     return {k: round(v / total * 100, 1) for k, v in raw.items()}
+
+ def detect_sector(text):
+     t = text.lower()
+     hits = {s: sum(t.count(w) for w in ws) for s, ws in SECTORS.items()}
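+     # pick the sector with the most keyword hits; first one wins on ties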
+     best = max(hits, key=hits.get)
+     return best if hits[best] > 0 else "General / Diversified"
+
+ def greenwash_flags(pages):
+     flags, seen = [], set()
+     for pg in pages:
+         t = pg["text"].lower()
+         matched = [kw for kw in GREENWASHING_KW if kw in t]
+         for kw in matched:
+             if (pg["page"], kw) not in seen:
+                 seen.add((pg["page"], kw))
+                 # grab the sentence containing the keyword
+                 sentences = re.split(r"(?<=[.!?])\s+|\n", pg["text"])
+                 snip = next((s for s in sentences if kw in s.lower()), pg["text"][:180])
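+                 # treat the claim as evidenced if assurance language appears anywhere on the page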
+                 verified = any(w in t for w in ["certified","verified","audited","third party","sbti","independently"])
+                 flags.append({"page": pg["page"], "kw": kw, "snip": snip[:220], "ok": verified})
+     return flags
+
+ def classify_sentence(s):
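+     # first matching rule wins: claim > evidence > policy > metric > context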
+     t = s.lower()
+     if any(k in t for k in GREENWASHING_KW): return "claim"
+     if any(k in t for k in ["%","tonne","kwh","mwh","litre","gallon"]): return "evidence"
+     if any(k in t for k in ["target","goal","by 2030","by 2050","we will","commit"]): return "policy"
+     if any(k in t for k in ["kpi","metric","indicator","index"]): return "metric"
      return "context"

+ def build_graph_summary(pages):
+     role_counts = Counter()
+     edges = {"follows": 0, "claim→evidence": 0, "policy→metric": 0}
+     prev_role = None
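+     # walk sentences in reading order: each consecutive pair is a `follows` edge,
+     # and claim→evidence / policy→metric pairs are counted separately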
+     for pg in pages:
+         sentences = re.split(r"(?<=[.!?])\s+|\n", pg["text"])
+         for sent in sentences:
+             if len(sent.split()) < 4:
+                 continue
+             r = classify_sentence(sent)
+             role_counts[r] += 1
+             if prev_role:
+                 edges["follows"] += 1
+                 if prev_role == "claim" and r == "evidence":
+                     edges["claim→evidence"] += 1
+                 if prev_role == "policy" and r == "metric":
+                     edges["policy→metric"] += 1
+             prev_role = r
+     return role_counts, edges
+
+ # ─────────────────────────────────────────────────────────────────────────────
+ # GRADIO HANDLERS
+ # ─────────────────────────────────────────────────────────────────────────────
+
+ def handle_upload(pdf):
+     if pdf is None:
+         return "⚠️ Upload a PDF file."
      try:
+         pages = parse_pdf(pdf.name)
+         if not pages:
+             return "❌ No text found. Make sure the PDF is not a scanned image."
+         doc["pages"] = pages
+         doc["text"] = " ".join(p["text"] for p in pages)
+         doc["name"] = Path(pdf.name).name
+         role_c, _ = build_graph_summary(pages)
+         return (
+             f"✅ **{doc['name']}** loaded\n\n"
+             f"- **{len(pages)} pages** parsed\n"
+             f"- **{sum(role_c.values())} sentences** analysed\n"
+             f"- Node roles: `{dict(role_c)}`\n\n"
+             "Use the tabs above to explore the report."
+         )
      except Exception as e:
+         return f"❌ Error: {e}"
+
+
+ def handle_qa(question):
+     if not doc["pages"]:
+         return "⚠️ Upload a document first.", ""
+     if not question.strip():
+         return "⚠️ Type a question.", ""
+
+     hits = search(question, doc["pages"])
+     if not hits:
+         return "Nothing relevant found. Try different keywords.", ""
+
+     answer = f"### Answer — *{doc['name']}*\n\n"
+     for h in hits:
+         answer += f"**Page {h['page']}:** {h['text']}\n\n"
+
+     evidence = "### 📎 Matched Sentences\n\n"
      for i, h in enumerate(hits, 1):
+         r = classify_sentence(h["text"])
+         evidence += f"**[{i}] Page {h['page']} · role `{r}` · score {h['score']}**\n> {h['text']}\n\n"
+
+     return answer, evidence
+
+
+ def handle_scores():
+     if not doc["pages"]:
+         return "⚠️ Upload a document first."
+     scores = esg_scores(doc["text"])
+     sector = detect_sector(doc["text"])
+     overall = round(sum(scores.values()) / 3, 1)
+
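+     # render a 20-char bar: one block per 5 percentage points, capped at 100%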
+     def bar(v):
+         f = min(int(v / 5), 20)
+         return "█" * f + "░" * (20 - f)
+
+     icons = {"Environmental": "🌿", "Social": "👥", "Governance": "🏛️"}
+     rows = "\n".join(
+         f"| {icons[k]} {k} | {v}% | `{bar(v)}` |"
+         for k, v in scores.items()
+     )
+     return (
+         f"## 📊 ESG Scores — *{doc['name']}*\n\n"
+         f"| Pillar | Score | Bar |\n|--------|-------|-----|\n{rows}\n"
+         f"| ⭐ Overall | **{overall}%** | `{bar(overall)}` |\n\n"
+         f"**Sector detected:** {sector}\n\n"
+         "> Scores reflect keyword frequency across the report."
+     )
+
+
+ def handle_greenwash():
+     if not doc["pages"]:
+         return "⚠️ Upload a document first."
+     flags = greenwash_flags(doc["pages"])
+     if not flags:
+         return "✅ No greenwashing keywords detected in this document."
+
      bad = [f for f in flags if not f["ok"]]
      good = [f for f in flags if f["ok"]]
+
+     out = [f"## 🚨 Greenwashing Scan — *{doc['name']}*\n",
+            f"**{len(bad)} unverified ⚠️** &nbsp;|&nbsp; **{len(good)} evidenced ✅**\n\n---\n"]
+
      if bad:
+         out.append("### ⚠️ Unverified Claims\n")
          for f in bad:
+             out.append(f"📍 **Page {f['page']}** — `{f['kw']}`\n> {f['snip']}\n")
+
      if good:
+         out.append("\n### ✅ Claims With Supporting Evidence\n")
          for f in good:
+             out.append(f"📍 **Page {f['page']}** — `{f['kw']}`\n> {f['snip']}\n")
+
      return "\n".join(out)

+
+ def handle_graph():
+     if not doc["pages"]:
+         return "⚠️ Upload a document first."
+     role_c, edges = build_graph_summary(doc["pages"])
+     total_nodes = sum(role_c.values())
+     total_edges = sum(edges.values())
+
+     role_rows = "\n".join(
+         f"| `{r}` | {n} | {round(n/total_nodes*100,1)}% |"
+         for r, n in role_c.most_common()
+     )
+     edge_rows = "\n".join(f"| `{e}` | {n} |" for e, n in edges.items())
+
+     return (
+         f"## 🕸️ Discourse Graph — *{doc['name']}*\n\n"
+         f"**{total_nodes} nodes** (sentences) · **{total_edges} edges**\n\n"
+         f"### Node Roles\n| Role | Count | Share |\n|------|-------|-------|\n{role_rows}\n\n"
+         f"### Edge Types\n| Relation | Count |\n|----------|-------|\n{edge_rows}\n\n"
+         "**How edges are inferred:**\n"
+         "- Every consecutive sentence pair → `follows`\n"
+         "- `claim` followed by `evidence` → `claim→evidence`\n"
+         "- `policy` followed by `metric` → `policy→metric`\n\n"
+         "> These relations power multi-hop retrieval: a question hitting a **claim** node "
+         "automatically expands to its linked **evidence** nodes."
+     )
+
+ # ─────────────────────────────────────────────────────────────────────────────
+ # UI
+ # ─────────────────────────────────────────────────────────────────────────────
+
+ with gr.Blocks(title="ESG Analyser") as demo:
+
+     gr.Markdown(
+         "# 🌿 ESG Report Analyser\n"
+         "Upload a sustainability / ESG report PDF and explore it instantly."
+     )

      with gr.Tab("📤 Upload"):
+         up_file = gr.File(label="ESG Report (PDF)", file_types=[".pdf"])
+         up_btn = gr.Button("Process Document", variant="primary")
+         up_out = gr.Markdown("Upload a PDF above and click **Process Document**.")
+         up_btn.click(handle_upload, up_file, up_out)

      with gr.Tab("💬 Q&A"):
+         q_box = gr.Textbox(label="Ask anything about the report",
+                            placeholder="e.g. What are the carbon reduction targets?")
+         q_btn = gr.Button("Ask", variant="primary")
+         q_ans = gr.Markdown()
+         q_ev = gr.Markdown()
+         gr.Examples([
+             ["What are the Scope 1 and 2 emissions?"],
+             ["What diversity and inclusion initiatives are mentioned?"],
+             ["What renewable energy commitments has the company made?"],
+             ["What governance and audit policies are described?"],
+             ["How does the company manage supply chain risks?"],
+         ], inputs=q_box)
+         q_btn.click(handle_qa, q_box, [q_ans, q_ev])

      with gr.Tab("📊 ESG Scores"):
+         s_btn = gr.Button("Compute ESG Scores", variant="primary")
+         s_out = gr.Markdown()
+         s_btn.click(handle_scores, outputs=s_out)

      with gr.Tab("🚨 Greenwashing"):
+         g_btn = gr.Button("Scan for Greenwashing", variant="primary")
+         g_out = gr.Markdown()
+         g_btn.click(handle_greenwash, outputs=g_out)

+     with gr.Tab("🕸️ Graph"):
+         d_btn = gr.Button("Build Discourse Graph", variant="primary")
+         d_out = gr.Markdown()
+         d_btn.click(handle_graph, outputs=d_out)

  demo.launch()