rishabh5752 committed on
Commit
d9733a9
·
verified ·
1 Parent(s): f50095b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -300
app.py CHANGED
@@ -1,319 +1,127 @@
1
- # app.py ──────────────────────────────────────────────────────────────────────
2
- import os, json, tempfile, unicodedata, textwrap, re
3
  import gradio as gr
4
- from fpdf import FPDF
5
- from transformers import pipeline
6
- from langchain.document_loaders import PyPDFLoader
7
- from langchain.vectorstores import FAISS
8
- from langchain.embeddings import HuggingFaceEmbeddings
9
- import plotly.graph_objects as go
10
 
11
- # ---------- 0 | External corpora ------------------------------------------------
12
- POLICY_URLS = {
13
- "DPDP Act 2023": "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf",
14
- "Responsible AI (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf",
15
- "National AI Strategy (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2023-03/National-Strategy-for-Artificial-Intelligence.pdf",
16
- "IS 17428-1 (Data Privacy Assurance)": "https://archive.org/download/gov.in.is.17428.1.2020/gov.in.is.17428.1.2020.pdf",
17
- "RBI FREE-AI Framework 2025": "https://assets.kpmg.com/content/dam/kpmgsites/in/pdf/2025/08/rbi-free-ai-committee-report-on-framework-for-responsible-and-ethical-enablement-of-artificial-intelligence.pdf.coredownload.inline.pdf",
18
- "OECD AI Principles": "https://oecd.ai/en/assets/files/OECD-LEGAL-0449-en.pdf",
19
- "EU AI Act 2024": "https://eur-lex.europa.eu/resource.html?uri=cellar:99db59ed-3b7b-11ef-9e3c-01aa75ed71a1.0001.02/DOC_1&format=PDF",
20
- "ISO/IEC 42001:2023": "https://standards.iteh.ai/catalog/standards/iso/44d7188c-9cb8-4f0f-a358-06c7ce3e64f9/iso-iec-42001-2023.pdf",
21
- "ISO/IEC 23894:2023": "https://cdn.standards.iteh.ai/samples/77304/cb803ee4e9624430a5db177459158b24/ISO-IEC-23894-2023.pdf",
22
- }
23
-
24
- INDUSTRY_MAP = {
25
- "Finance": ["DPDP Act 2023", "RBI FREE-AI Framework 2025",
26
- "IS 17428-1 (Data Privacy Assurance)", "OECD AI Principles"],
27
- "Health Care": ["DPDP Act 2023", "Responsible AI (NITI Aayog)",
28
- "ISO/IEC 23894:2023", "OECD AI Principles"],
29
- "E-Commerce": ["DPDP Act 2023", "IS 17428-1 (Data Privacy Assurance)",
30
- "OECD AI Principles", "EU AI Act 2024"],
31
- "All": list(POLICY_URLS.keys()),
32
- }
33
-
34
- # ---------- 1 | Local-LLM & embedding models -----------------------------------
35
- print("Loading local language models …")
36
- rewriter = pipeline(
37
- "text2text-generation",
38
- model="google/flan-t5-large", # <-β€” self-hosted, no key needed
39
- tokenizer="google/flan-t5-large",
40
- device_map="auto",
41
- )
42
-
43
- embeddings = HuggingFaceEmbeddings(
44
- model_name="sentence-transformers/all-MiniLM-L6-v2"
45
- )
46
-
47
- # ---------- 2 | Build vector DB (one-off at start-up) --------------------------
48
- print("Indexing policy PDFs for RAG … (first run can take ~1-2 min)")
49
- docs = []
50
- for name, url in POLICY_URLS.items():
51
- # cached downloads by langchain, so subsequent runs are fast
52
- try:
53
- loader = PyPDFLoader(url)
54
- docs += loader.load_and_split(chunk_size=1000)
55
- except Exception as e:
56
- print(f"⚠️ Could not load {name}: {e}")
57
-
58
- vectordb = FAISS.from_documents(docs, embeddings)
59
-
60
- # ---------- 3 | Survey definition ---------------------------------------------
61
  QUESTIONS = [
62
- {"label": "Company / Project Name", "type": "text", "key": "company"},
63
- {"label": "Industry", "type": "dropdown", "choices": list(INDUSTRY_MAP.keys())[:-1], "key": "industry"},
64
- {"label": "1. Is data encryption implemented?", "type": "radio", "choices": ["Yes", "No", "Partially"], "key": "encrypt"},
65
- {"label": "2. Are regular security audits conducted?", "type": "radio", "choices": ["Yes", "No", "Partially"], "key": "audit"},
66
- {"label": "3. Is the privacy policy up to date?", "type": "radio", "choices": ["Yes", "No", "Partially"], "key": "privacy"},
67
- {"label": "4. Employee training conducted for:", "type": "checkbox", "choices": ["Technical Staff", "HR", "All Employees", "None"], "key": "training"},
68
- {"label": "5. Access-control maturity (1-5):", "type": "slider", "min": 1, "max": 5, "key": "access"},
69
- {"label": "6. Are third-party vendors assessed?", "type": "radio", "choices": ["Yes", "No", "Sometimes"], "key": "vendor"},
70
- {"label": "7. Additional notes (optional)", "type": "text_area", "key": "notes"},
 
 
 
 
 
 
71
  ]
72
 
73
- # ---------- 4 | Helper: PDF cleans --------------------------------------------
74
- def clean(txt: str) -> str:
75
- """Strip characters FPDF can't handle."""
76
- txt = (
77
- txt.replace("β€œ", '"').replace("”", '"')
78
- .replace("’", "'").replace("β€˜", "'")
79
- .replace("–", "-").replace("β€”", "-").replace("-", "-")
80
- )
81
- return unicodedata.normalize("NFKD", txt).encode("latin1", "ignore").decode("latin1")
82
-
83
- # ---------- 5 | Few-shot examples for the LLM chain ---------------------------
84
- SYSTEM = (
85
- "You are a senior AI governance & compliance analyst. "
86
- "Given raw survey answers, you must:\n"
87
- "1. Classify each answer into findings with title, severity (Low/Med/High), "
88
- " detail, likelihood (1-5) and impact (1-5).\n"
89
- "2. Draft one concrete remediation action per finding "
90
- " (title, priority P1-P3, owner role).\n"
91
- "3. Compute an overall maturity_score 0-100 (higher is better).\n"
92
- "4. Return ONLY valid JSON with schema:\n"
93
- "{"
94
- '"maturity_score": int,'
95
- '"findings":[{"title":str,"severity":str,"detail":str,"likelihood":int,"impact":int}],'
96
- '"actions":[{"title":str,"priority":str,"owner":str}]}'
97
- )
98
-
99
- EXAMPLES = [
100
- {
101
- "answers": {
102
- "encrypt": "No",
103
- "audit": "Partially",
104
- "privacy": "Yes",
105
- "training": ["Technical Staff"],
106
- "access": 2,
107
- "vendor": "Sometimes",
108
- "notes": ""
109
- },
110
- "output": {
111
- "maturity_score": 45,
112
- "findings": [
113
- {"title": "Unencrypted data at rest",
114
- "severity": "High", "detail": "Sensitive data is stored unencrypted.",
115
- "likelihood": 4, "impact": 5}
116
- ],
117
- "actions": [
118
- {"title": "Implement AES-256 at rest",
119
- "priority": "P1", "owner": "CISO"}
120
- ]
121
- }
122
- },
123
- {
124
- "answers": {
125
- "encrypt": "Yes",
126
- "audit": "Yes",
127
- "privacy": "Partially",
128
- "training": ["All Employees"],
129
- "access": 4,
130
- "vendor": "No",
131
- "notes": "Using many SaaS tools"
132
- },
133
- "output": {
134
- "maturity_score": 72,
135
- "findings": [
136
- {"title": "Outdated privacy policy",
137
- "severity": "Medium", "detail": "Policy not reviewed in last 18 months.",
138
- "likelihood": 3, "impact": 3}
139
- ],
140
- "actions": [
141
- {"title": "Refresh privacy notice",
142
- "priority": "P2", "owner": "Legal"}
143
- ]
144
- }
145
- }
146
- ]
147
-
148
- # ---------- 6 | LLM-driven analysis with RAG -----------------------------------
149
- def rag_context(industry: str) -> str:
150
- """Return up to three relevant doc snippets for the chosen industry."""
151
- framework_names = INDUSTRY_MAP.get(industry, INDUSTRY_MAP["All"])
152
- query = " , ".join(framework_names) + " AI compliance best practice"
153
- rel_docs = vectordb.similarity_search(query, k=3)
154
- return "\n\n".join(d.page_content for d in rel_docs)
155
-
156
- def llm_json(prompt: str, max_tokens=512) -> dict:
157
- """Generate JSON string via local T5, then safely parse."""
158
- raw = rewriter(prompt, max_new_tokens=max_tokens)[0]["generated_text"]
159
- # simple fallback: extract first {...} block
160
- try:
161
- json_txt = re.search(r"\{.*\}", raw, re.S).group(0)
162
- return json.loads(json_txt)
163
- except Exception:
164
- # best-effort cleanup
165
- json_txt = raw.split("}", 1)[0] + "}"
166
- return json.loads(json_txt)
167
-
168
- def analyse_llm(resp: dict):
169
- prompt = textwrap.dedent(f"""
170
- ### System
171
- {SYSTEM}
172
-
173
- ### Examples
174
- {json.dumps(EXAMPLES, indent=2)}
175
-
176
- ### Context (policy references)
177
- {rag_context(resp.get('industry','All'))}
178
-
179
- ### User Answers
180
- {json.dumps(resp, indent=2)}
181
-
182
- ### Task
183
- Produce the JSON schema described above.
184
- """)
185
- data = llm_json(prompt)
186
- findings = data.get("findings", [])
187
- actions = data.get("actions", [])
188
- maturity = data.get("maturity_score", 0)
189
-
190
- # Dynamic risk score = avg(likelihood Γ— impact) scaled 0-100
191
- if findings:
192
- risk = sum(f["likelihood"] * f["impact"] for f in findings) / (len(findings)*25) * 100
193
- else:
194
- risk = 0
195
- return findings, actions, maturity, risk, data
196
-
197
- # ---------- 7 | Renderers ------------------------------------------------------
198
- def findings_md(findings):
199
- out = []
200
- for f in findings:
201
- out.append(f"- **{f['title']}** ({f['severity']}) \n"
202
- f" Likelihood {f['likelihood']}/5 Β· Impact {f['impact']}/5 \n"
203
- f" {f['detail']}")
204
- return "\n".join(out)
205
-
206
- def actions_md(actions):
207
- out = []
208
- for a in actions:
209
- out.append(f"- **{a['title']}** β€” {a['priority']} Β· _Owner: {a['owner']}_")
210
- return "\n".join(out)
211
 
212
- def json_to_markdown(data, company):
213
- md = f"### πŸ“‹ Compliance Report β€” **{company}**\n"
214
- md += f"**Overall Maturity Score:** {data['maturity_score']}/100\n\n"
215
- md += "#### Key Findings\n" + findings_md(data["findings"]) + "\n\n"
216
- md += "#### Recommended Actions\n" + actions_md(data["actions"])
217
- return md
 
218
 
219
- def make_gauge(score: float):
220
- fig = go.Figure(go.Indicator(
221
- mode="gauge+number",
222
- value=score,
223
- gauge={
224
- "axis": {"range": [0, 100]},
225
- "bar": {"thickness": 0.3},
226
- "steps": [
227
- {"range": [0, 40], "color": "red"},
228
- {"range": [40, 70], "color": "yellow"},
229
- {"range": [70, 100],"color": "green"},
230
- ],
231
- },
232
- number={"suffix": "%"},
233
- title={"text": "Overall Risk"}
234
- ))
235
- fig.update_layout(height=250, margin=dict(t=20, b=0, l=0, r=0))
236
- return fig
237
 
238
- def to_pdf(markdown: str, company: str):
 
239
  pdf = FPDF()
240
  pdf.set_auto_page_break(auto=True, margin=15)
241
  pdf.add_page()
242
- pdf.set_font("Arial", "B", 14)
243
- pdf.multi_cell(0, 10, clean(f"Compliance Report – {company}"), align="C")
244
- pdf.ln(4)
245
- pdf.set_font("Arial", "", 11)
246
- for line in markdown.splitlines():
247
- pdf.multi_cell(0, 8, clean(line))
248
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
249
- pdf.output(tmp.name)
250
- return tmp.name
251
 
252
- # ---------- 8 | Gradio UI ------------------------------------------------------
253
- with gr.Blocks(title="πŸ›‘οΈ AI-Driven Compliance & Governance Assistant") as demo:
254
- gr.Markdown("## πŸ›‘οΈ Compliance Survey β†’ Dynamic AI Assessment\n"
255
- "Fill in the survey β†’ get a JSON-backed report, live risk gauge, "
256
- "policy-aware recommendations, and a follow-up chatbot.")
257
-
258
- widgets = {}
259
- with gr.Row():
260
- with gr.Column(scale=6):
261
- for q in QUESTIONS:
262
- if q["type"] == "text":
263
- widgets[q["key"]] = gr.Textbox(label=q["label"])
264
- elif q["type"] == "dropdown":
265
- widgets[q["key"]] = gr.Dropdown(
266
- q["choices"], label=q["label"], value=q["choices"][0])
267
- elif q["type"] == "radio":
268
- widgets[q["key"]] = gr.Radio(q["choices"], label=q["label"])
269
- elif q["type"] == "checkbox":
270
- widgets[q["key"]] = gr.CheckboxGroup(q["choices"], label=q["label"])
271
- elif q["type"] == "slider":
272
- widgets[q["key"]] = gr.Slider(
273
- q["min"], q["max"], 3, label=q["label"])
274
- else:
275
- widgets[q["key"]] = gr.Textbox(label=q["label"], lines=3)
276
-
277
- btn = gr.Button("πŸš€ Generate Report", size="lg")
278
 
279
- with gr.Column(scale=4):
280
- gauge_plot = gr.Plot(label="Overall Risk Gauge")
281
- out_report = gr.Markdown(label="πŸ“‘ AI-Generated Report")
282
- out_pdf = gr.File(label="πŸ“„ Download PDF")
 
 
283
 
284
- # --- Chatbot panel ---------------------------------------------------------
285
- chatbot = gr.Chatbot(label="πŸ’¬ Ask follow-up questions", height=250)
286
- chat_inp = gr.Textbox(
287
- placeholder="Ask about a finding, regulation link, etc. - hit Enter ↡",
288
- label="Type a question & press Enter")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
- # ---------- 9 | Callbacks --------------------------------------------------
291
- def run(*vals):
292
- data = dict(zip(widgets.keys(), vals))
293
- findings, actions, maturity, risk, full_json = analyse_llm(data)
294
 
295
- md = json_to_markdown(full_json, data.get("company", "[Unnamed]"))
296
- fig = make_gauge(risk)
297
- pdf_path = to_pdf(md, data.get("company", "[Unnamed]"))
298
- return fig, md, pdf_path
299
 
300
- btn.click(
301
- fn=run,
302
- inputs=list(widgets.values()),
303
- outputs=[gauge_plot, out_report, out_pdf],
304
  )
305
 
306
- # ----- Chatbot -------------------------------------------------------------
307
- def reply(user_msg, chat_history):
308
- # simple RAG for follow-ups
309
- context = rag_context("All")
310
- prompt = (f"Context snippets:\n{context}\n\n"
311
- f"User: {user_msg}\nAssistant:")
312
- answer = rewriter(prompt, max_new_tokens=256)[0]["generated_text"]
313
- chat_history.append((user_msg, answer.strip()))
314
- return "", chat_history
315
-
316
- chat_inp.submit(reply, [chat_inp, chatbot], [chat_inp, chatbot])
317
-
318
- if __name__ == "__main__":
319
- demo.launch()
 
1
+ import os, tempfile, datetime
 
2
  import gradio as gr
3
+ import pandas as pd
4
+ from fpdf import FPDF # pure-python PDF generator – no wkhtmltopdf needed
 
 
 
 
5
 
6
+ # ---------- Quiz Definition ---------- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
QUESTIONS = [
    "1. Governance framework is documented and communicated across the organisation.",
    "2. Roles & responsibilities for AI oversight are clearly assigned.",
    "3. Data lineage is captured and auditable for all production models.",
    "4. Privacy impact assessments are performed before every new AI use-case.",
    "5. Model cards or equivalent documentation exist for each deployed model.",
    "6. Bias / fairness metrics are monitored post-deployment.",
    "7. Incident response playbooks cover AI system failures & ethics breaches.",
    "8. Third-party models and datasets are licensed and risk-assessed.",
    "9. KPIs link AI outcomes to business & societal value.",
    "10. Continuous training keeps staff aware of AI policy updates.",
    "11. Security controls protect model artefacts and inference endpoints.",
    "12. Explainability techniques are applied commensurate with model impact.",
    "13. Human-in-the-loop overrides exist for high-risk decisions.",
    "14. End-of-life or rollback criteria are defined for all models.",
    "15. Governance performance is reviewed by senior leadership at least quarterly.",
]

# Maturity tiers keyed by an (inclusive low, inclusive high) average-score range.
# NOTE: the declared ranges leave tiny gaps (e.g. 2.0 < avg <= 2.01 exclusive
# band); score_to_tier() below closes those gaps explicitly.
TIERS = {
    "Initial": (1.0, 2.0),
    "Repeatable": (2.01, 2.5),
    "Defined": (2.51, 3.5),
    "Managed": (3.51, 4.5),
    "Optimized": (4.51, 5.0),
}

# One concrete next-step recommendation per tier (keys mirror TIERS).
ACTIONS = {
    "Initial": "Kick-off a cross-functional task-force, map critical use-cases, prioritise policy creation.",
    "Repeatable": "Formalise processes; introduce mandatory model documentation & basic monitoring.",
    "Defined": "Scale governance with automated lineage capture, bias dashboards, and internal audits.",
    "Managed": "Integrate governance KPIs into OKRs; adopt continuous compliance tooling.",
    "Optimized": "Benchmark externally (OECD, ISO 42001), publish transparency reports, champion open-governance.",
}

# ---------- Helper Functions ---------- #
def score_to_tier(avg: float) -> str:
    """Map an average score (1-5) to a maturity tier name.

    Args:
        avg: Mean of the per-question scores.

    Returns:
        A key of TIERS, or "Unclassified" for values above every tier's
        upper bound (unreachable with the 1-5 sliders, kept as a guard).
    """
    for tier, (low, high) in TIERS.items():
        if low <= avg <= high:
            return tier
    # BUG FIX: the TIERS ranges have small gaps between them (e.g. avg=2.005
    # falls between Initial's 2.0 and Repeatable's 2.01). Previously such
    # values returned "Unclassified", which made ACTIONS[tier] raise KeyError
    # downstream. Snap gap values (and anything below 1.0) to the first tier
    # whose upper bound covers them.
    for tier, (_low, high) in TIERS.items():
        if avg <= high:
            return tier
    return "Unclassified"
 
 
 
 
 
 
 
 
 
 
 
48
 
49
def build_pdf(name: str, df: pd.DataFrame, avg: float, tier: str, file_path: str):
    """Create a simple, policy-oriented PDF report.

    Args:
        name: Report label (currently informational; the title text is fixed).
        df: Table with "Question" and "Score" columns, one row per quiz item.
        avg: Overall average score (1-5 scale).
        tier: Maturity tier name, used to look up the recommendation in ACTIONS.
        file_path: Destination path for the generated PDF.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Title
    pdf.set_font("Helvetica", "B", 16)
    pdf.cell(0, 10, "AI Governance Maturity Report", ln=1, align="C")
    pdf.set_font("Helvetica", "", 12)
    pdf.cell(0, 8, f"Generated on {datetime.date.today().isoformat()}", ln=1, align="C")
    pdf.ln(4)

    # Summary
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, f"Overall Score: {avg:.2f} | Tier: {tier}", ln=1)
    pdf.set_font("Helvetica", "", 11)
    # .get() instead of [] so an unexpected tier (e.g. "Unclassified")
    # degrades gracefully instead of raising KeyError mid-render.
    pdf.multi_cell(0, 6, f"Next step recommendation: {ACTIONS.get(tier, 'N/A')}")
    pdf.ln(4)

    # Detailed table header
    pdf.set_font("Helvetica", "B", 11)
    pdf.cell(10, 8, "#", 1)
    pdf.cell(150, 8, "Question", 1)
    pdf.cell(20, 8, "Score", 1, ln=1)
    pdf.set_font("Helvetica", "", 10)

    for idx, row in df.iterrows():
        question = row["Question"]
        # BUG FIX: use ASCII "..." for truncation. The built-in Helvetica
        # core font only supports Latin-1, so the previous "…" (U+2026)
        # caused pdf.output() to fail on long questions.
        truncated = question[:65] + ("..." if len(question) > 65 else "")
        pdf.cell(10, 8, str(idx + 1), 1)
        pdf.cell(150, 8, truncated, 1)
        pdf.cell(20, 8, str(row["Score"]), 1, ln=1)

    pdf.output(file_path)
82
+
83
def generate_report(*scores):
    """Gradio callback: turn the slider scores into a Markdown summary + PDF.

    Args:
        *scores: One numeric score (1-5) per entry in QUESTIONS.

    Returns:
        (markdown_summary, pdf_file_path) for the Markdown and File outputs.
    """
    scores = list(scores)
    # Guard: avoid ZeroDivisionError if called with no inputs.
    if not scores:
        return "No scores submitted.", None
    avg = sum(scores) / len(scores)
    tier = score_to_tier(avg)

    # DataFrame backing the per-question table in the PDF.
    df = pd.DataFrame({"Question": QUESTIONS, "Score": scores})

    # BUG FIX: mkstemp + close instead of holding a NamedTemporaryFile open —
    # on Windows, FPDF cannot reopen a path whose handle is still held.
    fd, pdf_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)
    build_pdf("Report", df, avg, tier, pdf_path)

    md_summary = (
        f"### ✅ Your AI Governance Tier: **{tier}**  \n"
        f"**Average score:** {avg:.2f} / 5.00  \n"
        f"**Action plan:** {ACTIONS.get(tier, 'N/A')}"
    )
    return md_summary, pdf_path
102
+
103
+ # ---------- Gradio UI ---------- #
104
+ with gr.Blocks(title="Governance-GPT Quiz") as demo:
105
+ gr.Markdown(
106
+ """
107
+ # Governance-GPT Quiz
108
+ Rate each statement from **1 (Strongly Disagree)** to **5 (Strongly Agree)**.
109
+ The tool benchmarks your AI-governance maturity and produces a PDF action plan aligned with OECD AI Principles.
110
+ """
111
+ )
112
 
113
+ sliders = []
114
+ for q in QUESTIONS:
115
+ sliders.append(gr.Slider(1, 5, value=3, step=1, label=q))
 
116
 
117
+ generate_btn = gr.Button("Generate Report")
118
+ summary_md = gr.Markdown()
119
+ pdf_file = gr.File(label="⬇️ Download PDF")
 
120
 
121
+ generate_btn.click(
122
+ fn=generate_report,
123
+ inputs=sliders,
124
+ outputs=[summary_md, pdf_file],
125
  )
126
 
127
+ demo.launch()