# Source: Hugging Face Space by rishabh5752 — commit eaf6ff2 ("Update app.py")
# app.py
import os
import datetime
import tempfile
import traceback
import gradio as gr
import pandas as pd
from fpdf import FPDF
import openai
# Load API key from environment
# NOTE(review): with an empty key, OpenAI calls will fail and
# llm_remediation falls back to its rule-based summary — confirm intended.
openai.api_key = os.getenv("OPENAI_API_KEY", "")
# Governance assessment questions
# One 1-5 Likert slider is rendered per question; list order must match the
# indices referenced in BUCKETS below.
QUESTIONS = [
    "Governance framework is documented and communicated across the organisation.",
    "Roles & responsibilities for AI oversight are clearly assigned.",
    "KPIs link AI outcomes to business & societal value.",
    "Data lineage is captured and auditable for all production models.",
    "Privacy impact assessments are performed before every new AI use-case.",
    "Bias / fairness metrics are monitored post-deployment.",
    "Incident response playbooks cover AI system failures & ethics breaches.",
    "Third-party models and datasets are licensed and risk-assessed.",
    "Governance performance is reviewed by senior leadership at least quarterly.",
    "Security controls protect model artefacts and inference endpoints.",
    "Continuous training keeps staff aware of AI policy updates.",
    "Explainability techniques are applied commensurate with model impact.",
    "Human-in-the-loop overrides exist for high-risk decisions.",
    "End-of-life or rollback criteria are defined for all models.",
    "Model cards or equivalent documentation exist for each deployed model.",
]
# Map buckets to question indices
# Each bucket's score is the mean of the three QUESTIONS at these indices.
BUCKETS = {
    "Governance & Strategy": [0, 1, 2],
    "Data & Privacy": [3, 4, 5],
    "Risk & Compliance": [6, 7, 8],
    "Security & Infrastructure": [9, 10, 11],
    "Lifecycle & Oversight": [12, 13, 14],
}
# Score thresholds per tier
# Inclusive (low, high) ranges in ascending order.
# NOTE(review): the ranges leave tiny gaps (e.g. 2.00 < x < 2.01) that no
# tier covers — see score_to_tier for how scores are classified.
TIERS = {
    "Initial": (1.00, 2.00),
    "Repeatable": (2.01, 2.50),
    "Defined": (2.51, 3.50),
    "Managed": (3.51, 4.50),
    "Optimized": (4.51, 5.00),
}
def score_to_tier(x: float, tiers=None) -> str:
    """Map a numeric score to its maturity tier name.

    Args:
        x: Score to classify, expected in [1.0, 5.0].
        tiers: Optional ``{name: (low, high)}`` mapping with ranges in
            ascending order; defaults to the module-level ``TIERS`` table.

    Returns:
        The matching tier name, or "Unclassified" when ``x`` lies outside
        the overall [low, high] span.
    """
    if tiers is None:
        tiers = TIERS
    bounds = list(tiers.values())
    # Outside the overall span entirely -> Unclassified (matches the
    # original behaviour for e.g. 0.5 or 5.5).
    if not bounds or x < bounds[0][0] or x > bounds[-1][1]:
        return "Unclassified"
    # Compare against upper bounds only: the original `lo <= x <= hi` test
    # misclassified scores falling in the gaps between ranges (e.g.
    # 2.00 < x < 2.01) as "Unclassified"; they now map to the next tier up.
    for name, (_low, high) in tiers.items():
        if x <= high:
            return name
    return "Unclassified"  # unreachable when x <= the last high bound
# Single-pass translation table for common non-Latin-1 punctuation; anything
# not listed here is degraded to "?" by the "replace" fallback in encode().
_LATIN1_MAP = str.maketrans({
    "\u2013": "-",    # en dash
    "\u2014": "-",    # em dash
    "\u2022": "-",    # bullet
    "\u2018": "'",    # left single quote
    "\u2019": "'",    # right single quote
    "\u201c": '"',    # left double quote
    "\u201d": '"',    # right double quote
    "\u2026": "...",  # ellipsis
})


def latin1(t: str) -> str:
    """Return *t* made safe for FPDF's Latin-1 core fonts.

    Common typographic punctuation (dashes, bullets, curly quotes,
    ellipsis) is mapped to ASCII equivalents; any remaining character
    outside Latin-1 is replaced with "?" rather than raising.
    """
    return t.translate(_LATIN1_MAP).encode("latin-1", "replace").decode("latin-1")
def llm_remediation(product: str, b_avgs: dict, overall_tier: str) -> str:
    """Produce a remediation summary for the product's maturity scores.

    Tries the OpenAI chat API first; if the call raises or returns a
    trivially short completion, falls back to a deterministic, rule-based
    summary built from the per-bucket tiers.

    Args:
        product: Product/system name used in the prompt and fallback text.
        b_avgs: Mapping of bucket name -> average score.
        overall_tier: Overall maturity tier label.

    Returns:
        Markdown-ish summary text.
    """
    score_block = "\n".join(f"{b}: {v:.2f}" for b, v in b_avgs.items())
    prompt = (
        f"Summarise the governance maturity for the product '{product}' at tier '{overall_tier}' "
        f"based on these bucket scores:\n{score_block}\n\n"
        "First, write a one-sentence overall assessment. Then, provide 3-5 markdown bullets "
        "suggesting next actions for improvement, mentioning bucket names."
    )
    try:
        resp = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an expert in AI governance maturity."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=300,
            temperature=0.7,
        )
        text = (resp["choices"][0]["message"]["content"] or "").strip()
        # Reject empty or trivially short completions and use the fallback.
        if len(text) > 20:
            return text
    except Exception as exc:
        print("[OpenAI] ERROR:", exc)
        traceback.print_exc()
    # Rule-based fallback: one action bullet per bucket, keyed off its tier.
    low_tiers = ("Initial", "Repeatable", "Defined")
    lines = []
    for bucket, bucket_avg in b_avgs.items():
        bucket_tier = score_to_tier(bucket_avg)
        if bucket_tier in low_tiers:
            lines.append(f"- {bucket}: Formalize policies, clarify owners, and address compliance gaps.")
        elif bucket_tier == "Managed":
            lines.append(f"- {bucket}: Benchmark against peers and automate monitoring.")
        else:
            lines.append(f"- {bucket}: Continue to optimize and share best practices.")
    return f"{product} is at '{overall_tier}' maturity. Focus on the following improvements:\n" + "\n".join(lines)
def build_pdf(product: str, df: pd.DataFrame, avg: float, tier: str, path: str, summary: str) -> None:
    """Render the maturity report as a PDF at *path*.

    Layout is order-dependent (FPDF draws cells sequentially): title, date,
    overall score line, narrative summary, then a Bucket/Avg/Tier table
    finishing with an Overall row.

    Args:
        product: Product/system name shown in the title.
        df: Per-bucket results with Bucket, Avg and Tier columns.
        avg: Overall average score across all questions.
        tier: Overall maturity tier label.
        path: Destination file path for the PDF.
        summary: Remediation narrative inserted above the table.
    """
    # Create PDF report with scores and summary
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    # Centered title and report date.
    pdf.set_font("Helvetica", "B", 16)
    pdf.cell(0, 10, latin1(f"AI Governance Maturity Report - {product}"), ln=1, align="C")
    pdf.set_font("Helvetica", "", 12)
    pdf.cell(0, 8, datetime.date.today().isoformat(), ln=1, align="C")
    pdf.ln(4)
    # Overall score headline followed by the narrative summary.
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, latin1(f"Overall Score: {avg:.2f} | Tier: {tier}"), ln=1)
    pdf.set_font("Helvetica", "", 11)
    pdf.multi_cell(0, 6, latin1(summary))
    pdf.ln(4)
    # Table header row (column widths 80 / 35 / 35).
    pdf.set_font("Helvetica", "B", 11)
    pdf.cell(80, 8, "Bucket", 1); pdf.cell(35, 8, "Avg", 1); pdf.cell(35, 8, "Tier", 1, ln=1)
    pdf.set_font("Helvetica", "", 10)
    # One row per bucket; names truncated to 40 chars to fit the column.
    for _, r in df.iterrows():
        pdf.cell(80, 8, latin1(str(r.Bucket)[:40]), 1)
        pdf.cell(35, 8, f"{float(r.Avg):.2f}", 1)
        pdf.cell(35, 8, str(r.Tier), 1, ln=1)
    # Closing row with the overall score and tier.
    pdf.cell(80, 8, "Overall", 1); pdf.cell(35, 8, f"{avg:.2f}", 1); pdf.cell(35, 8, tier, 1, ln=1)
    pdf.output(path)
def generate_report(name, *scores):
    """Run the full pipeline: score the answers, summarise, render a PDF.

    Args:
        name: Product / system name from the textbox (may be blank).
        *scores: One 1-5 slider value per entry in QUESTIONS, in order.

    Returns:
        Tuple of (summary_markdown, pdf_path) for the Gradio outputs.
    """
    # Full evaluation pipeline: scoring → LLM → PDF
    product = (name or "").strip() or "your product"
    scores = list(scores)
    # Per-bucket averages, then the overall average across every question.
    b_avgs = {b: sum(scores[i] for i in idx) / len(idx) for b, idx in BUCKETS.items()}
    avg = sum(scores) / len(scores)
    tier = score_to_tier(avg)
    df = pd.DataFrame({
        "Bucket": list(b_avgs.keys()),
        "Avg": list(b_avgs.values()),
        "Tier": [score_to_tier(v) for v in b_avgs.values()],
    })
    summary = llm_remediation(product, b_avgs, tier)
    # Use mkstemp + close so the OS handle is released before FPDF reopens
    # the path for writing: NamedTemporaryFile(delete=False) left the handle
    # open, which breaks reopening the same file by name on Windows.
    fd, pdf_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)
    build_pdf(product, df, avg, tier, pdf_path, summary)
    return summary, pdf_path
# UI layout using Gradio: one slider per question feeding generate_report.
with gr.Blocks(title="Governance-GPT Quiz") as demo:
    gr.Markdown("# Governance-GPT Quiz")
    pname = gr.Textbox(label="Product / System Name", placeholder="e.g. AcmeAI Recommender")
    # Pass value/step as keywords: in Gradio 3/4 every Slider parameter after
    # `value` is keyword-only, so Slider(1, 5, 3, 1, ...) raises TypeError.
    sliders = [gr.Slider(minimum=1, maximum=5, value=3, step=1, label=q) for q in QUESTIONS]
    btn = gr.Button("Generate PDF Report")
    md_out = gr.Markdown()
    file_out = gr.File(label="⬇️ Download PDF")
    btn.click(generate_report, [pname] + sliders, [md_out, file_out])

# Start the app
if __name__ == "__main__":
    demo.launch()