rishabh5752 committed on
Commit
d9733a9
·
verified ·
1 Parent(s): f50095b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -300
app.py CHANGED
@@ -1,319 +1,127 @@
1
- # app.py ──────────────────────────────────────────────────────────────────────
2
- import os, json, tempfile, unicodedata, textwrap, re
3
  import gradio as gr
4
- from fpdf import FPDF
5
- from transformers import pipeline
6
- from langchain.document_loaders import PyPDFLoader
7
- from langchain.vectorstores import FAISS
8
- from langchain.embeddings import HuggingFaceEmbeddings
9
- import plotly.graph_objects as go
10
 
11
- # ---------- 0 | External corpora ------------------------------------------------
12
- POLICY_URLS = {
13
- "DPDP Act 2023": "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf",
14
- "Responsible AI (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf",
15
- "National AI Strategy (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2023-03/National-Strategy-for-Artificial-Intelligence.pdf",
16
- "IS 17428-1 (Data Privacy Assurance)": "https://archive.org/download/gov.in.is.17428.1.2020/gov.in.is.17428.1.2020.pdf",
17
- "RBI FREE-AI Framework 2025": "https://assets.kpmg.com/content/dam/kpmgsites/in/pdf/2025/08/rbi-free-ai-committee-report-on-framework-for-responsible-and-ethical-enablement-of-artificial-intelligence.pdf.coredownload.inline.pdf",
18
- "OECD AI Principles": "https://oecd.ai/en/assets/files/OECD-LEGAL-0449-en.pdf",
19
- "EU AI Act 2024": "https://eur-lex.europa.eu/resource.html?uri=cellar:99db59ed-3b7b-11ef-9e3c-01aa75ed71a1.0001.02/DOC_1&format=PDF",
20
- "ISO/IEC 42001:2023": "https://standards.iteh.ai/catalog/standards/iso/44d7188c-9cb8-4f0f-a358-06c7ce3e64f9/iso-iec-42001-2023.pdf",
21
- "ISO/IEC 23894:2023": "https://cdn.standards.iteh.ai/samples/77304/cb803ee4e9624430a5db177459158b24/ISO-IEC-23894-2023.pdf",
22
- }
23
-
24
- INDUSTRY_MAP = {
25
- "Finance": ["DPDP Act 2023", "RBI FREE-AI Framework 2025",
26
- "IS 17428-1 (Data Privacy Assurance)", "OECD AI Principles"],
27
- "Health Care": ["DPDP Act 2023", "Responsible AI (NITI Aayog)",
28
- "ISO/IEC 23894:2023", "OECD AI Principles"],
29
- "E-Commerce": ["DPDP Act 2023", "IS 17428-1 (Data Privacy Assurance)",
30
- "OECD AI Principles", "EU AI Act 2024"],
31
- "All": list(POLICY_URLS.keys()),
32
- }
33
-
34
- # ---------- 1 | Local-LLM & embedding models -----------------------------------
35
- print("Loading local language models …")
36
- rewriter = pipeline(
37
- "text2text-generation",
38
- model="google/flan-t5-large", # <-β€” self-hosted, no key needed
39
- tokenizer="google/flan-t5-large",
40
- device_map="auto",
41
- )
42
-
43
- embeddings = HuggingFaceEmbeddings(
44
- model_name="sentence-transformers/all-MiniLM-L6-v2"
45
- )
46
-
47
- # ---------- 2 | Build vector DB (one-off at start-up) --------------------------
48
- print("Indexing policy PDFs for RAG … (first run can take ~1-2 min)")
49
- docs = []
50
- for name, url in POLICY_URLS.items():
51
- # cached downloads by langchain, so subsequent runs are fast
52
- try:
53
- loader = PyPDFLoader(url)
54
- docs += loader.load_and_split(chunk_size=1000)
55
- except Exception as e:
56
- print(f"⚠️ Could not load {name}: {e}")
57
-
58
- vectordb = FAISS.from_documents(docs, embeddings)
59
-
60
- # ---------- 3 | Survey definition ---------------------------------------------
61
  QUESTIONS = [
62
- {"label": "Company / Project Name", "type": "text", "key": "company"},
63
- {"label": "Industry", "type": "dropdown", "choices": list(INDUSTRY_MAP.keys())[:-1], "key": "industry"},
64
- {"label": "1. Is data encryption implemented?", "type": "radio", "choices": ["Yes", "No", "Partially"], "key": "encrypt"},
65
- {"label": "2. Are regular security audits conducted?", "type": "radio", "choices": ["Yes", "No", "Partially"], "key": "audit"},
66
- {"label": "3. Is the privacy policy up to date?", "type": "radio", "choices": ["Yes", "No", "Partially"], "key": "privacy"},
67
- {"label": "4. Employee training conducted for:", "type": "checkbox", "choices": ["Technical Staff", "HR", "All Employees", "None"], "key": "training"},
68
- {"label": "5. Access-control maturity (1-5):", "type": "slider", "min": 1, "max": 5, "key": "access"},
69
- {"label": "6. Are third-party vendors assessed?", "type": "radio", "choices": ["Yes", "No", "Sometimes"], "key": "vendor"},
70
- {"label": "7. Additional notes (optional)", "type": "text_area", "key": "notes"},
 
 
 
 
 
 
71
  ]
72
 
73
- # ---------- 4 | Helper: PDF cleans --------------------------------------------
74
- def clean(txt: str) -> str:
75
- """Strip characters FPDF can't handle."""
76
- txt = (
77
- txt.replace("β€œ", '"').replace("”", '"')
78
- .replace("’", "'").replace("β€˜", "'")
79
- .replace("–", "-").replace("β€”", "-").replace("-", "-")
80
- )
81
- return unicodedata.normalize("NFKD", txt).encode("latin1", "ignore").decode("latin1")
82
-
83
- # ---------- 5 | Few-shot examples for the LLM chain ---------------------------
84
- SYSTEM = (
85
- "You are a senior AI governance & compliance analyst. "
86
- "Given raw survey answers, you must:\n"
87
- "1. Classify each answer into findings with title, severity (Low/Med/High), "
88
- " detail, likelihood (1-5) and impact (1-5).\n"
89
- "2. Draft one concrete remediation action per finding "
90
- " (title, priority P1-P3, owner role).\n"
91
- "3. Compute an overall maturity_score 0-100 (higher is better).\n"
92
- "4. Return ONLY valid JSON with schema:\n"
93
- "{"
94
- '"maturity_score": int,'
95
- '"findings":[{"title":str,"severity":str,"detail":str,"likelihood":int,"impact":int}],'
96
- '"actions":[{"title":str,"priority":str,"owner":str}]}'
97
- )
98
-
99
- EXAMPLES = [
100
- {
101
- "answers": {
102
- "encrypt": "No",
103
- "audit": "Partially",
104
- "privacy": "Yes",
105
- "training": ["Technical Staff"],
106
- "access": 2,
107
- "vendor": "Sometimes",
108
- "notes": ""
109
- },
110
- "output": {
111
- "maturity_score": 45,
112
- "findings": [
113
- {"title": "Unencrypted data at rest",
114
- "severity": "High", "detail": "Sensitive data is stored unencrypted.",
115
- "likelihood": 4, "impact": 5}
116
- ],
117
- "actions": [
118
- {"title": "Implement AES-256 at rest",
119
- "priority": "P1", "owner": "CISO"}
120
- ]
121
- }
122
- },
123
- {
124
- "answers": {
125
- "encrypt": "Yes",
126
- "audit": "Yes",
127
- "privacy": "Partially",
128
- "training": ["All Employees"],
129
- "access": 4,
130
- "vendor": "No",
131
- "notes": "Using many SaaS tools"
132
- },
133
- "output": {
134
- "maturity_score": 72,
135
- "findings": [
136
- {"title": "Outdated privacy policy",
137
- "severity": "Medium", "detail": "Policy not reviewed in last 18 months.",
138
- "likelihood": 3, "impact": 3}
139
- ],
140
- "actions": [
141
- {"title": "Refresh privacy notice",
142
- "priority": "P2", "owner": "Legal"}
143
- ]
144
- }
145
- }
146
- ]
147
-
148
- # ---------- 6 | LLM-driven analysis with RAG -----------------------------------
149
- def rag_context(industry: str) -> str:
150
- """Return up to three relevant doc snippets for the chosen industry."""
151
- framework_names = INDUSTRY_MAP.get(industry, INDUSTRY_MAP["All"])
152
- query = " , ".join(framework_names) + " AI compliance best practice"
153
- rel_docs = vectordb.similarity_search(query, k=3)
154
- return "\n\n".join(d.page_content for d in rel_docs)
155
-
156
- def llm_json(prompt: str, max_tokens=512) -> dict:
157
- """Generate JSON string via local T5, then safely parse."""
158
- raw = rewriter(prompt, max_new_tokens=max_tokens)[0]["generated_text"]
159
- # simple fallback: extract first {...} block
160
- try:
161
- json_txt = re.search(r"\{.*\}", raw, re.S).group(0)
162
- return json.loads(json_txt)
163
- except Exception:
164
- # best-effort cleanup
165
- json_txt = raw.split("}", 1)[0] + "}"
166
- return json.loads(json_txt)
167
-
168
- def analyse_llm(resp: dict):
169
- prompt = textwrap.dedent(f"""
170
- ### System
171
- {SYSTEM}
172
-
173
- ### Examples
174
- {json.dumps(EXAMPLES, indent=2)}
175
-
176
- ### Context (policy references)
177
- {rag_context(resp.get('industry','All'))}
178
-
179
- ### User Answers
180
- {json.dumps(resp, indent=2)}
181
-
182
- ### Task
183
- Produce the JSON schema described above.
184
- """)
185
- data = llm_json(prompt)
186
- findings = data.get("findings", [])
187
- actions = data.get("actions", [])
188
- maturity = data.get("maturity_score", 0)
189
-
190
- # Dynamic risk score = avg(likelihood Γ— impact) scaled 0-100
191
- if findings:
192
- risk = sum(f["likelihood"] * f["impact"] for f in findings) / (len(findings)*25) * 100
193
- else:
194
- risk = 0
195
- return findings, actions, maturity, risk, data
196
-
197
- # ---------- 7 | Renderers ------------------------------------------------------
198
- def findings_md(findings):
199
- out = []
200
- for f in findings:
201
- out.append(f"- **{f['title']}** ({f['severity']}) \n"
202
- f" Likelihood {f['likelihood']}/5 Β· Impact {f['impact']}/5 \n"
203
- f" {f['detail']}")
204
- return "\n".join(out)
205
-
206
- def actions_md(actions):
207
- out = []
208
- for a in actions:
209
- out.append(f"- **{a['title']}** β€” {a['priority']} Β· _Owner: {a['owner']}_")
210
- return "\n".join(out)
211
 
212
- def json_to_markdown(data, company):
213
- md = f"### πŸ“‹ Compliance Report β€” **{company}**\n"
214
- md += f"**Overall Maturity Score:** {data['maturity_score']}/100\n\n"
215
- md += "#### Key Findings\n" + findings_md(data["findings"]) + "\n\n"
216
- md += "#### Recommended Actions\n" + actions_md(data["actions"])
217
- return md
 
218
 
219
- def make_gauge(score: float):
220
- fig = go.Figure(go.Indicator(
221
- mode="gauge+number",
222
- value=score,
223
- gauge={
224
- "axis": {"range": [0, 100]},
225
- "bar": {"thickness": 0.3},
226
- "steps": [
227
- {"range": [0, 40], "color": "red"},
228
- {"range": [40, 70], "color": "yellow"},
229
- {"range": [70, 100],"color": "green"},
230
- ],
231
- },
232
- number={"suffix": "%"},
233
- title={"text": "Overall Risk"}
234
- ))
235
- fig.update_layout(height=250, margin=dict(t=20, b=0, l=0, r=0))
236
- return fig
237
 
238
- def to_pdf(markdown: str, company: str):
 
239
  pdf = FPDF()
240
  pdf.set_auto_page_break(auto=True, margin=15)
241
  pdf.add_page()
242
- pdf.set_font("Arial", "B", 14)
243
- pdf.multi_cell(0, 10, clean(f"Compliance Report – {company}"), align="C")
244
- pdf.ln(4)
245
- pdf.set_font("Arial", "", 11)
246
- for line in markdown.splitlines():
247
- pdf.multi_cell(0, 8, clean(line))
248
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
249
- pdf.output(tmp.name)
250
- return tmp.name
251
 
252
- # ---------- 8 | Gradio UI ------------------------------------------------------
253
- with gr.Blocks(title="πŸ›‘οΈ AI-Driven Compliance & Governance Assistant") as demo:
254
- gr.Markdown("## πŸ›‘οΈ Compliance Survey β†’ Dynamic AI Assessment\n"
255
- "Fill in the survey β†’ get a JSON-backed report, live risk gauge, "
256
- "policy-aware recommendations, and a follow-up chatbot.")
257
-
258
- widgets = {}
259
- with gr.Row():
260
- with gr.Column(scale=6):
261
- for q in QUESTIONS:
262
- if q["type"] == "text":
263
- widgets[q["key"]] = gr.Textbox(label=q["label"])
264
- elif q["type"] == "dropdown":
265
- widgets[q["key"]] = gr.Dropdown(
266
- q["choices"], label=q["label"], value=q["choices"][0])
267
- elif q["type"] == "radio":
268
- widgets[q["key"]] = gr.Radio(q["choices"], label=q["label"])
269
- elif q["type"] == "checkbox":
270
- widgets[q["key"]] = gr.CheckboxGroup(q["choices"], label=q["label"])
271
- elif q["type"] == "slider":
272
- widgets[q["key"]] = gr.Slider(
273
- q["min"], q["max"], 3, label=q["label"])
274
- else:
275
- widgets[q["key"]] = gr.Textbox(label=q["label"], lines=3)
276
-
277
- btn = gr.Button("πŸš€ Generate Report", size="lg")
278
 
279
- with gr.Column(scale=4):
280
- gauge_plot = gr.Plot(label="Overall Risk Gauge")
281
- out_report = gr.Markdown(label="πŸ“‘ AI-Generated Report")
282
- out_pdf = gr.File(label="πŸ“„ Download PDF")
 
 
283
 
284
- # --- Chatbot panel ---------------------------------------------------------
285
- chatbot = gr.Chatbot(label="πŸ’¬ Ask follow-up questions", height=250)
286
- chat_inp = gr.Textbox(
287
- placeholder="Ask about a finding, regulation link, etc. - hit Enter ↡",
288
- label="Type a question & press Enter")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
- # ---------- 9 | Callbacks --------------------------------------------------
291
- def run(*vals):
292
- data = dict(zip(widgets.keys(), vals))
293
- findings, actions, maturity, risk, full_json = analyse_llm(data)
294
 
295
- md = json_to_markdown(full_json, data.get("company", "[Unnamed]"))
296
- fig = make_gauge(risk)
297
- pdf_path = to_pdf(md, data.get("company", "[Unnamed]"))
298
- return fig, md, pdf_path
299
 
300
- btn.click(
301
- fn=run,
302
- inputs=list(widgets.values()),
303
- outputs=[gauge_plot, out_report, out_pdf],
304
  )
305
 
306
- # ----- Chatbot -------------------------------------------------------------
307
- def reply(user_msg, chat_history):
308
- # simple RAG for follow-ups
309
- context = rag_context("All")
310
- prompt = (f"Context snippets:\n{context}\n\n"
311
- f"User: {user_msg}\nAssistant:")
312
- answer = rewriter(prompt, max_new_tokens=256)[0]["generated_text"]
313
- chat_history.append((user_msg, answer.strip()))
314
- return "", chat_history
315
-
316
- chat_inp.submit(reply, [chat_inp, chatbot], [chat_inp, chatbot])
317
-
318
- if __name__ == "__main__":
319
- demo.launch()
 
1
+ import os, tempfile, datetime
 
2
  import gradio as gr
3
+ import pandas as pd
4
+ from fpdf import FPDF # pure-python PDF generator – no wkhtmltopdf needed
 
 
 
 
5
 
6
+ # ---------- Quiz Definition ---------- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
QUESTIONS = [
    "1. Governance framework is documented and communicated across the organisation.",
    "2. Roles & responsibilities for AI oversight are clearly assigned.",
    "3. Data lineage is captured and auditable for all production models.",
    "4. Privacy impact assessments are performed before every new AI use-case.",
    "5. Model cards or equivalent documentation exist for each deployed model.",
    "6. Bias / fairness metrics are monitored post-deployment.",
    "7. Incident response playbooks cover AI system failures & ethics breaches.",
    "8. Third-party models and datasets are licensed and risk-assessed.",
    "9. KPIs link AI outcomes to business & societal value.",
    "10. Continuous training keeps staff aware of AI policy updates.",
    "11. Security controls protect model artefacts and inference endpoints.",
    "12. Explainability techniques are applied commensurate with model impact.",
    "13. Human-in-the-loop overrides exist for high-risk decisions.",
    "14. End-of-life or rollback criteria are defined for all models.",
    "15. Governance performance is reviewed by senior leadership at least quarterly.",
]

# Maturity tiers keyed by an (inclusive low, inclusive high) average-score range.
# NOTE: the declared ranges leave tiny gaps (e.g. 2.0 < avg <= 2.01 exclusive
# band); score_to_tier() below closes those gaps explicitly.
TIERS = {
    "Initial": (1.0, 2.0),
    "Repeatable": (2.01, 2.5),
    "Defined": (2.51, 3.5),
    "Managed": (3.51, 4.5),
    "Optimized": (4.51, 5.0),
}

# One concrete next-step recommendation per tier (keys mirror TIERS).
ACTIONS = {
    "Initial": "Kick-off a cross-functional task-force, map critical use-cases, prioritise policy creation.",
    "Repeatable": "Formalise processes; introduce mandatory model documentation & basic monitoring.",
    "Defined": "Scale governance with automated lineage capture, bias dashboards, and internal audits.",
    "Managed": "Integrate governance KPIs into OKRs; adopt continuous compliance tooling.",
    "Optimized": "Benchmark externally (OECD, ISO 42001), publish transparency reports, champion open-governance.",
}

# ---------- Helper Functions ---------- #
def score_to_tier(avg: float) -> str:
    """Map an average score (1-5) to a maturity tier name.

    Args:
        avg: Mean of the per-question scores.

    Returns:
        A key of TIERS, or "Unclassified" for values above every tier's
        upper bound (unreachable with the 1-5 sliders, kept as a guard).
    """
    for tier, (low, high) in TIERS.items():
        if low <= avg <= high:
            return tier
    # BUG FIX: the TIERS ranges have small gaps between them (e.g. avg=2.005
    # falls between Initial's 2.0 and Repeatable's 2.01). Previously such
    # values returned "Unclassified", which made ACTIONS[tier] raise KeyError
    # downstream. Snap gap values (and anything below 1.0) to the first tier
    # whose upper bound covers them.
    for tier, (_low, high) in TIERS.items():
        if avg <= high:
            return tier
    return "Unclassified"
 
 
 
 
 
 
 
 
 
 
 
48
 
49
def build_pdf(name: str, df: pd.DataFrame, avg: float, tier: str, file_path: str):
    """Create a simple, policy-oriented PDF report.

    Args:
        name: Report label (currently informational; the title text is fixed).
        df: Table with "Question" and "Score" columns, one row per quiz item.
        avg: Overall average score (1-5 scale).
        tier: Maturity tier name, used to look up the recommendation in ACTIONS.
        file_path: Destination path for the generated PDF.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Title
    pdf.set_font("Helvetica", "B", 16)
    pdf.cell(0, 10, "AI Governance Maturity Report", ln=1, align="C")
    pdf.set_font("Helvetica", "", 12)
    pdf.cell(0, 8, f"Generated on {datetime.date.today().isoformat()}", ln=1, align="C")
    pdf.ln(4)

    # Summary
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, f"Overall Score: {avg:.2f} | Tier: {tier}", ln=1)
    pdf.set_font("Helvetica", "", 11)
    # .get() instead of [] so an unexpected tier (e.g. "Unclassified")
    # degrades gracefully instead of raising KeyError mid-render.
    pdf.multi_cell(0, 6, f"Next step recommendation: {ACTIONS.get(tier, 'N/A')}")
    pdf.ln(4)

    # Detailed table header
    pdf.set_font("Helvetica", "B", 11)
    pdf.cell(10, 8, "#", 1)
    pdf.cell(150, 8, "Question", 1)
    pdf.cell(20, 8, "Score", 1, ln=1)
    pdf.set_font("Helvetica", "", 10)

    for idx, row in df.iterrows():
        question = row["Question"]
        # BUG FIX: use ASCII "..." for truncation. The built-in Helvetica
        # core font only supports Latin-1, so the previous "…" (U+2026)
        # caused pdf.output() to fail on long questions.
        truncated = question[:65] + ("..." if len(question) > 65 else "")
        pdf.cell(10, 8, str(idx + 1), 1)
        pdf.cell(150, 8, truncated, 1)
        pdf.cell(20, 8, str(row["Score"]), 1, ln=1)

    pdf.output(file_path)
82
+
83
def generate_report(*scores):
    """Gradio callback: turn the slider scores into a Markdown summary + PDF.

    Args:
        *scores: One numeric score (1-5) per entry in QUESTIONS.

    Returns:
        (markdown_summary, pdf_file_path) for the Markdown and File outputs.
    """
    scores = list(scores)
    # Guard: avoid ZeroDivisionError if called with no inputs.
    if not scores:
        return "No scores submitted.", None
    avg = sum(scores) / len(scores)
    tier = score_to_tier(avg)

    # DataFrame backing the per-question table in the PDF.
    df = pd.DataFrame({"Question": QUESTIONS, "Score": scores})

    # BUG FIX: mkstemp + close instead of holding a NamedTemporaryFile open —
    # on Windows, FPDF cannot reopen a path whose handle is still held.
    fd, pdf_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)
    build_pdf("Report", df, avg, tier, pdf_path)

    md_summary = (
        f"### ✅ Your AI Governance Tier: **{tier}**  \n"
        f"**Average score:** {avg:.2f} / 5.00  \n"
        f"**Action plan:** {ACTIONS.get(tier, 'N/A')}"
    )
    return md_summary, pdf_path
102
+
103
+ # ---------- Gradio UI ---------- #
104
+ with gr.Blocks(title="Governance-GPT Quiz") as demo:
105
+ gr.Markdown(
106
+ """
107
+ # Governance-GPT Quiz
108
+ Rate each statement from **1 (Strongly Disagree)** to **5 (Strongly Agree)**.
109
+ The tool benchmarks your AI-governance maturity and produces a PDF action plan aligned with OECD AI Principles.
110
+ """
111
+ )
112
 
113
+ sliders = []
114
+ for q in QUESTIONS:
115
+ sliders.append(gr.Slider(1, 5, value=3, step=1, label=q))
 
116
 
117
+ generate_btn = gr.Button("Generate Report")
118
+ summary_md = gr.Markdown()
119
+ pdf_file = gr.File(label="⬇️ Download PDF")
 
120
 
121
+ generate_btn.click(
122
+ fn=generate_report,
123
+ inputs=sliders,
124
+ outputs=[summary_md, pdf_file],
125
  )
126
 
127
+ demo.launch()