# NOTE(review): the three lines that were here ("Spaces: / Sleeping / Sleeping") were
# Hugging Face Spaces page chrome captured during export — not source code.
"""
Compliance Co-Pilot — single-file Gradio Space.

Layout order: Summary -> Score -> Remediations (AI-worded) -> Chatbot with suggested prompts.
CPU-friendly default model. Max output tokens trimmed for speed.
"""
import os
import tempfile
import textwrap
import warnings
from functools import lru_cache
from pathlib import Path

import gradio as gr
import requests
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)
| # ----------------------------------------------------------- | |
| # 0. Policy corpus URLs | |
| # ----------------------------------------------------------- | |
| POLICY_URLS = { | |
| "DPDP Act 2023": "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf", | |
| "Responsible AI (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf", | |
| "National AI Strategy (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2023-03/National-Strategy-for-Artificial-Intelligence.pdf", | |
| "RBI FREE-AI Framework 2025": "https://assets.kpmg.com/content/dam/kpmgsites/in/pdf/2025/08/rbi-free-ai-committee-report-on-framework-for-responsible-and-ethical-enablement-of-artificial-intelligence.pdf.coredownload.inline.pdf", | |
| "OECD AI Principles": "https://oecd.ai/en/assets/files/OECD-LEGAL-0449-en.pdf", | |
| } | |
| EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2" | |
| LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "google/flan-t5-small") # Override with HF secret if GPU | |
| # ----------------------------------------------------------- | |
| # 1. Helpers β ingest & embed PDFs | |
| # ----------------------------------------------------------- | |
| def _download(url: str, out_dir: Path) -> Path: | |
| out_dir.mkdir(parents=True, exist_ok=True) | |
| fp = out_dir / Path(url).name | |
| if not fp.exists(): | |
| resp = requests.get(url, timeout=90) | |
| resp.raise_for_status() | |
| fp.write_bytes(resp.content) | |
| return fp | |
| def _vector_store(): | |
| tmp = Path(tempfile.gettempdir()) / "policies" | |
| pages = [] | |
| for title, url in POLICY_URLS.items(): | |
| try: | |
| for pg in PyPDFLoader(str(_download(url, tmp))).load(): | |
| pg.metadata["source"] = title | |
| pages.append(pg) | |
| except Exception as e: | |
| warnings.warn(f"Skipping {title}: {e}") | |
| chunks = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128).split_documents(pages) | |
| return FAISS.from_documents(chunks, HuggingFaceEmbeddings(model_name=EMBED_MODEL)) | |
| # ----------------------------------------------------------- | |
| # 2. LLM helpers | |
| # ----------------------------------------------------------- | |
| def _get_llm_pipe(max_new_tokens: int = 96): | |
| model_id = os.getenv("LLM_MODEL_ID", "google/flan-t5-small") | |
| tok = AutoTokenizer.from_pretrained(model_id) | |
| # use Seq2Seq loader for encoder-decoder models (e.g. T5) | |
| model = AutoModelForSeq2SeqLM.from_pretrained(model_id, device_map="auto") | |
| return pipeline( | |
| "text2text-generation", # <-- task for seq2seq | |
| model=model, | |
| tokenizer=tok, | |
| max_new_tokens=max_new_tokens, | |
| do_sample=False, | |
| ) | |
| # ----------------------------------------------------------- | |
| # 3. Conversational RAG chain | |
| # ----------------------------------------------------------- | |
| def _get_chain(): | |
| llm = HuggingFacePipeline(pipeline=_get_llm_pipe(256)) | |
| retr = _vector_store().as_retriever(search_kwargs={"k": 4}) | |
| return ConversationalRetrievalChain.from_llm(llm, retriever=retr, return_source_documents=True) | |
| # ----------------------------------------------------------- | |
| # 4. Quiz / scoring / remediation | |
| # ----------------------------------------------------------- | |
| CATEGORIES = ["Governance", "Privacy", "Fairness", "Security", "Transparency"] | |
| QUESTIONS = [ | |
| {"text": "Do you perform DPIAs before deploying new AI features?", "options": [ | |
| {"label": "Always", "score": {"Privacy": 5, "Governance": 3}}, | |
| {"label": "Sometimes", "score": {"Privacy": 3, "Governance": 1}}, | |
| {"label": "Never", "score": {"Privacy": 0}}, | |
| ]}, | |
| {"text": "Is your training data regularly audited for bias?", "options": [ | |
| {"label": "Yes, every release", "score": {"Fairness": 5}}, | |
| {"label": "Occasionally", "score": {"Fairness": 2}}, | |
| {"label": "Not at all", "score": {"Fairness": 0}}, | |
| ]}, | |
| {"text": "Who can override an AI decision in production?", "options": [ | |
| {"label": "Designated human reviewers", "score": {"Governance": 5, "Transparency": 2}}, | |
| {"label": "Anyone on DevOps", "score": {"Governance": 2}}, | |
| {"label": "No one β fully automated", "score": {"Governance": 0}}, | |
| ]}, | |
| {"text": "How are model outputs logged?", "options": [ | |
| {"label": "Tamper-proof logs", "score": {"Security": 5, "Transparency": 3}}, | |
| {"label": "Plain-text logs", "score": {"Security": 2}}, | |
| {"label": "We donβt log", "score": {"Security": 0}}, | |
| ]}, | |
| {"text": "Can users delete their personal data?", "options": [ | |
| {"label": "Yes β self-service portal", "score": {"Privacy": 5, "Transparency": 3}}, | |
| {"label": "Yes β via email", "score": {"Privacy": 3}}, | |
| {"label": "No formal process", "score": {"Privacy": 0}}, | |
| ]}, | |
| ] | |
| REMEDIATIONS = { | |
| "Governance": "Establish an AI oversight committee and define escalation paths.", | |
| "Privacy": "Conduct DPIAs, implement data-deletion workflows and minimise PII.", | |
| "Fairness": "Run bias audits each training cycle and diversify data.", | |
| "Security": "Encrypt & tamper-proof logs; penetration-test quarterly.", | |
| "Transparency": "Publish model cards, decision logs and user-facing explanations.", | |
| } | |
| MAX_PER_CAT = 5 | |
| def _score(answers): | |
| s = {c: 0 for c in CATEGORIES} | |
| for a, q in zip(answers, QUESTIONS): | |
| for o in q["options"]: | |
| if o["label"] == a: | |
| for c, v in o["score"].items(): | |
| s[c] += v | |
| return s | |
| def _ai_paragraph(prompt: str) -> str: | |
| out = _get_llm_pipe(120)(prompt)[0]["generated_text"] | |
| return out[len(prompt):].strip() | |
| def grade_quiz(*answers): | |
| scores = _score(answers) | |
| ans_pairs = " ; ".join(f"{q['text']} => {a or 'No answer'}" for a, q in zip(answers, QUESTIONS)) | |
| summary_prompt = textwrap.dedent(f""" | |
| Write a concise two-paragraph summary of the following self-assessment answers, noting strengths and weaknesses: | |
| {ans_pairs} | |
| """) | |
| summary_txt = _ai_paragraph(summary_prompt) | |
| weak = [c for c, v in scores.items() if v < MAX_PER_CAT * 0.6] | |
| if weak: | |
| remed_prompt = textwrap.dedent(f""" | |
| In one paragraph, propose concrete remediation steps for these areas: {', '.join(weak)}. Base on best-practice AI governance. | |
| """) | |
| remed_txt = _ai_paragraph(remed_prompt) | |
| else: | |
| remed_txt = "You meet or exceed best-practice thresholds across all categories. Keep up the good work!" | |
| score_md = "### π Section Scores\n" + "\n".join(f"- **{c}**: {v}" for c, v in scores.items()) | |
| return f"### π Summary\n{summary_txt}", score_md, f"### π οΈ Remediations\n{remed_txt}" | |
| # ----------------------------------------------------------- | |
| # 5. Chat wrapper | |
| # ----------------------------------------------------------- | |
| _chain = _get_chain() | |
| def rag_chat(message, history): | |
| res = _chain.invoke({"question": message, "chat_history": history}) | |
| ans = res["answer"] | |
| srcs = {d.metadata.get("source", "") for d in res["source_documents"]} | |
| if srcs: | |
| ans += "\n\n**Sources:** " + ", ".join(sorted(srcs)) | |
| return ans | |
| SUGGESTED = [ | |
| "What are PII regulations?", | |
| "Steps for conducting a DPIA under DPDP Act?", | |
| "How can we improve transparency in AI systems?", | |
| ] | |
| # ----------------------------------------------------------- | |
| # 6. Gradio UI | |
| # ----------------------------------------------------------- | |
| with gr.Blocks(title="Compliance Co-Pilot") as app: | |
| gr.Markdown("## π‘οΈ Compliance Co-Pilot\nSelf-assessment + Reg-aware chatbot") | |
| # ----- Quiz tab ----- | |
| with gr.Tab("Take the Test"): | |
| radios = [gr.Radio(label=q["text"], choices=[o["label"] for o in q["options"]]) for q in QUESTIONS] | |
| submit = gr.Button("Submit") | |
| md_sum = gr.Markdown(); md_score = gr.Markdown(); md_remed = gr.Markdown() | |
| submit.click(grade_quiz, radios, [md_sum, md_score, md_remed]) | |
| # ----- Chat tab ----- | |
| with gr.Tab("Chat & Guidance"): | |
| gr.Markdown("**Suggested prompts:** click to insert β") | |
| gr.ChatInterface(fn=rag_chat, title="Ask the Co-Pilot", examples=SUGGESTED) | |
| # ----------------------------------------------------------- | |
| # 7. Launch | |
| # ----------------------------------------------------------- | |
| if __name__ == "__main__": | |
| app.queue() | |
| app.launch() |