# rishabh5752's picture
# Update app.py
# 5973477 verified
"""
Compliance Co-Pilot – single-file Gradio Space
Layout order: Summary → Score → Remediations (AI-worded) → Chatbot with suggested prompts.
CPU-friendly default model. Max output tokens trimmed for speed.
"""
import os, tempfile, warnings, textwrap
from functools import lru_cache
from pathlib import Path
import requests, gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import AutoModelForSeq2SeqLM # add import
# -----------------------------------------------------------
# 0. Policy corpus URLs
# -----------------------------------------------------------
# Human-readable policy title -> source PDF URL. Each document is downloaded
# once into the temp dir, split into chunks and embedded into the FAISS index;
# the title is attached to every page as its citation "source".
POLICY_URLS = {
    "DPDP Act 2023": "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf",
    "Responsible AI (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf",
    "National AI Strategy (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2023-03/National-Strategy-for-Artificial-Intelligence.pdf",
    "RBI FREE-AI Framework 2025": "https://assets.kpmg.com/content/dam/kpmgsites/in/pdf/2025/08/rbi-free-ai-committee-report-on-framework-for-responsible-and-ethical-enablement-of-artificial-intelligence.pdf.coredownload.inline.pdf",
    "OECD AI Principles": "https://oecd.ai/en/assets/files/OECD-LEGAL-0449-en.pdf",
}
# Small, CPU-friendly sentence-transformer used to embed policy chunks.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Generator model id; overridable via env var.
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "google/flan-t5-small")  # Override with HF secret if GPU
# -----------------------------------------------------------
# 1. Helpers – ingest & embed PDFs
# -----------------------------------------------------------
def _download(url: str, out_dir: Path) -> Path:
    """Fetch *url* into *out_dir* (created on demand) and return the local path.

    The file name is taken from the last URL path segment. An already-present
    file is reused, so each PDF is downloaded at most once per temp directory.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    target = out_dir / Path(url).name
    if target.exists():
        return target
    response = requests.get(url, timeout=90)
    response.raise_for_status()
    target.write_bytes(response.content)
    return target
@lru_cache(maxsize=1)
def _vector_store():
    """Build (once, via lru_cache) a FAISS index over every policy PDF.

    Each URL in POLICY_URLS is downloaded to the system temp dir, loaded page
    by page, tagged with the policy title (surfaced later as a chat citation),
    split into overlapping chunks and embedded with EMBED_MODEL. Sources that
    fail to download or parse are skipped with a warning.

    Raises:
        RuntimeError: if *every* source failed — an empty corpus would make
            the retriever useless, and FAISS.from_documents would otherwise
            fail with an opaque error on an empty chunk list.
    """
    cache_dir = Path(tempfile.gettempdir()) / "policies"
    pages = []
    for title, url in POLICY_URLS.items():
        try:
            for pg in PyPDFLoader(str(_download(url, cache_dir))).load():
                pg.metadata["source"] = title  # cited in chat answers
                pages.append(pg)
        except Exception as e:
            # Best-effort ingestion: one broken/offline PDF must not kill startup.
            warnings.warn(f"Skipping {title}: {e}")
    if not pages:
        raise RuntimeError("No policy documents could be loaded; cannot build the vector store.")
    chunks = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128).split_documents(pages)
    return FAISS.from_documents(chunks, HuggingFaceEmbeddings(model_name=EMBED_MODEL))
# -----------------------------------------------------------
# 2. LLM helpers
# -----------------------------------------------------------
@lru_cache(maxsize=1)
def _load_seq2seq():
    """Load the tokenizer and encoder-decoder model exactly once (heavyweight)."""
    tok = AutoTokenizer.from_pretrained(LLM_MODEL_ID)
    # Seq2Seq loader is required for encoder-decoder models such as T5.
    model = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL_ID, device_map="auto")
    return tok, model
@lru_cache(maxsize=None)  # one lightweight pipeline per distinct token budget
def _get_llm_pipe(max_new_tokens: int = 96):
    """Return a text2text-generation pipeline capped at *max_new_tokens*.

    Callers use several budgets (96, 120, 256). The previous maxsize=1 cache
    keyed on max_new_tokens, so alternating budgets evicted and *reloaded the
    entire model* on every switch. The weights are now cached once in
    _load_seq2seq(); each budget only wraps them in a cheap pipeline object.
    Also uses the module-level LLM_MODEL_ID instead of re-reading the env var.
    """
    tok, model = _load_seq2seq()
    return pipeline(
        "text2text-generation",  # task for seq2seq models
        model=model,
        tokenizer=tok,
        max_new_tokens=max_new_tokens,
        do_sample=False,
    )
# -----------------------------------------------------------
# 3. Conversational RAG chain
# -----------------------------------------------------------
@lru_cache(maxsize=1)
def _get_chain():
    """Assemble (once, via lru_cache) the conversational RAG chain.

    Generator: the shared HF pipeline with a 256-token budget.
    Retriever: top-4 chunks from the policy FAISS index.
    """
    llm_wrapper = HuggingFacePipeline(pipeline=_get_llm_pipe(256))
    retriever = _vector_store().as_retriever(search_kwargs={"k": 4})
    return ConversationalRetrievalChain.from_llm(
        llm_wrapper,
        retriever=retriever,
        return_source_documents=True,
    )
# -----------------------------------------------------------
# 4. Quiz / scoring / remediation
# -----------------------------------------------------------
CATEGORIES = ["Governance", "Privacy", "Fairness", "Security", "Transparency"]
QUESTIONS = [
{"text": "Do you perform DPIAs before deploying new AI features?", "options": [
{"label": "Always", "score": {"Privacy": 5, "Governance": 3}},
{"label": "Sometimes", "score": {"Privacy": 3, "Governance": 1}},
{"label": "Never", "score": {"Privacy": 0}},
]},
{"text": "Is your training data regularly audited for bias?", "options": [
{"label": "Yes, every release", "score": {"Fairness": 5}},
{"label": "Occasionally", "score": {"Fairness": 2}},
{"label": "Not at all", "score": {"Fairness": 0}},
]},
{"text": "Who can override an AI decision in production?", "options": [
{"label": "Designated human reviewers", "score": {"Governance": 5, "Transparency": 2}},
{"label": "Anyone on DevOps", "score": {"Governance": 2}},
{"label": "No one – fully automated", "score": {"Governance": 0}},
]},
{"text": "How are model outputs logged?", "options": [
{"label": "Tamper-proof logs", "score": {"Security": 5, "Transparency": 3}},
{"label": "Plain-text logs", "score": {"Security": 2}},
{"label": "We don’t log", "score": {"Security": 0}},
]},
{"text": "Can users delete their personal data?", "options": [
{"label": "Yes – self-service portal", "score": {"Privacy": 5, "Transparency": 3}},
{"label": "Yes – via email", "score": {"Privacy": 3}},
{"label": "No formal process", "score": {"Privacy": 0}},
]},
]
REMEDIATIONS = {
"Governance": "Establish an AI oversight committee and define escalation paths.",
"Privacy": "Conduct DPIAs, implement data-deletion workflows and minimise PII.",
"Fairness": "Run bias audits each training cycle and diversify data.",
"Security": "Encrypt & tamper-proof logs; penetration-test quarterly.",
"Transparency": "Publish model cards, decision logs and user-facing explanations.",
}
MAX_PER_CAT = 5
def _score(answers):
s = {c: 0 for c in CATEGORIES}
for a, q in zip(answers, QUESTIONS):
for o in q["options"]:
if o["label"] == a:
for c, v in o["score"].items():
s[c] += v
return s
def _ai_paragraph(prompt: str) -> str:
    """Run the LLM on *prompt* and return only the newly generated text.

    Bug fix: the pipeline task is "text2text-generation" (seq2seq, e.g. T5),
    whose "generated_text" contains ONLY the model's answer — not the prompt
    followed by the answer. Unconditionally slicing off len(prompt) characters
    therefore truncated or completely emptied the output. The prompt prefix is
    now stripped only when it is actually present (the causal-LM case).
    """
    out = _get_llm_pipe(120)(prompt)[0]["generated_text"]
    if out.startswith(prompt):
        out = out[len(prompt):]
    return out.strip()
def grade_quiz(*answers):
    """Grade the self-assessment and return three markdown strings.

    *answers* arrive positionally from the Gradio Radio widgets — one option
    label (or None when unanswered) per entry in QUESTIONS.

    Returns:
        tuple[str, str, str]: (summary, section scores, remediations) markdown,
        in the on-screen display order.
    """
    scores = _score(answers)
    # "Question => chosen label" pairs, fed verbatim into the summary prompt.
    ans_pairs = " ; ".join(f"{q['text']} => {a or 'No answer'}" for a, q in zip(answers, QUESTIONS))
    summary_prompt = textwrap.dedent(f"""
    Write a concise two-paragraph summary of the following self-assessment answers, noting strengths and weaknesses:
    {ans_pairs}
    """)
    summary_txt = _ai_paragraph(summary_prompt)
    # NOTE(review): the weak-category cutoff is MAX_PER_CAT * 0.6 == 3, but some
    # categories can accumulate points from several questions (e.g. Privacy can
    # reach 10), so the bar is relatively easier for them — confirm intended.
    weak = [c for c, v in scores.items() if v < MAX_PER_CAT * 0.6]
    if weak:
        # Ask the LLM to word remediations for just the under-performing areas.
        remed_prompt = textwrap.dedent(f"""
        In one paragraph, propose concrete remediation steps for these areas: {', '.join(weak)}. Base on best-practice AI governance.
        """)
        remed_txt = _ai_paragraph(remed_prompt)
    else:
        remed_txt = "You meet or exceed best-practice thresholds across all categories. Keep up the good work!"
    score_md = "### 📊 Section Scores\n" + "\n".join(f"- **{c}**: {v}" for c, v in scores.items())
    return f"### 📋 Summary\n{summary_txt}", score_md, f"### 🛠️ Remediations\n{remed_txt}"
# -----------------------------------------------------------
# 5. Chat wrapper
# -----------------------------------------------------------
def rag_chat(message, history):
    """Gradio ChatInterface callback: answer *message* via the RAG chain.

    The chain is now fetched lazily inside the handler (and memoised by
    _get_chain's lru_cache), instead of the previous module-level
    `_chain = _get_chain()`, which forced the model and the full policy
    corpus to download at import time — before the UI could even render.
    First chat request pays the one-time initialisation cost instead.
    """
    chain = _get_chain()
    res = chain.invoke({"question": message, "chat_history": history})
    ans = res["answer"]
    # De-duplicated, alphabetised policy titles cited beneath the answer.
    srcs = {d.metadata.get("source", "") for d in res["source_documents"]}
    if srcs:
        ans += "\n\n**Sources:** " + ", ".join(sorted(srcs))
    return ans
# Starter prompts surfaced under the chat box (clicking one submits it).
SUGGESTED = [
    "What are PII regulations?",
    "Steps for conducting a DPIA under DPDP Act?",
    "How can we improve transparency in AI systems?",
]
# -----------------------------------------------------------
# 6. Gradio UI
# -----------------------------------------------------------
# Two-tab UI: the self-assessment quiz and the retrieval-augmented chatbot.
with gr.Blocks(title="Compliance Co-Pilot") as app:
    gr.Markdown("## 🛡️ Compliance Co-Pilot\nSelf-assessment + Reg-aware chatbot")
    # ----- Quiz tab -----
    with gr.Tab("Take the Test"):
        # One Radio widget per quiz question; choices are the option labels.
        radios = [gr.Radio(label=q["text"], choices=[o["label"] for o in q["options"]]) for q in QUESTIONS]
        submit = gr.Button("Submit")
        # Output panes in the fixed display order: summary, scores, remediations.
        md_sum = gr.Markdown(); md_score = gr.Markdown(); md_remed = gr.Markdown()
        submit.click(grade_quiz, radios, [md_sum, md_score, md_remed])
    # ----- Chat tab -----
    with gr.Tab("Chat & Guidance"):
        gr.Markdown("**Suggested prompts:** click to insert →")
        # `examples=` renders SUGGESTED as clickable starter prompts.
        gr.ChatInterface(fn=rag_chat, title="Ask the Co-Pilot", examples=SUGGESTED)
# -----------------------------------------------------------
# 7. Launch
# -----------------------------------------------------------
if __name__ == "__main__":
    app.queue()  # enable request queuing so concurrent users share workers fairly
    app.launch()