# NOTE(review): the three lines that were here ("Spaces: / Sleeping / Sleeping") were
# Hugging Face Spaces page chrome captured during export — not source code.
"""
Compliance Co-Pilot — single-file Gradio Space.

Layout order: Summary -> Score -> Remediations (AI-worded) -> Chatbot with suggested prompts.
CPU-friendly default model. Max output tokens trimmed for speed.
"""
import os
import tempfile
import textwrap
import warnings
from functools import lru_cache
from pathlib import Path

import gradio as gr
import requests
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)
| # ----------------------------------------------------------- | |
| # 0. Policy corpus URLs | |
| # ----------------------------------------------------------- | |
| POLICY_URLS = { | |
| "DPDP Act 2023": "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf", | |
| "Responsible AI (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf", | |
| "National AI Strategy (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2023-03/National-Strategy-for-Artificial-Intelligence.pdf", | |
| "RBI FREE-AI Framework 2025": "https://assets.kpmg.com/content/dam/kpmgsites/in/pdf/2025/08/rbi-free-ai-committee-report-on-framework-for-responsible-and-ethical-enablement-of-artificial-intelligence.pdf.coredownload.inline.pdf", | |
| "OECD AI Principles": "https://oecd.ai/en/assets/files/OECD-LEGAL-0449-en.pdf", | |
| } | |
| EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2" | |
| LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "google/flan-t5-small") # Override with HF secret if GPU | |
| # ----------------------------------------------------------- | |
| # 1. Helpers β ingest & embed PDFs | |
| # ----------------------------------------------------------- | |
| def _download(url: str, out_dir: Path) -> Path: | |
| out_dir.mkdir(parents=True, exist_ok=True) | |
| fp = out_dir / Path(url).name | |
| if not fp.exists(): | |
| resp = requests.get(url, timeout=90) | |
| resp.raise_for_status() | |
| fp.write_bytes(resp.content) | |
| return fp | |
| def _vector_store(): | |
| tmp = Path(tempfile.gettempdir()) / "policies" | |
| pages = [] | |
| for title, url in POLICY_URLS.items(): | |
| try: | |
| for pg in PyPDFLoader(str(_download(url, tmp))).load(): | |
| pg.metadata["source"] = title | |
| pages.append(pg) | |
| except Exception as e: | |
| warnings.warn(f"Skipping {title}: {e}") | |
| chunks = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128).split_documents(pages) | |
| return FAISS.from_documents(chunks, HuggingFaceEmbeddings(model_name=EMBED_MODEL)) | |
| # ----------------------------------------------------------- | |
| # 2. LLM helpers | |
| # ----------------------------------------------------------- | |
| def _get_llm_pipe(max_new_tokens: int = 96): | |
| model_id = os.getenv("LLM_MODEL_ID", "google/flan-t5-small") | |
| tok = AutoTokenizer.from_pretrained(model_id) | |
| # use Seq2Seq loader for encoder-decoder models (e.g. T5) | |
| model = AutoModelForSeq2SeqLM.from_pretrained(model_id, device_map="auto") | |
| return pipeline( | |
| "text2text-generation", # <-- task for seq2seq | |
| model=model, | |
| tokenizer=tok, | |
| max_new_tokens=max_new_tokens, | |
| do_sample=False, | |
| ) | |
| # ----------------------------------------------------------- | |
| # 3. Conversational RAG chain | |
| # ----------------------------------------------------------- | |
| def _get_chain(): | |
| llm = HuggingFacePipeline(pipeline=_get_llm_pipe(256)) | |
| retr = _vector_store().as_retriever(search_kwargs={"k": 4}) | |
| return ConversationalRetrievalChain.from_llm(llm, retriever=retr, return_source_documents=True) | |
| # ----------------------------------------------------------- | |
| # 4. Quiz / scoring / remediation | |
| # ----------------------------------------------------------- | |
| CATEGORIES = ["Governance", "Privacy", "Fairness", "Security", "Transparency"] | |
| QUESTIONS = [ | |
| {"text": "Do you perform DPIAs before deploying new AI features?", "options": [ | |
| {"label": "Always", "score": {"Privacy": 5, "Governance": 3}}, | |
| {"label": "Sometimes", "score": {"Privacy": 3, "Governance": 1}}, | |
| {"label": "Never", "score": {"Privacy": 0}}, | |
| ]}, | |
| {"text": "Is your training data regularly audited for bias?", "options": [ | |
| {"label": "Yes, every release", "score": {"Fairness": 5}}, | |
| {"label": "Occasionally", "score": {"Fairness": 2}}, | |
| {"label": "Not at all", "score": {"Fairness": 0}}, | |
| ]}, | |
| {"text": "Who can override an AI decision in production?", "options": [ | |
| {"label": "Designated human reviewers", "score": {"Governance": 5, "Transparency": 2}}, | |
| {"label": "Anyone on DevOps", "score": {"Governance": 2}}, | |
| {"label": "No one β fully automated", "score": {"Governance": 0}}, | |
| ]}, | |
| {"text": "How are model outputs logged?", "options": [ | |
| {"label": "Tamper-proof logs", "score": {"Security": 5, "Transparency": 3}}, | |
| {"label": "Plain-text logs", "score": {"Security": 2}}, | |
| {"label": "We donβt log", "score": {"Security": 0}}, | |
| ]}, | |
| {"text": "Can users delete their personal data?", "options": [ | |
| {"label": "Yes β self-service portal", "score": {"Privacy": 5, "Transparency": 3}}, | |
| {"label": "Yes β via email", "score": {"Privacy": 3}}, | |
| {"label": "No formal process", "score": {"Privacy": 0}}, | |
| ]}, | |
| ] | |
| REMEDIATIONS = { | |
| "Governance": "Establish an AI oversight committee and define escalation paths.", | |
| "Privacy": "Conduct DPIAs, implement data-deletion workflows and minimise PII.", | |
| "Fairness": "Run bias audits each training cycle and diversify data.", | |
| "Security": "Encrypt & tamper-proof logs; penetration-test quarterly.", | |
| "Transparency": "Publish model cards, decision logs and user-facing explanations.", | |
| } | |
| MAX_PER_CAT = 5 | |
| def _score(answers): | |
| s = {c: 0 for c in CATEGORIES} | |
| for a, q in zip(answers, QUESTIONS): | |
| for o in q["options"]: | |
| if o["label"] == a: | |
| for c, v in o["score"].items(): | |
| s[c] += v | |
| return s | |
| def _ai_paragraph(prompt: str) -> str: | |
| out = _get_llm_pipe(120)(prompt)[0]["generated_text"] | |
| return out[len(prompt):].strip() | |
| def grade_quiz(*answers): | |
| scores = _score(answers) | |
| ans_pairs = " ; ".join(f"{q['text']} => {a or 'No answer'}" for a, q in zip(answers, QUESTIONS)) | |
| summary_prompt = textwrap.dedent(f""" | |
| Write a concise two-paragraph summary of the following self-assessment answers, noting strengths and weaknesses: | |
| {ans_pairs} | |
| """) | |
| summary_txt = _ai_paragraph(summary_prompt) | |
| weak = [c for c, v in scores.items() if v < MAX_PER_CAT * 0.6] | |
| if weak: | |
| remed_prompt = textwrap.dedent(f""" | |
| In one paragraph, propose concrete remediation steps for these areas: {', '.join(weak)}. Base on best-practice AI governance. | |
| """) | |
| remed_txt = _ai_paragraph(remed_prompt) | |
| else: | |
| remed_txt = "You meet or exceed best-practice thresholds across all categories. Keep up the good work!" | |
| score_md = "### π Section Scores\n" + "\n".join(f"- **{c}**: {v}" for c, v in scores.items()) | |
| return f"### π Summary\n{summary_txt}", score_md, f"### π οΈ Remediations\n{remed_txt}" | |
| # ----------------------------------------------------------- | |
| # 5. Chat wrapper | |
| # ----------------------------------------------------------- | |
| _chain = _get_chain() | |
| def rag_chat(message, history): | |
| res = _chain.invoke({"question": message, "chat_history": history}) | |
| ans = res["answer"] | |
| srcs = {d.metadata.get("source", "") for d in res["source_documents"]} | |
| if srcs: | |
| ans += "\n\n**Sources:** " + ", ".join(sorted(srcs)) | |
| return ans | |
| SUGGESTED = [ | |
| "What are PII regulations?", | |
| "Steps for conducting a DPIA under DPDP Act?", | |
| "How can we improve transparency in AI systems?", | |
| ] | |
| # ----------------------------------------------------------- | |
| # 6. Gradio UI | |
| # ----------------------------------------------------------- | |
| with gr.Blocks(title="Compliance Co-Pilot") as app: | |
| gr.Markdown("## π‘οΈ Compliance Co-Pilot\nSelf-assessment + Reg-aware chatbot") | |
| # ----- Quiz tab ----- | |
| with gr.Tab("Take the Test"): | |
| radios = [gr.Radio(label=q["text"], choices=[o["label"] for o in q["options"]]) for q in QUESTIONS] | |
| submit = gr.Button("Submit") | |
| md_sum = gr.Markdown(); md_score = gr.Markdown(); md_remed = gr.Markdown() | |
| submit.click(grade_quiz, radios, [md_sum, md_score, md_remed]) | |
| # ----- Chat tab ----- | |
| with gr.Tab("Chat & Guidance"): | |
| gr.Markdown("**Suggested prompts:** click to insert β") | |
| gr.ChatInterface(fn=rag_chat, title="Ask the Co-Pilot", examples=SUGGESTED) | |
| # ----------------------------------------------------------- | |
| # 7. Launch | |
| # ----------------------------------------------------------- | |
| if __name__ == "__main__": | |
| app.queue() | |
| app.launch() |