Spaces:

rishabh5752
/

Compliance_Chatbot

Sleeping

App Files Files Community

rishabh5752 committed on Sep 12, 2025

Commit

d2180f0

verified ·

1 Parent(s): a970da9

Create app.py

Browse files

Files changed (1) hide show

app.py +158 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import os, pathlib, tempfile, requests, json, textwrap, traceback
+from functools import lru_cache
+import gradio as gr
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.docstore.document import Document
+from transformers import pipeline
+import pypdf
+# ---------------------------------------------------------------------
+# 1️⃣  Reference corpus (add/remove as required)
+# ---------------------------------------------------------------------
+# Maps a human-readable policy name to the URL of its PDF.
+# The name is reused elsewhere in this file as the "source" metadata of every
+# text chunk and as the file name of the cached PDF, so keys must stay unique.
+POLICY_URLS = {
+    # 🇮🇳 India‑specific
+    "DPDP Act 2023": "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf",
+    "Responsible AI (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf",
+    "National AI Strategy (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2023-03/National-Strategy-for-Artificial-Intelligence.pdf",
+    "IS 17428‑1 (Data Privacy Assurance)": "https://archive.org/download/gov.in.is.17428.1.2020/gov.in.is.17428.1.2020.pdf",
+    "RBI FREE‑AI Framework 2025": "https://assets.kpmg.com/content/dam/kpmgsites/in/pdf/2025/08/rbi-free-ai-committee-report-on-framework-for-responsible-and-ethical-enablement-of-artificial-intelligence.pdf.coredownload.inline.pdf",
+    # 🌐 Global
+    "OECD AI Principles": "https://oecd.ai/en/assets/files/OECD-LEGAL-0449-en.pdf",
+    "EU AI Act (Reg. 2024/1689)": "https://eur-lex.europa.eu/resource.html?uri=cellar:99db59ed-3b7b-11ef-9e3c-01aa75ed71a1.0001.02/DOC_1&format=PDF",  # consolidated text
+    "ISO 42001 (AI MS)": "https://standards.iteh.ai/catalog/standards/iso/44d7188c-9cb8-4f0f-a358-06c7ce3e64f9/iso-iec-42001-2023.pdf",
+    "ISO 23894 (AI Risk Mgmt)": "https://cdn.standards.iteh.ai/samples/77304/cb803ee4e9624430a5db177459158b24/ISO-IEC-23894-2023.pdf",
+}
+# Maps an industry label (these keys are the UI dropdown choices) to the policy
+# names searched for that industry. Every name listed here must be a key of
+# POLICY_URLS, including the non-breaking hyphens used in some names.
+INDUSTRY_MAP = {
+    "Finance": ["DPDP Act 2023", "RBI FREE‑AI Framework 2025", "IS 17428‑1 (Data Privacy Assurance)", "OECD AI Principles"],
+    "Health Care": ["DPDP Act 2023", "Responsible AI (NITI Aayog)", "ISO 23894 (AI Risk Mgmt)", "OECD AI Principles"],
+    "E‑Commerce": ["DPDP Act 2023", "IS 17428‑1 (Data Privacy Assurance)", "OECD AI Principles", "EU AI Act (Reg. 2024/1689)"],
+    # "All" searches every policy in the corpus.
+    "All": list(POLICY_URLS.keys()),
+}
+# ---------------------------------------------------------------------
+# 2️⃣  Utility functions
+# ---------------------------------------------------------------------
+def download_file(url: str, path: pathlib.Path):
+    if path.exists():
+        return path
+    path.parent.mkdir(parents=True, exist_ok=True)
+    r = requests.get(url, timeout=60)
+    r.raise_for_status()
+    path.write_bytes(r.content)
+    return path
+def extract_text_from_pdf(pdf_path: pathlib.Path) -> str:
+    text = []
+    with pdf_path.open("rb") as f:
+        reader = pypdf.PdfReader(f)
+        for page in reader.pages:
+            txt = page.extract_text() or ""
+            text.append(txt)
+    return "\n".join(text)
+@lru_cache(maxsize=1)
+def build_vector_store(selected_sources=tuple(POLICY_URLS.keys())):
+    print("⏳ Building vector store …")
+    documents = []
+    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
+    for name in selected_sources:
+        url = POLICY_URLS[name]
+        pdf_path = pathlib.Path(tempfile.gettempdir()) / "policygpt" / f"{name}.pdf"
+        try:
+            download_file(url, pdf_path)
+            raw_text = extract_text_from_pdf(pdf_path)
+            chunks = splitter.split_text(raw_text)
+            for chunk in chunks:
+                documents.append(Document(page_content=chunk, metadata={"source": name}))
+            print(f"✔  Loaded {name} ({len(chunks)} chunks)")
+        except Exception as e:
+            print(f"✖  Failed to process {name}: {e}")
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    store = FAISS.from_documents(documents, embedding=embeddings)
+    return store
+# Light‑weight generative model for answers
+qa_pipeline = pipeline(
+    "text-generation",
+    model="google/flan-t5-small",
+    max_length=256,
+    do_sample=False,
+)
+def rag_answer(question: str, industry: str = "All") -> str:
+    # Build / get the store
+    if industry == "All":
+        store = build_vector_store(tuple(POLICY_URLS.keys()))
+    else:
+        store = build_vector_store(tuple(INDUSTRY_MAP[industry]))
+    # Retrieve top‑k chunks
+    docs = store.similarity_search(question, k=4)
+    context = "\n\n".join([d.page_content for d in docs])
+    prompt = textwrap.dedent(f"""\
+        You are PolicyGPT, an assistant that answers queries about AI governance and data protection
+        using the CONTEXT below. Provide concise, actionable guidance (≤150 words) and cite the
+        policy source name in brackets. If the answer is not in context, say "I don’t know."
+        CONTEXT:
+        {context}
+        Question: {question}
+        Answer:
+    """)
+    try:
+        response = qa_pipeline(prompt, truncate=True)[0]["generated_text"]
+    except Exception as e:
+        response = f"Error generating answer: {e}\n{traceback.format_exc()}"
+    return response.strip()
+# Very naive risk scoring
+def compliance_score(answer: str) -> str:
+    answer_low = answer.lower()
+    if any(w in answer_low for w in ["prohibited", "penalty", "violation"]):
+        return "High"
+    if any(w in answer_low for w in ["must", "should", "shall"]):
+        return "Medium"
+    return "Low"
+# ---------------------------------------------------------------------
+# 3️⃣  Gradio UI
+# ---------------------------------------------------------------------
+def chat(question, industry):
+    answer = rag_answer(question, industry)
+    score = compliance_score(answer)
+    return answer, f"Estimated compliance risk: **{score}**"
+with gr.Blocks(title="PolicyGPT 🇮🇳 (AI & Data Governance)") as demo:
+    gr.Markdown(
+        """
+        # PolicyGPT 🇮🇳
+        Ask anything about AI & Data Governance policies (DPDP Act, RBI FREE‑AI, ISO 42001, OECD, EU AI Act, etc.).
+        """)
+    with gr.Row():
+        industry_dd = gr.Dropdown(
+            choices=list(INDUSTRY_MAP.keys()),
+            label="Select your industry",
+            value="All",
+        )
+        user_input = gr.Textbox(label="Your question")
+    answer_out = gr.Markdown()
+    risk_out = gr.Markdown()
+    user_input.submit(chat, [user_input, industry_dd], [answer_out, risk_out])
+if __name__ == "__main__":
+    demo.launch()