Spaces:
Sleeping
Sleeping
File size: 4,110 Bytes
88335c8 f20b1e9 d2180f0 88335c8 d2180f0 88335c8 d2180f0 88335c8 d2180f0 88335c8 d2180f0 88335c8 88fde0c 88335c8 d2180f0 88335c8 d2180f0 f20b1e9 88fde0c 88335c8 d2180f0 88335c8 88fde0c d2180f0 88335c8 f20b1e9 88335c8 d2180f0 88335c8 d2180f0 88335c8 d2180f0 88fde0c f20b1e9 88335c8 d2180f0 88335c8 d2180f0 88335c8 d2180f0 88335c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# app.py – PolicyGPT 🇮🇳 (error-free)
import pathlib, tempfile, textwrap, traceback, requests
from functools import lru_cache
import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings # new import
from langchain_community.vectorstores import FAISS # new import
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from transformers import pipeline
import pypdf
# ---------- 1. Policy corpus ----------
# Human-readable source name -> public PDF URL for each policy document
# the app can index. Keys double as citation labels in answers.
POLICY_URLS = {
    "DPDP Act 2023":
        "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf",
    "Responsible AI (NITI Aayog)":
        "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf",
    # … keep the rest …
}
# Industry -> subset of POLICY_URLS keys to search for that industry.
# "All" spans the whole corpus and is the UI default.
INDUSTRY_MAP = {
    "Health Care": ["DPDP Act 2023", "Responsible AI (NITI Aayog)"],
    "All": list(POLICY_URLS.keys()),
}
# ---------- 2. Helpers ----------
def download(url: str, path: pathlib.Path) -> pathlib.Path:
    """Fetch *url* into *path* (skipped if it already exists) and return *path*.

    Writes to a temporary ``.part`` sibling and renames it into place so an
    interrupted run never leaves a truncated PDF behind — otherwise the
    ``path.exists()`` guard would treat the partial file as complete forever.

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        r = requests.get(url, timeout=120)
        r.raise_for_status()
        tmp = path.with_suffix(path.suffix + ".part")
        tmp.write_bytes(r.content)
        tmp.replace(path)  # atomic rename: either the full file exists or none
    return path
def pdf_text(path: pathlib.Path) -> str:
    """Return the newline-joined extracted text of every page in the PDF at *path*.

    Pages with no extractable text contribute an empty string, so page count
    is preserved in the output's line structure.
    """
    with path.open("rb") as fh:
        pages = [page.extract_text() or "" for page in pypdf.PdfReader(fh).pages]
    return "\n".join(pages)
# Cache one FAISS index per industry subset; maxsize=1 would evict and
# re-embed the whole corpus every time the user switched industries.
@lru_cache(maxsize=len(INDUSTRY_MAP))
def store(srcs=tuple(POLICY_URLS.keys())):
    """Build (and memoise) a FAISS vector store over the policies in *srcs*.

    Args:
        srcs: hashable tuple of POLICY_URLS keys (a tuple so lru_cache can
            key on it).

    Returns:
        A FAISS index of ~1 KB overlapping chunks, each tagged with its
        source name in ``metadata["src"]`` for citation.

    Raises:
        RuntimeError: if *every* source failed to download or parse —
            surfacing a clear message instead of FAISS's opaque
            empty-input error.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
    docs = []
    for name in srcs:
        path = pathlib.Path(tempfile.gettempdir()) / "policygpt" / f"{name}.pdf"
        try:
            for chunk in splitter.split_text(pdf_text(download(POLICY_URLS[name], path))):
                docs.append(Document(page_content=chunk, metadata={"src": name}))
        except Exception as e:
            # Best-effort: one unreachable/corrupt PDF must not sink the rest.
            print("❌", name, e)
    if not docs:
        raise RuntimeError(f"No policy documents could be loaded for sources: {srcs}")
    embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(docs, embed)
# Shared seq2seq generator used by rag(). flan-t5-small keeps the Space's
# footprint small; do_sample=False makes answers deterministic for a given
# prompt; max_new_tokens caps output near the "≤150 words" prompt budget.
GEN = pipeline(  # ✅ use text2text-generation
    "text2text-generation",
    model="google/flan-t5-small",
    max_new_tokens=200,
    do_sample=False,
)
def rag(q: str, industry: str):
    """Retrieve relevant policy chunks for *q* and generate a cited answer.

    Looks up the (cached) vector store for the chosen industry's sources,
    takes the top-4 similar chunks as context (capped at 3500 chars for the
    small model), and asks the generator for a short, source-cited answer.
    Generation failures are reported as a string rather than raised.
    """
    if industry == "All":
        sources = tuple(POLICY_URLS.keys())
    else:
        sources = tuple(INDUSTRY_MAP[industry])
    db = store(sources)
    hits = db.similarity_search(q, k=4)
    ctx = "\n\n".join(hit.page_content for hit in hits)[:3500]
    prompt = textwrap.dedent(f"""
        You are PolicyGPT. Using CONTEXT, answer QUESTION (≤150 words)
        and cite source names in brackets. If unsure, say I don’t know.
        CONTEXT:
        {ctx}
        QUESTION: {q}
        ANSWER:
        """)
    try:
        answer = GEN(prompt)[0]["generated_text"].strip()
    except Exception as e:
        return f"⚠️ Generation error: {e}"
    return answer or "I don’t know."
def risk(text: str) -> str:
    """Map answer text to a rough compliance-risk label via keyword scan.

    Checks case-insensitive substring markers in priority order:
    enforcement language -> "High", obligation language -> "Medium",
    otherwise "Low".
    """
    lowered = text.lower()
    tiers = (
        ("High", ("violation", "prohibited", "penalty")),
        ("Medium", ("must", "should", "shall")),
    )
    for label, markers in tiers:
        if any(marker in lowered for marker in markers):
            return label
    return "Low"
# ---------- 3. Gradio UI ----------
def answer_fn(q, ind):
    """Gradio callback: produce the answer, a risk banner, and re-enable Ask.

    The third output re-enables the Ask button that the paired unqueued
    handler disabled when the request started.
    """
    answer = rag(q, ind)
    banner = f"**Estimated compliance risk:** {risk(answer)}"
    return answer, banner, gr.update(interactive=True)
# ---------- 3. Gradio UI ----------
with gr.Blocks(title="PolicyGPT 🇮🇳") as demo:
    gr.Markdown("# PolicyGPT 🇮🇳 — ask about AI & Data-governance laws")
    ind = gr.Dropdown(list(INDUSTRY_MAP.keys()), label="Select industry", value="All")
    qbox = gr.Textbox(lines=2, label="Your question",
                      placeholder="e.g. What PII rules apply to hospitals?")
    ask = gr.Button("Ask")
    ans = gr.Markdown(); rsk = gr.Markdown()
    # Disable button while processing: each trigger wires TWO handlers —
    # a fast queue=False one that greys out the button immediately, then
    # the queued answer_fn whose third output re-enables it when done.
    ask.click(lambda: gr.update(interactive=False), None, ask, queue=False)
    ask.click(answer_fn, [qbox, ind], [ans, rsk, ask])
    # Pressing Enter in the textbox mirrors the Ask-button wiring.
    qbox.submit(lambda: gr.update(interactive=False), None, ask, queue=False)
    qbox.submit(answer_fn, [qbox, ind], [ans, rsk, ask])
# Gradio 4+: no concurrency_count param
if __name__ == "__main__":
    # queue() enables request queuing for the generation calls; launch()
    # starts the web server. (Removed stray trailing "|" scrape residue
    # that made this line a SyntaxError.)
    demo.queue().launch()