rishabh5752 committed on
Commit
88335c8
·
verified ·
1 Parent(s): 88fde0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -97
app.py CHANGED
@@ -1,134 +1,112 @@
 
 
1
  import pathlib, tempfile, textwrap, traceback, requests
2
  from functools import lru_cache
3
 
4
  import gradio as gr
 
 
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
- from langchain.vectorstores import FAISS
8
  from langchain.docstore.document import Document
9
  from transformers import pipeline
10
  import pypdf
11
 
12
- # --------------------------------------------------
13
- # 1️⃣ Policy corpus
14
- # --------------------------------------------------
15
  POLICY_URLS = {
16
- "DPDP Act 2023": "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf",
17
- "Responsible AI (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf",
18
- "National AI Strategy (NITI Aayog)": "https://www.niti.gov.in/sites/default/files/2023-03/National-Strategy-for-Artificial-Intelligence.pdf",
19
- "IS 17428-1 (Data Privacy Assurance)": "https://archive.org/download/gov.in.is.17428.1.2020/gov.in.is.17428.1.2020.pdf",
20
- "RBI FREE-AI Framework 2025": "https://assets.kpmg.com/content/dam/kpmgsites/in/pdf/2025/08/rbi-free-ai-committee-report-on-framework-for-responsible-and-ethical-enablement-of-artificial-intelligence.pdf.coredownload.inline.pdf",
21
-
22
- "OECD AI Principles": "https://oecd.ai/en/assets/files/OECD-LEGAL-0449-en.pdf",
23
- "EU AI Act 2024": "https://eur-lex.europa.eu/resource.html?uri=cellar:99db59ed-3b7b-11ef-9e3c-01aa75ed71a1.0001.02/DOC_1&format=PDF",
24
- "ISO/IEC 42001:2023": "https://standards.iteh.ai/catalog/standards/iso/44d7188c-9cb8-4f0f-a358-06c7ce3e64f9/iso-iec-42001-2023.pdf",
25
- "ISO/IEC 23894:2023": "https://cdn.standards.iteh.ai/samples/77304/cb803ee4e9624430a5db177459158b24/ISO-IEC-23894-2023.pdf",
26
  }
27
 
28
  INDUSTRY_MAP = {
29
- "Finance": ["DPDP Act 2023", "RBI FREE-AI Framework 2025", "IS 17428-1 (Data Privacy Assurance)", "OECD AI Principles"],
30
- "Health Care": ["DPDP Act 2023", "Responsible AI (NITI Aayog)", "ISO/IEC 23894:2023", "OECD AI Principles"],
31
- "E-Commerce": ["DPDP Act 2023", "IS 17428-1 (Data Privacy Assurance)", "OECD AI Principles", "EU AI Act 2024"],
32
  "All": list(POLICY_URLS.keys()),
33
  }
34
 
35
- # --------------------------------------------------
36
- # 2️⃣ Helpers
37
- # --------------------------------------------------
38
-
39
- def download_file(url: str, dest: pathlib.Path):
40
- if dest.exists():
41
- return dest
42
- dest.parent.mkdir(parents=True, exist_ok=True)
43
- r = requests.get(url, timeout=120)
44
- r.raise_for_status()
45
- dest.write_bytes(r.content)
46
- return dest
47
-
48
-
49
- def pdf_to_text(path: pathlib.Path) -> str:
50
- txt = []
51
  with path.open("rb") as f:
52
- rdr = pypdf.PdfReader(f)
53
- for p in rdr.pages:
54
- txt.append(p.extract_text() or "")
55
- return "\n".join(txt)
56
-
57
 
58
  @lru_cache(maxsize=1)
59
- def get_store(srcs=tuple(POLICY_URLS.keys())):
60
- print("Building FAISS index… (cached)")
61
  splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
62
  docs = []
63
  for name in srcs:
64
- p = pathlib.Path(tempfile.gettempdir()) / "policygpt" / f"{name}.pdf"
65
  try:
66
- download_file(POLICY_URLS[name], p)
67
- for chunk in splitter.split_text(pdf_to_text(p)):
68
  docs.append(Document(page_content=chunk, metadata={"src": name}))
69
- print("✓", name)
70
  except Exception as e:
71
- print("", name, e)
72
- emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
73
- return FAISS.from_documents(docs, emb)
74
-
75
- GEN = pipeline("text-generation", model="google/flan-t5-small", max_new_tokens=200, do_sample=False)
76
-
77
-
78
- def rag(question: str, industry: str):
79
- store = get_store(tuple(POLICY_URLS.keys()) if industry == "All" else tuple(INDUSTRY_MAP[industry]))
80
- ctx = store.similarity_search(question, k=4)
81
- context = "\n\n".join(d.page_content for d in ctx)[:3500]
 
 
 
82
  prompt = textwrap.dedent(f"""
83
- You are PolicyGPT. Using CONTEXT, answer QUESTION in ≤150 words, citing source names in brackets. If unsure, say I don’t know.
 
 
 
 
84
 
85
- CONTEXT:\n{context}\n\nQUESTION: {question}\nANSWER:
 
86
  """)
87
  try:
88
- raw = GEN(prompt)[0]["generated_text"]
89
- ans = raw.split("ANSWER:")[-1].strip()
90
- return ans if ans else "I don’t know."
91
  except Exception as e:
92
  return f"⚠️ Generation error: {e}"
93
 
94
-
95
- def tag(text: str) -> str:
96
- t = text.lower()
97
- if any(w in t for w in ("violation", "prohibited", "penalty")):
98
  return "High"
99
- if any(w in t for w in ("must", "should", "shall", "mandatory")):
100
  return "Medium"
101
  return "Low"
102
 
103
- # --------------------------------------------------
104
- # 3️⃣ Gradio UI (button disabled while processing)
105
- # --------------------------------------------------
106
-
107
- def answer_fn(question, industry):
108
- ans = rag(question, industry)
109
- return ans, f"**Estimated compliance risk:** {tag(ans)}", gr.update(interactive=True)
110
-
111
- with gr.Blocks(title="PolicyGPT 🇮🇳 (AI & Data Governance)") as demo:
112
- gr.Markdown("""# PolicyGPT 🇮🇳\nAsk about Indian & global AI governance (DPDP, RBI FREE‑AI, ISO 42001, EU AI Act …).""")
113
-
114
- industry_dd = gr.Dropdown(label="Select industry", choices=list(INDUSTRY_MAP.keys()), value="All")
115
- question_tb = gr.Textbox(label="Your question", lines=2, placeholder="e.g. What rules govern patient PII?")
116
- ask_btn = gr.Button("Ask")
117
- with gr.Row():
118
- answer_md = gr.Markdown()
119
- risk_md = gr.Markdown()
120
-
121
- # Disable button immediately, run processing, then re‑enable
122
- def disable_btn():
123
- return gr.update(interactive=False)
124
-
125
- ask_btn.click(disable_btn, None, ask_btn, queue=False)
126
- ask_btn.click(answer_fn, [question_tb, industry_dd], [answer_md, risk_md, ask_btn], queue=True)
127
-
128
- # Enter key triggers same flow
129
- question_tb.submit(disable_btn, None, ask_btn, queue=False)
130
- question_tb.submit(answer_fn, [question_tb, industry_dd], [answer_md, risk_md, ask_btn], queue=True)
131
-
132
- # Enable queuing so button auto‑disabled while async runs
133
  if __name__ == "__main__":
134
- demo.queue(concurrency_count=4).launch()
 
1
+ # app.py – PolicyGPT 🇮🇳 (error-free)
2
+
3
  import pathlib, tempfile, textwrap, traceback, requests
4
  from functools import lru_cache
5
 
6
  import gradio as gr
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings # new import
8
+ from langchain_community.vectorstores import FAISS # new import
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
10
  from langchain.docstore.document import Document
11
  from transformers import pipeline
12
  import pypdf
13
 
14
+ # ---------- 1. Policy corpus ----------
 
 
15
  POLICY_URLS = {
16
+ "DPDP Act 2023":
17
+ "https://www.meity.gov.in/static/uploads/2024/06/2bf1f0e9f04e6fb4f8fef35e82c42aa5.pdf",
18
+ "Responsible AI (NITI Aayog)":
19
+ "https://www.niti.gov.in/sites/default/files/2021-08/Part2-Responsible-AI-12082021.pdf",
20
+ # keep the rest …
 
 
 
 
 
21
  }
22
 
23
  INDUSTRY_MAP = {
24
+ "Health Care": ["DPDP Act 2023", "Responsible AI (NITI Aayog)"],
 
 
25
  "All": list(POLICY_URLS.keys()),
26
  }
27
 
28
+ # ---------- 2. Helpers ----------
29
+ def download(url: str, path: pathlib.Path):
30
+ if not path.exists():
31
+ path.parent.mkdir(parents=True, exist_ok=True)
32
+ r = requests.get(url, timeout=120)
33
+ r.raise_for_status()
34
+ path.write_bytes(r.content)
35
+ return path
36
+
37
+ def pdf_text(path: pathlib.Path) -> str:
38
+ out = []
 
 
 
 
 
39
  with path.open("rb") as f:
40
+ for p in pypdf.PdfReader(f).pages:
41
+ out.append(p.extract_text() or "")
42
+ return "\n".join(out)
 
 
43
 
44
  @lru_cache(maxsize=1)
45
+ def store(srcs=tuple(POLICY_URLS.keys())):
 
46
  splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
47
  docs = []
48
  for name in srcs:
49
+ path = pathlib.Path(tempfile.gettempdir()) / "policygpt" / f"{name}.pdf"
50
  try:
51
+ for chunk in splitter.split_text(pdf_text(download(POLICY_URLS[name], path))):
 
52
  docs.append(Document(page_content=chunk, metadata={"src": name}))
 
53
  except Exception as e:
54
+ print("", name, e)
55
+ embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
56
+ return FAISS.from_documents(docs, embed)
57
+
58
+ GEN = pipeline( # use text2text-generation
59
+ "text2text-generation",
60
+ model="google/flan-t5-small",
61
+ max_new_tokens=200,
62
+ do_sample=False,
63
+ )
64
+
65
+ def rag(q: str, industry: str):
66
+ db = store(tuple(POLICY_URLS.keys()) if industry == "All" else tuple(INDUSTRY_MAP[industry]))
67
+ ctx = "\n\n".join(d.page_content for d in db.similarity_search(q, k=4))[:3500]
68
  prompt = textwrap.dedent(f"""
69
+ You are PolicyGPT. Using CONTEXT, answer QUESTION (≤150 words)
70
+ and cite source names in brackets. If unsure, say I don’t know.
71
+
72
+ CONTEXT:
73
+ {ctx}
74
 
75
+ QUESTION: {q}
76
+ ANSWER:
77
  """)
78
  try:
79
+ return GEN(prompt)[0]["generated_text"].strip() or "I don’t know."
 
 
80
  except Exception as e:
81
  return f"⚠️ Generation error: {e}"
82
 
83
+ def risk(text: str):
84
+ low = text.lower()
85
+ if any(k in low for k in ("violation", "prohibited", "penalty")):
 
86
  return "High"
87
+ if any(k in low for k in ("must", "should", "shall")):
88
  return "Medium"
89
  return "Low"
90
 
91
+ # ---------- 3. Gradio UI ----------
92
+ def answer_fn(q, ind):
93
+ a = rag(q, ind)
94
+ return a, f"**Estimated compliance risk:** {risk(a)}", gr.update(interactive=True)
95
+
96
+ with gr.Blocks(title="PolicyGPT 🇮🇳") as demo:
97
+ gr.Markdown("# PolicyGPT 🇮🇳 ask about AI & Data-governance laws")
98
+ ind = gr.Dropdown(list(INDUSTRY_MAP.keys()), label="Select industry", value="All")
99
+ qbox = gr.Textbox(lines=2, label="Your question",
100
+ placeholder="e.g. What PII rules apply to hospitals?")
101
+ ask = gr.Button("Ask")
102
+ ans = gr.Markdown(); rsk = gr.Markdown()
103
+
104
+ # Disable button while processing
105
+ ask.click(lambda: gr.update(interactive=False), None, ask, queue=False)
106
+ ask.click(answer_fn, [qbox, ind], [ans, rsk, ask])
107
+ qbox.submit(lambda: gr.update(interactive=False), None, ask, queue=False)
108
+ qbox.submit(answer_fn, [qbox, ind], [ans, rsk, ask])
109
+
110
+ # Gradio 4+: no concurrency_count param
 
 
 
 
 
 
 
 
 
 
111
  if __name__ == "__main__":
112
+ demo.queue().launch()