AnupalanKarta / src /compliance_lib.py
anmol11p's picture
Improve input flow and reporting
a401b06 verified
raw
history blame
4.17 kB
import re
from huggingface_hub import InferenceClient
import os
import requests as req
from bs4 import BeautifulSoup
import streamlit as st
from dotenv import load_dotenv
load_dotenv()
# Keyword heuristics for each supported compliance framework.
# Maps framework name -> list of (checklist label, regex pattern) pairs.
# A control counts as "passed" when its pattern is found anywhere in the
# scanned text (run_check applies the patterns with re.search and re.I).
RULES = {
    "GDPR": [
        ("Lawful basis documented", r"lawful\s+basis"),
        # NOTE: "|" binds loosest, so this matches either the phrase
        # "right to access" or the bare word "erasure".
        ("Data-subject rights process", r"right\s+to\s+access|erasure"),
        # Allow whitespace and/or a hyphen between "72" and "hour" so the
        # common spelling "72-hour" is recognised as well as "72 hours".
        ("72-hour breach notice plan", r"72[\s\-]*hour"),
    ],
    "EU_AI_ACT": [
        ("High-risk AI DPIA", r"risk\s+assessment"),
        ("Training data governance", r"data\s+governance"),
    ],
    "ISO_27001": [
        ("Annex A control list", r"annex\s*a"),
        ("Statement of Applicability", r"statement\s+of\s+applicability"),
    ],
}
def run_check(text, framework):
    """Run the keyword checks of each requested framework against *text*.

    Args:
        text: Document text to scan.
        framework: Iterable of framework names (keys of ``RULES``). A single
            name given as a plain string is treated as a one-item list.

    Returns:
        Dict mapping each requested framework name to a list of
        ``(label, passed)`` tuples, in the order the rules are declared in
        ``RULES``. ``passed`` is True when the rule's pattern occurs
        anywhere in *text* (case-insensitive).

    Raises:
        KeyError: If a requested framework is not a key of ``RULES``.
    """
    # A bare string would otherwise be iterated character by character and
    # fail with a confusing error such as KeyError('G').
    if isinstance(framework, str):
        framework = [framework]

    results = {}
    for fw in framework:
        if fw not in RULES:
            raise KeyError(
                f"Unknown framework {fw!r}; expected one of {sorted(RULES)}"
            )
        # Each rule is a (label, pattern) pair, e.g.
        # label "Training data governance", pattern r"data\s+governance".
        results[fw] = [
            (label, re.search(pattern, text, re.IGNORECASE) is not None)
            for label, pattern in RULES[fw]
        ]
    return results
# Prompt template for the AI-written compliance report.
# Contains a single "{bullet}" placeholder for the raw checklist results;
# presumably the caller fills it in (e.g. via str.format) before passing the
# text to generate_report -- confirm against the caller.
# NOTE(review): "Redemption Strategies" below likely meant "Remediation
# Strategies"; left unchanged here because the text is sent to the model.
AI_REPORT_PROMPT = """
You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.
Your task is to produce a **clear, actionable report** tailored to a technical audience. Structure it as follows:
1. **Executive Summary**
- One or two sentences on overall compliance posture
- Highest‑level takeaways
2. **Key Issues Identified**
- For each framework with failures, list:
- The specific failed control(s) by label
- A brief description of why that control matters
- Use bullet points and group by framework
3. **Redemption Strategies**
- For each key issue above, recommend a **concrete next step** or mitigation strategy
- Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data at rest to meet ISO 27001 A.10.1”)
4. **Additional Resources & Next Steps**
- A short paragraph on how deeper expert review can streamline remediation
- A call‑out promoting AnkTechSol’s professional compliance consulting (e.g., “For a full policy audit, tailored gap analysis, and implementation roadmap, visit anktechsol.com or contact our team at [contact link].”)
5. **Appendix (Optional)**
- Raw bullet list of “Framework: X passed/total, Y failed/total”
Make sure to:
- Use clear headings (`## Executive Summary`, etc.)
- Keep each section brief but substantive (no more than 4–6 bullets per section)
- Write in a confident, consultative tone
Here are the raw results to incorporate:
{bullet}
Generate the report as markdown.
"""
# Model identifier passed as `model=` to the chat-completion request made
# in generate_report.
HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
def generate_report(prompt, max_tokens=600):
    """Send *prompt* to the hosted chat model and return the generated text.

    Args:
        prompt: Full user message sent to the model.
        max_tokens: Upper bound on the number of generated tokens; forwarded
            to the chat-completion request.

    Returns:
        The message content of the first completion choice, or the literal
        string ``"Error: Failed to generate report."`` if the request fails.

    Raises:
        EnvironmentError: If the ``HF_TOKEN`` environment variable is unset
            or empty.
    """
    # Imported locally so this function does not depend on a module-level
    # import that the rest of the file does not already have.
    import logging

    token = os.getenv("HF_TOKEN")
    if not token:
        raise EnvironmentError("token is not found in env issue")

    client = InferenceClient(
        provider="together",
        api_key=token,
    )
    try:
        response = client.chat.completions.create(
            model=HF_MODEL,
            messages=[{"role": "user", "content": prompt}],
            # Previously accepted by the signature but never sent to the API.
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
    except Exception:
        # Keep the best-effort contract (callers get a displayable string),
        # but record the real cause instead of discarding it.
        logging.getLogger(__name__).exception("Report generation failed")
        return "Error: Failed to generate report."
def fetchText(url, timeout=10):
    """Download *url* and return the visible text of the page.

    The text is taken from the ``<main>`` element when present, otherwise
    from ``<body>``, otherwise from the whole parsed document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        A ``(text, error)`` tuple: ``(text, None)`` on success, or
        ``("", "Error fetching URL: ...")`` if anything goes wrong. The
        function does not raise for request or parsing failures.
    """
    try:
        response = req.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        container = soup.find('main')
        if container is None:
            container = soup.body
        if container is None:
            # Fragment or non-HTML response without a <body>: use the whole
            # document rather than failing on a None attribute access.
            container = soup

        text = container.get_text(separator='\n', strip=True)
        return text.strip(), None  # No error
    except Exception as e:
        # Broad on purpose: the contract is to report failures via the
        # second tuple element instead of raising.
        return "", f"Error fetching URL: {e}"
# Names exported by `from <module> import *`.
__all__ = [
    "RULES",
    "run_check",
    "AI_REPORT_PROMPT",
    "generate_report",
    "fetchText",
]