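# AnupalanKarta / src / compliance_lib.py
# File-viewer page header captured with this file (not part of the program):
# uploaded by anmol11p; last commit "fix token problem" (b57a279, verified);
# page links "raw" / "history" / "blame"; file size 4.16 kB.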
import re
import os
import requests as req
from bs4 import BeautifulSoup
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import torch
# Keyword rules, grouped by compliance framework.
# Each framework name maps to a list of (control label, regex pattern) pairs.
# run_check() marks a control as passed when its pattern is found in the
# document text, searching case-insensitively.
RULES = {
    "GDPR": [
        ("Lawful basis documented", r"lawful\s+basis"),
        # The alternation covers either "right to access" or the word "erasure".
        ("Data-subject rights process", r"right\s+to\s+access|erasure"),
        ("72-hour breach notice plan", r"72\s*hour"),
    ],
    "EU_AI_ACT": [
        ("High-risk AI DPIA", r"risk\s+assessment"),
        ("Training data governance", r"data\s+governance"),
    ],
    "ISO_27001": [
        ("Annex A control list", r"annex\s*a"),
        ("Statement of Applicability", r"statement\s+of\s+applicability"),
    ],
}
def run_check(text, framework):
    """Check *text* against the keyword rules of one or more frameworks.

    Args:
        text: Document text to scan.
        framework: Iterable of framework names (keys of ``RULES``). A single
            name passed as a plain string is also accepted and treated as one
            framework instead of being iterated character by character. An
            empty string or empty iterable selects no frameworks.

    Returns:
        Dict mapping each requested framework name to a list of
        ``(label, passed)`` tuples in rule-declaration order. ``passed`` is
        True when the rule's pattern occurs anywhere in *text*
        (case-insensitive search).

    Raises:
        KeyError: If a requested framework name is not a key of ``RULES``.
    """
    # A bare string is itself iterable: without this guard "GDPR" would be
    # looked up as "G", "D", "P", "R" and fail with a confusing KeyError.
    if isinstance(framework, str):
        frameworks = [framework] if framework else []
    else:
        frameworks = framework

    results = {}
    for fw in frameworks:
        results[fw] = [
            (label, re.search(pattern, text, re.I) is not None)
            for label, pattern in RULES[fw]
        ]
    return results
# Prompt template for the AI-written compliance report.
# The only placeholder is {bullet}, which must be replaced with the checklist
# summary before the prompt is sent to the model (presumably via str.format;
# the call site is not visible here — confirm against the caller).
AI_REPORT_PROMPT = """
You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.
Your task is to produce a **clear, actionable report** tailored to a technical audience. Structure it as follows:
1. **Executive Summary**
- One or two sentences on overall compliance posture
- Highest‑level takeaways
2. **Key Issues Identified**
- For each framework with failures, list:
- The specific failed control(s) by label
- A brief description of why that control matters
- Use bullet points and group by framework
3. **Remediation Strategies**
- For each key issue above, recommend a **concrete next step** or mitigation strategy
- Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data at rest to meet ISO 27001 A.10.1”)
4. **Additional Resources & Next Steps**
- A short paragraph on how deeper expert review can streamline remediation
- A call‑out promoting AnkTechSol’s professional compliance consulting (e.g., “For a full policy audit, tailored gap analysis, and implementation roadmap, visit anktechsol.com or contact our team at [contact link].”)
5. **Appendix (Optional)**
- Raw bullet list of “Framework: X passed/total, Y failed/total”
Make sure to:
- Use clear headings (`## Executive Summary`, etc.)
- Keep each section brief but substantive (no more than 4–6 bullets per section)
- Write in a confident, consultative tone
Here are the raw results to incorporate:
{bullet}
Generate the report as markdown.
"""
# Hugging Face Hub model id; load_pipeline() passes it to from_pretrained()
# for both the tokenizer and the causal language model.
HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# Build the text-generation pipeline
def load_pipeline():
    """Load ``HF_MODEL`` and wrap it in a ``"text-generation"`` pipeline.

    The tokenizer is loaded first, then the causal language model in float16
    with ``device_map="auto"``; both are handed to ``transformers.pipeline``.

    Returns:
        The pipeline object created by ``pipeline("text-generation", ...)``.
    """
    hf_tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)

    # NOTE(review): trust_remote_code=True allows Python code shipped in the
    # model repository to run during loading — confirm this model needs it.
    causal_lm = AutoModelForCausalLM.from_pretrained(
        HF_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

    return pipeline(
        "text-generation",
        model=causal_lm,
        tokenizer=hf_tokenizer,
        device_map="auto",
    )
# Module-level pipeline used by generate_report(). This statement runs at
# import time, so importing this module executes the full load_pipeline() call.
generator = load_pipeline()
def generate_report(prompt, max_tokens=600):
    """Generate text for *prompt* using the module-level ``generator``.

    Args:
        prompt: Complete prompt string handed to the pipeline.
        max_tokens: Passed to the pipeline as ``max_new_tokens``.

    Returns:
        The ``"generated_text"`` entry of the first pipeline result. The call
        uses sampling (temperature 0.7, top_p 0.95) and
        ``return_full_text=False``. This function never raises: on any
        exception it returns the string ``"Error: <exception message>"``.
    """
    sampling_options = {
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "return_full_text": False,
    }
    try:
        outputs = generator(prompt, **sampling_options)
        first_result = outputs[0]
        return first_result["generated_text"]
    except Exception as err:
        # Callers receive failures as text rather than as an exception.
        return f"Error: {err}"
def fetchText(url, timeout=10):
    """Download *url* and return the readable text of the page.

    The text is taken from the ``<main>`` element when the page has one,
    otherwise from ``<body>``, and as a last resort from the whole parsed
    document (HTML fragments may have no ``<body>`` at all).

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Without a timeout a stalled server would
            block the caller indefinitely.

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the extracted text
        with elements separated by newlines and ``error`` is None. On any
        failure ``text`` is ``""`` and ``error`` is a message starting with
        ``"Error fetching URL: "``. This function never raises.
    """
    try:
        response = req.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        container = soup.find('main')
        if container is None:
            # soup.body is None for documents without a <body> tag; fall back
            # to the whole tree instead of failing on None.get_text().
            container = soup.body if soup.body is not None else soup

        text = container.get_text(separator='\n', strip=True)
        return text.strip(), None
    except Exception as e:
        # Deliberate catch-all: callers rely on the (text, error) contract.
        return "", f"Error fetching URL: {e}"
# Public names exported by this module
__all__ = [
    "RULES",
    "run_check",
    "AI_REPORT_PROMPT",
    "generate_report",
    "fetchText",
]