Spaces:

anktechsol
/

AnupalanKarta

Paused

App Files Files Community

AnupalanKarta / src /compliance_lib.py

anmol11p

fix token problem

b57a279 verified 6 months ago

raw

history blame

4.16 kB

	import re
	import os
	import requests as req
	from bs4 import BeautifulSoup
	from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
	import torch

	# Keyword heuristics per compliance framework.
	# Maps a framework name to a list of (control label, regex) pairs; run_check()
	# searches each regex in the policy text case-insensitively.
	RULES = {
	    "GDPR": [
	        ("Lawful basis documented", r"lawful\s+basis"),
	        # Grouped alternation: matches "right to access" or "right to erasure".
	        # An escaped pipe (\|) would only match a literal "|" character.
	        ("Data-subject rights process", r"right\s+to\s+(?:access|erasure)"),
	        ("72-hour breach notice plan", r"72\s*hour"),
	    ],
	    "EU_AI_ACT": [
	        ("High-risk AI DPIA", r"risk\s+assessment"),
	        ("Training data governance", r"data\s+governance"),
	    ],
	    "ISO_27001": [
	        ("Annex A control list", r"annex\s*a"),
	        ("Statement of Applicability", r"statement\s+of\s+applicability"),
	    ],
	}


	def run_check(text, framework):
	    """Run the keyword checks of the selected frameworks against *text*.

	    Args:
	        text: Policy text to scan. ``None`` is treated as empty text, so
	            every control is reported as not found.
	        framework: An iterable of framework names (keys of ``RULES``), or a
	            single framework name as a string.

	    Returns:
	        A dict mapping each requested framework to a list of
	        ``(label, passed)`` tuples, in the order the rules are declared.
	        ``passed`` is True when the rule's regex is found in *text*
	        (case-insensitive search).

	    Raises:
	        KeyError: If a requested framework has no entry in ``RULES``.
	    """
	    if text is None:
	        text = ""

	    # A bare string would otherwise be iterated character by character
	    # ("GDPR" -> "G", "D", ...), so wrap it into a one-element list.
	    if isinstance(framework, str):
	        selected = [framework]
	    else:
	        selected = list(framework)

	    # Validate up front so the error names every unknown framework.
	    unknown = [fw for fw in selected if fw not in RULES]
	    if unknown:
	        raise KeyError(f"Unknown framework(s): {', '.join(map(str, unknown))}")

	    return {
	        fw: [
	            (label, re.search(pattern, text, re.I) is not None)
	            for label, pattern in RULES[fw]
	        ]
	        for fw in selected
	    }


	# Prompt template for the LLM-written compliance report. It contains a single
	# `{bullet}` placeholder where the caller is expected to substitute the
	# checklist summary before passing the prompt to generate_report().
	AI_REPORT_PROMPT = """
	You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.

	Your task is to produce a clear, actionable report tailored to a technical audience. Structure it as follows:

	1. Executive Summary
	- One or two sentences on overall compliance posture
	- Highest‑level takeaways

	2. Key Issues Identified
	- For each framework with failures, list:
	- The specific failed control(s) by label
	- A brief description of why that control matters
	- Use bullet points and group by framework

	3. Remediation Strategies
	- For each key issue above, recommend a concrete next step or mitigation strategy
	- Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data at rest to meet ISO 27001 A.10.1”)

	4. Additional Resources & Next Steps
	- A short paragraph on how deeper expert review can streamline remediation
	- A call‑out promoting AnkTechSol’s professional compliance consulting (e.g., “For a full policy audit, tailored gap analysis, and implementation roadmap, visit anktechsol.com or contact our team at [contact link].”)

	5. Appendix (Optional)
	- Raw bullet list of “Framework: X passed/total, Y failed/total”

	Make sure to:
	- Use clear headings (`## Executive Summary`, etc.)
	- Keep each section brief but substantive (no more than 4–6 bullets per section)
	- Write in a confident, consultative tone

	Here are the raw results to incorporate:

	{bullet}

	Generate the report as markdown.
	"""

	# Hugging Face Hub model id that load_pipeline() loads the tokenizer and model from.
	HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"

	def load_pipeline():
	    """Build the text-generation pipeline for ``HF_MODEL``.

	    The tokenizer is loaded first, then the causal language model, which is
	    requested in float16 with ``device_map="auto"`` and
	    ``trust_remote_code=True``. Both are handed to ``transformers.pipeline``.

	    Returns:
	        The object returned by ``pipeline("text-generation", ...)``.
	    """
	    hf_tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)

	    model_options = {
	        "torch_dtype": torch.float16,
	        "device_map": "auto",
	        "trust_remote_code": True,
	    }
	    causal_lm = AutoModelForCausalLM.from_pretrained(HF_MODEL, **model_options)

	    return pipeline(
	        "text-generation",
	        model=causal_lm,
	        tokenizer=hf_tokenizer,
	        device_map="auto",
	    )

	# Module-level pipeline instance used by generate_report().
	# NOTE(review): this runs at import time, so importing the module triggers
	# the model load performed by load_pipeline().
	generator = load_pipeline()


	def generate_report(prompt, max_tokens=600):
	    """Generate report text for *prompt* with the module-level ``generator``.

	    Args:
	        prompt: The full prompt string passed to the pipeline.
	        max_tokens: Upper bound on newly generated tokens
	            (forwarded as ``max_new_tokens``).

	    Returns:
	        The ``"generated_text"`` of the first pipeline result. If anything
	        raises, the string ``"Error: <exception>"`` is returned instead of
	        propagating the exception.
	    """
	    sampling_options = {
	        "max_new_tokens": max_tokens,
	        "do_sample": True,
	        "temperature": 0.7,
	        "top_p": 0.95,
	        # Only the continuation is wanted, not the echoed prompt.
	        "return_full_text": False,
	    }
	    try:
	        outputs = generator(prompt, **sampling_options)
	        first_result = outputs[0]
	        return first_result["generated_text"]
	    except Exception as exc:
	        return f"Error: {exc}"


	def fetchText(url, timeout=10):
	    """Download *url* and return the visible text of the page.

	    The text is taken from the ``<main>`` element when present, otherwise
	    from ``<body>``, otherwise from the whole parsed document.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds to wait for the server before giving up; forwarded
	            to ``requests.get``.

	    Returns:
	        A ``(text, error)`` tuple: ``(text, None)`` on success, or
	        ``("", "Error fetching URL: <reason>")`` on any failure. The
	        function never raises.
	    """
	    try:
	        # Without a timeout, requests can wait indefinitely on a silent host.
	        response = req.get(url, timeout=timeout)
	        response.raise_for_status()
	        soup = BeautifulSoup(response.text, "html.parser")
	        # html.parser does not synthesize <body>; fragments may lack both
	        # <main> and <body>, so fall back to the whole document.
	        container = soup.find("main") or soup.body or soup
	        text = container.get_text(separator="\n", strip=True)
	        return text.strip(), None
	    except Exception as e:
	        # Deliberately broad: callers rely on the (text, error) contract
	        # rather than on exceptions.
	        return "", f"Error fetching URL: {e}"

	# Public API of this module: exported constants and functions.
	__all__ = ["RULES", "run_check", "AI_REPORT_PROMPT", "generate_report", "fetchText"]