Spaces:
Paused
Paused
| import re | |
| import os | |
| import requests as req | |
| from bs4 import BeautifulSoup | |
| from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
# Compliance checklist rules, keyed by framework identifier.
# Each entry is a (label, regex) pair; run_check() marks a control as passed
# when the regex is found anywhere in the analysed text (case-insensitively).
RULES = {
    "GDPR": [
        ("Lawful basis documented", r"lawful\s+basis"),
        # NOTE: the alternation is top-level, so this matches either
        # "right to access" or the bare word "erasure".
        ("Data-subject rights process", r"right\s+to\s+access|erasure"),
        ("72-hour breach notice plan", r"72\s*hour"),
    ],
    "EU_AI_ACT": [
        ("High-risk AI DPIA", r"risk\s+assessment"),
        ("Training data governance", r"data\s+governance"),
    ],
    "ISO_27001": [
        ("Annex A control list", r"annex\s*a"),
        ("Statement of Applicability", r"statement\s+of\s+applicability"),
    ],
}


def run_check(text, framework):
    """Evaluate *text* against the checklist of each requested framework.

    Args:
        text: The document text to scan.
        framework: An iterable of framework keys from ``RULES`` (for example
            ``["GDPR", "ISO_27001"]``). A single key given as a plain string
            is also accepted and treated as a one-element list.

    Returns:
        A dict mapping each requested framework key to a list of
        ``(label, passed)`` tuples, in the order the rules are declared.

    Raises:
        KeyError: If a requested framework is not a key of ``RULES``.
    """
    # A bare string would otherwise be iterated character by character and
    # fail with a confusing KeyError on its first letter.
    frameworks = [framework] if isinstance(framework, str) else framework
    return {
        fw: [
            (label, re.search(pattern, text, re.IGNORECASE) is not None)
            for label, pattern in RULES[fw]
        ]
        for fw in frameworks
    }
# Prompt template for the LLM-written compliance report.
# `{bullet}` is the only placeholder in the template; presumably a caller
# outside this file fills it with the checklist summary — TODO confirm.
# NOTE(review): the section title "Redemption Strategies" looks like it was
# meant to be "Remediation Strategies"; left unchanged because this text is
# sent to the model verbatim.
AI_REPORT_PROMPT = """
You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.
Your task is to produce a **clear, actionable report** tailored to a technical audience. Structure it as follows:
1. **Executive Summary**
- One or two sentences on overall compliance posture
- Highest‑level takeaways
2. **Key Issues Identified**
- For each framework with failures, list:
- The specific failed control(s) by label
- A brief description of why that control matters
- Use bullet points and group by framework
3. **Redemption Strategies**
- For each key issue above, recommend a **concrete next step** or mitigation strategy
- Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data at rest to meet ISO 27001 A.10.1”)
4. **Additional Resources & Next Steps**
- A short paragraph on how deeper expert review can streamline remediation
- A call‑out promoting AnkTechSol’s professional compliance consulting (e.g., “For a full policy audit, tailored gap analysis, and implementation roadmap, visit anktechsol.com or contact our team at [contact link].”)
5. **Appendix (Optional)**
- Raw bullet list of “Framework: X passed/total, Y failed/total”
Make sure to:
- Use clear headings (`## Executive Summary`, etc.)
- Keep each section brief but substantive (no more than 4–6 bullets per section)
- Write in a confident, consultative tone
Here are the raw results to incorporate:
{bullet}
Generate the report as markdown.
"""
# Model identifier handed to both the tokenizer and the model loader in
# load_pipeline().
HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# Build the text-generation pipeline used for report writing.
def load_pipeline():
    """Load the tokenizer and model named by ``HF_MODEL`` and wrap them.

    The model is requested in float16 with ``device_map="auto"``; the same
    ``device_map`` value is also passed to the pipeline constructor.

    Returns:
        The ``text-generation`` pipeline object created by ``pipeline()``.
    """
    tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)

    # Keyword options for the model loader, gathered in one place.
    # NOTE(review): trust_remote_code=True is passed through as in the
    # original — confirm it is actually required for this model.
    model_options = {
        "torch_dtype": torch.float16,
        "device_map": "auto",
        "trust_remote_code": True,
    }
    model = AutoModelForCausalLM.from_pretrained(HF_MODEL, **model_options)

    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto",
    )
# Module-level pipeline instance, created once when this module is imported;
# generate_report() calls it.
# NOTE(review): importing this module therefore runs load_pipeline() right away.
generator = load_pipeline()
def generate_report(prompt, max_tokens=600):
    """Generate report text for *prompt* with the module-level ``generator``.

    Args:
        prompt: The full prompt text passed to the pipeline.
        max_tokens: Value forwarded as ``max_new_tokens``.

    Returns:
        The ``generated_text`` of the first pipeline result, or a string of
        the form ``"Error: <message>"`` if anything raises along the way.
    """
    # Sampling settings forwarded verbatim to the pipeline call.
    sampling_options = {
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "return_full_text": False,
    }
    try:
        outputs = generator(prompt, **sampling_options)
        first_result = outputs[0]
        return first_result["generated_text"]
    except Exception as exc:
        # Failures are reported in-band as text rather than raised.
        return f"Error: {exc}"
def fetchText(url, timeout=10):
    """Download *url* and return the text of its main content.

    The text is taken from the first ``<main>`` element when present,
    otherwise from ``<body>``, otherwise from the whole parsed document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        ``(text, None)`` on success, or ``("", "Error fetching URL: <message>")``
        if the request or the parsing fails.
    """
    try:
        response = req.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        container = soup.find('main')
        if container is None:
            # A fragment without <body> previously hit None.get_text and was
            # reported as a fetch error; fall back to the whole document.
            container = soup.body if soup.body is not None else soup

        text = container.get_text(separator='\n', strip=True)
        return text.strip(), None
    except Exception as e:
        # Best-effort boundary: callers get an error message, not an exception.
        return "", f"Error fetching URL: {e}"
# Public API of this module: the names exported by a star-import.
__all__ = ["RULES", "run_check", "AI_REPORT_PROMPT", "generate_report", "fetchText"]