import re from huggingface_hub import InferenceClient import os import requests as req from bs4 import BeautifulSoup import streamlit as st from dotenv import load_dotenv load_dotenv() RULES={ "GDPR":[ ("Lawful basis documented", r"lawful\s+basis"), ("Data-subject rights process", r"right\s+to\s+access|erasure"), ("72-hour breach notice plan", r"72\s*hour"), ], "EU_AI_ACT":[ ("High-risk AI DPIA", r"risk\s+assessment"), ("Training data governance", r"data\s+governance"), ], "ISO_27001":[ ("Annex A control list", r"annex\s*a"), ("Statement of Applicability", r"statement\s+of\s+applicability"), ] } def run_check(text,framework): # print(text,framework) #array from me aata hai framework results={} for fw in framework: results[fw]=[] #store particular fw data # one work as label & one work as pattern e.g==>label: Training data governance pattern: data\s+governance for label, pattern in RULES[fw]: match = re.search(pattern, text, re.I) # re.I = re.IGNORECASE results[fw].append((label, bool(match))) return results AI_REPORT_PROMPT = """ You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed. Your task is to produce a **clear, actionable report** tailored to a technical audience. Structure it as follows: 1. **Executive Summary** - One or two sentences on overall compliance posture - Highest‑level takeaways 2. **Key Issues Identified** - For each framework with failures, list: - The specific failed control(s) by label - A brief description of why that control matters - Use bullet points and group by framework 3. **Redemption Strategies** - For each key issue above, recommend a **concrete next step** or mitigation strategy - Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data at rest to meet ISO 27001 A.10.1”) 4. **Additional Resources & Next Steps** - A short paragraph on how deeper expert review can streamline remediation - A call‑out promoting AnkTechSol’s professional compliance consulting (e.g., “For a full policy audit, tailored gap analysis, and implementation roadmap, visit anktechsol.com or contact our team at [contact link].”) 5. **Appendix (Optional)** - Raw bullet list of “Framework: X passed/total, Y failed/total” Make sure to: - Use clear headings (`## Executive Summary`, etc.) - Keep each section brief but substantive (no more than 4–6 bullets per section) - Write in a confident, consultative tone Here are the raw results to incorporate: {bullet} Generate the report as markdown. """ HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1" def generate_report(prompt,max_tokens=600): token = os.getenv("HF_TOKEN") if not token: raise EnvironmentError("token is not found in env issue") client = InferenceClient( provider="together", api_key=token, ) try: response = client.chat.completions.create( model=HF_MODEL, messages=[ { "role": "user", "content": prompt }] ) return response.choices[0].message.content except Exception as e: return "Error: Failed to generate report." def fetchText(url): try: response = req.get(url) response.raise_for_status() soup = BeautifulSoup(response.text, 'html.parser') main_content = soup.find('main') if main_content: text = main_content.get_text(separator='\n', strip=True) else: text = soup.body.get_text(separator='\n', strip=True) return text.strip(), None # No error except Exception as e: return "", f"Error fetching URL: {e}" __all__=["RULES","run_check","AI_REPORT_PROMPT","generate_report","fetchText"]