Spaces:

anktechsol
/

AnupalanKarta

Paused

App Files Files Community

Improve input flow and reporting

by anmol11p - opened Jul 16, 2025

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+108

-46

Files changed (1) hide show

src/compliance_lib.py +108 -46

src/compliance_lib.py CHANGED Viewed

@@ -1,58 +1,120 @@
-import requests, re, bs4, json, os
-from functools import lru_cache
-from datetime import datetime, timedelta
-# ---- 1. simple web scraper --------------------------------------------------
-HEADERS = {"User-Agent": "anupalankarta/1.0"}
-@lru_cache(maxsize=128)
-def fetch_text(url: str, ttl_hours: int = 12) -> str:
-    """Download & cache plain-text from a URL for `ttl_hours`."""
-    cache_path = f".cache_{re.sub(r'[^A-Za-z0-9]', '_', url)}.txt"
-    if os.path.exists(cache_path):
-        mtime = datetime.fromtimestamp(os.path.getmtime(cache_path))
-        if datetime.utcnow() - mtime < timedelta(hours=ttl_hours):
-            return open(cache_path, encoding="utf-8").read()
-    r = requests.get(url, headers=HEADERS, timeout=20)
-    soup = bs4.BeautifulSoup(r.text, "html.parser")
-    text = " ".join(t.get_text(" ", strip=True) for t in soup.find_all(["p", "li"]))
-    open(cache_path, "w", encoding="utf-8").write(text)
-    return text
-# ---- 2. minimal rule base ---------------------------------------------------
-RULES = {
-    "GDPR": [
-        ("Lawful basis documented", r"lawful\s+basis"),
         ("Data-subject rights process", r"right\s+to\s+access|erasure"),
         ("72-hour breach notice plan", r"72\s*hour"),
     ],
-    "EU_AI_Act": [
-        ("High-risk AI DPIA", r"risk\s+assessment"),
         ("Training data governance", r"data\s+governance"),
     ],
-    "ISO_27001": [
         ("Annex A control list", r"annex\s*a"),
         ("Statement of Applicability", r"statement\s+of\s+applicability"),
-    ],
-    # extend as needed
-}
-def run_check(text: str) -> dict:
-    """Return dict{framework: list[(item, pass_bool)]}."""
-    results = {}
-    for fw, tests in RULES.items():
-        framework_res = []
-        for label, pattern in tests:
-            framework_res.append((label, bool(re.search(pattern, text, re.I))))
-        results[fw] = framework_res
     return results
-# ---- 3. Hugging Face model wrapper -----------------------------------------
-from huggingface_hub import InferenceClient
 HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-def generate_report(prompt: str, max_tokens=600) -> str:
-    client = InferenceClient(model=HF_MODEL, token=os.environ.get("HF_TOKEN"))
-    return client.text_generation(prompt, max_new_tokens=max_tokens,
-                                  temperature=0.4, top_p=0.9)

+import re
+from huggingface_hub import InferenceClient
+import os
+import requests as req
+from bs4 import BeautifulSoup
+import streamlit as st
+from dotenv import load_dotenv
+load_dotenv()
+RULES={
+    "GDPR":[
+     ("Lawful basis documented", r"lawful\s+basis"),
         ("Data-subject rights process", r"right\s+to\s+access|erasure"),
         ("72-hour breach notice plan", r"72\s*hour"),
     ],
+    "EU_AI_ACT":[
+          ("High-risk AI DPIA", r"risk\s+assessment"),
         ("Training data governance", r"data\s+governance"),
     ],
+    "ISO_27001":[
         ("Annex A control list", r"annex\s*a"),
         ("Statement of Applicability", r"statement\s+of\s+applicability"),
+    ]
+    }
+def run_check(text,framework):
+    # print(text,framework) # framework arrives as an array (list) of framework names
+    results={}
+    for fw in framework:
+        results[fw]=[]  #store particular fw data
+            # each rule is a (label, pattern) pair, e.g. label: Training data governance, pattern: data\s+governance
+        for label, pattern in RULES[fw]:
+           match = re.search(pattern, text, re.I)  # re.I = re.IGNORECASE
+           results[fw].append((label, bool(match)))
     return results
+AI_REPORT_PROMPT = """
+You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.
+Your task is to produce a **clear, actionable report** tailored to a technical audience. Structure it as follows:
+1. **Executive Summary**
+   - One or two sentences on overall compliance posture
+   - Highest‑level takeaways
+2. **Key Issues Identified**
+   - For each framework with failures, list:
+     - The specific failed control(s) by label
+     - A brief description of why that control matters
+   - Use bullet points and group by framework
+3. **Redemption Strategies**
+   - For each key issue above, recommend a **concrete next step** or mitigation strategy
+   - Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data at rest to meet ISO 27001 A.10.1”)
+4. **Additional Resources & Next Steps**
+   - A short paragraph on how deeper expert review can streamline remediation
+   - A call‑out promoting AnkTechSol’s professional compliance consulting (e.g., “For a full policy audit, tailored gap analysis, and implementation roadmap, visit anktechsol.com or contact our team at [contact link].”)
+5. **Appendix (Optional)**
+   - Raw bullet list of “Framework: X passed/total, Y failed/total”
+Make sure to:
+- Use clear headings (`## Executive Summary`, etc.)
+- Keep each section brief but substantive (no more than 4–6 bullets per section)
+- Write in a confident, consultative tone
+Here are the raw results to incorporate:
+{bullet}
+Generate the report as markdown.
+"""
 HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+def generate_report(prompt,max_tokens=600):
+     token = os.getenv("HF_TOKEN")
+     if not token:
+        raise EnvironmentError("token is not found in env issue")
+     client = InferenceClient(
+    provider="together",
+    api_key=token,
+)
+     try:
+      response = client.chat.completions.create(
+            model=HF_MODEL,
+            messages=[ {
+            "role": "user",
+            "content": prompt
+        }]
+        )
+      return response.choices[0].message.content
+     except Exception as e:
+        return "Error: Failed to generate report."
+def fetchText(url):
+    try:
+        response = req.get(url)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        main_content = soup.find('main')
+        if main_content:
+            text = main_content.get_text(separator='\n', strip=True)
+        else:
+            text = soup.body.get_text(separator='\n', strip=True)
+        return text.strip(), None  # No error
+    except Exception as e:
+        return "", f"Error fetching URL: {e}"
+__all__=["RULES","run_check","AI_REPORT_PROMPT","generate_report","fetchText"]