Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| import gradio as gr | |
| from datetime import datetime | |
| from transformers import pipeline | |
# Question-answering pipeline built once at import time so that every call to
# prompt_based_analysis reuses the same loaded model instead of reloading it.
qa_pipeline = pipeline("question-answering", model="deepset/deberta-v3-large-squad2")
def prompt_based_analysis(question, context):
    """Ask the shared QA pipeline one question about one section of audit data.

    Args:
        question: Prompt text passed to the pipeline as ``question``.
        context: Text describing the audited section, passed as ``context``.

    Returns:
        The ``'answer'`` field of the pipeline result, or a string starting
        with ``"Error analyzing data: "`` when the pipeline call raises.
    """
    try:
        result = qa_pipeline(question=question, context=context)
    except Exception as exc:
        # Best-effort by design: a failing section is reported as text so the
        # remaining sections of the report can still be produced.
        return f"Error analyzing data: {str(exc)}"
    else:
        return result['answer']
def _is_missing(value):
    """Tell whether *value* is None or a float NaN."""
    # NaN is the only float that is not equal to itself.
    return value is None or (isinstance(value, float) and value != value)


def _as_mapping(value):
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_text_list(value):
    """Coerce a list, a delimited string, or a lone scalar into a list of strings."""
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value]
    if _is_missing(value):
        return []
    if isinstance(value, str):
        # A single cell may carry several entries separated by ',' or ';'.
        pieces = value.replace(';', ',').split(',')
        return [piece.strip() for piece in pieces if piece.strip()]
    return [str(value)]


def _flag_enabled(value):
    """Interpret a security flag given as a bool, a number, or text such as 'yes'."""
    if _is_missing(value):
        return False
    if isinstance(value, str):
        return value.strip().lower() in {"true", "yes", "y", "1", "on", "enabled"}
    return bool(value)


def analyze_gdpr_compliance(audit_data):
    """Analyze audit data section by section against the GDPR principles.

    Each section of the audit data is rendered into a short context string and
    sent, together with a principle-specific prompt, to prompt_based_analysis.

    Args:
        audit_data: Dict of audit values. Recognised keys are ``os_version``,
            ``architecture``, ``memory``, ``disk_usage`` (dict with
            ``usage_percent``), ``network_info`` (dict with ``interfaces``),
            ``security_measures`` (dict with ``encryption`` and
            ``data_anonymization``), ``running_processes`` and
            ``software_inventory``. Missing keys fall back to defaults.
            A section that is expected to be a dict but is not is treated as
            empty, and list-like fields may be lists, delimited strings or
            scalars, so flat CSV-derived data does not crash the analysis.

    Returns:
        A ``(findings, recommendations)`` tuple: ``findings`` maps each section
        name to the analysis text, ``recommendations`` is a list of strings.
    """
    # GDPR Principles
    principles = {
        "Lawfulness, Fairness, and Transparency": "Ensure that data processing is done lawfully, fairly, and in a transparent manner.",
        "Purpose Limitation": "Ensure that data collected is for specified, explicit, and legitimate purposes.",
        "Data Minimization": "Ensure that data collected is adequate, relevant, and limited to what is necessary.",
        "Accuracy": "Ensure that personal data is accurate and up to date.",
        "Storage Limitation": "Ensure that personal data is kept no longer than necessary.",
        "Integrity and Confidentiality": "Ensure that personal data is processed securely to prevent unauthorized access, loss, or destruction."
    }

    # Normalise the nested sections once; a non-dict value becomes {} so the
    # lookups below fall back to their defaults instead of raising.
    disk_usage = _as_mapping(audit_data.get('disk_usage', {}))
    network_info = _as_mapping(audit_data.get('network_info', {}))
    security_measures = _as_mapping(audit_data.get('security_measures', {}))
    encryption = security_measures.get('encryption', False)
    data_anonymization = security_measures.get('data_anonymization', False)

    # Prompt context from audit data
    system_info_context = f"OS Version: {audit_data.get('os_version', 'Unknown')}, Architecture: {audit_data.get('architecture', 'Unknown')}, Memory: {audit_data.get('memory', 'Unknown')}"
    disk_usage_context = f"Disk Usage: {disk_usage.get('usage_percent', 'Unknown')}%"
    network_info_context = f"Interfaces: {', '.join(_as_text_list(network_info.get('interfaces', [])))}"
    security_measures_context = f"Encryption: {encryption}, Data Anonymization: {data_anonymization}"
    processes_context = f"Running Processes: {', '.join(_as_text_list(audit_data.get('running_processes', [])))}"
    software_inventory_context = f"Software Installed: {', '.join(_as_text_list(audit_data.get('software_inventory', [])))}"

    # Prompts for GDPR principles applied to sections: (section, question, context)
    section_prompts = [
        (
            "system_info",
            f"Evaluate the system information in terms of GDPR compliance focusing on {principles['Lawfulness, Fairness, and Transparency']} and {principles['Purpose Limitation']}.",
            system_info_context,
        ),
        (
            "disk_usage",
            f"Evaluate the disk usage under the {principles['Storage Limitation']} principle and ensure compliance.",
            disk_usage_context,
        ),
        (
            "network_info",
            f"Evaluate the network interfaces with respect to {principles['Integrity and Confidentiality']}, identifying any potential security risks.",
            network_info_context,
        ),
        (
            "security_measures",
            f"Analyze the encryption and anonymization methods under the {principles['Integrity and Confidentiality']} principle, identifying any weaknesses.",
            security_measures_context,
        ),
        (
            "running_processes",
            f"Evaluate the running processes for GDPR compliance under {principles['Lawfulness, Fairness, and Transparency']}, focusing on unauthorized or risky processes.",
            processes_context,
        ),
        (
            "software_inventory",
            f"Assess the installed software for GDPR compliance focusing on {principles['Accuracy']} and {principles['Integrity and Confidentiality']}.",
            software_inventory_context,
        ),
    ]

    # Findings organized by section
    findings = {
        section: prompt_based_analysis(question, context)
        for section, question, context in section_prompts
    }

    security_text = findings["security_measures"].lower()
    software_text = findings["software_inventory"].lower()
    processes_text = findings["running_processes"].lower()

    # Detailed Recommendations based on the findings
    recommendations = []
    if "Unknown" in system_info_context:
        recommendations.append("Review System Information: Ensure that the OS, architecture, and memory configurations are well documented and up to date in accordance with GDPR transparency requirements.")
    # The audited flag is checked as well as the analysis wording: relying on
    # the analysis text alone misses the case where the data itself reports
    # that the measure is switched off.
    if not _flag_enabled(encryption) or 'not encrypted' in security_text or 'no encryption' in security_text:
        recommendations.append("Implement Encryption: Ensure that both stored and transmitted data are encrypted to meet GDPR security requirements.")
    if not _flag_enabled(data_anonymization) or 'not anonymized' in security_text:
        recommendations.append("Implement Data Anonymization: Ensure that sensitive data is anonymized during storage to comply with GDPR's confidentiality principle.")
    if 'outdated' in software_text or 'vulnerable' in software_text:
        recommendations.append("Update Software: Ensure that all installed software is up to date and free from known vulnerabilities to maintain the integrity and confidentiality of personal data.")
    if 'vulnerable processes' in processes_text or 'unauthorized processes' in processes_text:
        recommendations.append("Review Running Processes: Regularly audit running processes and ensure that no unauthorized or risky processes are running to maintain GDPR compliance.")
    return findings, recommendations
def generate_gdpr_report(audit_data, company_name="Company Name", system_name="System Name"):
    """Build the plain-text GDPR compliance report for one set of audit data.

    Args:
        audit_data: Dict of audit values, passed to analyze_gdpr_compliance.
        company_name: Name inserted into the report header, summary and
            conclusion.
        system_name: Accepted for interface compatibility; the report text
            does not reference it.

    Returns:
        The report as a single string, dated with today's local date.
    """
    findings, recommendations = analyze_gdpr_compliance(audit_data)

    # Without a fallback an empty list would leave the "Recommendations:"
    # heading with nothing underneath it.
    if recommendations:
        recommendations_text = ''.join(f'- {rec}\n' for rec in recommendations)
    else:
        recommendations_text = '- No specific recommendations were triggered by this audit data.\n'

    report_content = """
GDPR Compliance Evaluation Report
Title: GDPR Compliance Evaluation Report
Date: {date}
Prepared by: [Your Name]
For: {company_name}
Executive Summary:
This report evaluates the compliance of {company_name} with the General Data Protection Regulation (GDPR).
Based on the system audit and analysis of data handling processes, this report provides findings, identifies compliance gaps,
and suggests recommendations to enhance GDPR adherence based on the key principles of GDPR, such as Lawfulness, Fairness, Transparency, Purpose Limitation, Data Minimization, Accuracy, Storage Limitation, and Integrity & Confidentiality.
Key Findings:
System Information Analysis:
{system_info}
Disk Usage Analysis:
{disk_usage}
Network Info Analysis:
{network_info}
Security Measures Analysis:
{security_measures}
Running Processes Analysis:
{running_processes}
Software Inventory Analysis:
{software_inventory}
Recommendations:
{recommendations}
Conclusion:
The analysis shows that while {company_name} has some strong protective measures in place, there are several areas for improvement. Implementing the suggested recommendations will enhance {company_name}'s compliance with GDPR and reduce potential risks of non-compliance.
References:
- GDPR Regulation (EU) 2016/679
- System Audit Report, {date}
""".format(
        date=datetime.now().strftime('%Y-%m-%d'),
        company_name=company_name,
        system_info=findings['system_info'],
        disk_usage=findings['disk_usage'],
        network_info=findings['network_info'],
        security_measures=findings['security_measures'],
        running_processes=findings['running_processes'],
        software_inventory=findings['software_inventory'],
        recommendations=recommendations_text,
    )
    return report_content
def analyze_csv_file(file_obj):
    """Read an audit CSV and flatten its rows into one dictionary.

    Rows are merged in file order, so for a column that appears in several
    rows the last non-missing value wins. Missing cells are left out of the
    result entirely, which lets callers rely on ``dict.get(key, default)``.

    Args:
        file_obj: Anything ``pandas.read_csv`` accepts (path or file-like
            object).

    Returns:
        Dict mapping each column name to its merged value.

    Raises:
        ValueError: If the CSV cannot be read; the original error is chained.
    """
    try:
        frame = pd.read_csv(file_obj)
    except Exception as e:
        raise ValueError(f"Error reading CSV file: {str(e)}") from e

    audit_data_json = {}
    for record in frame.to_dict(orient='records'):
        # Skip missing cells so a blank in a later row cannot overwrite a real
        # value taken from an earlier row.
        audit_data_json.update(
            {column: value for column, value in record.items() if pd.notna(value)}
        )
    return audit_data_json
# Gradio interface: upload an audit CSV, run the analysis, show the report text.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# GDPR Compliance Evaluation\n### Upload Audit Data in CSV Format")
        csv_file = gr.File(label="Upload CSV file")
        gdpr_compliance = gr.Textbox(lines=10, placeholder="GDPR Compliance Analysis...", label="GDPR Compliance Analysis")

        def run_compliance_checks(uploaded_file):
            """Turn the uploaded CSV into report text for the output textbox.

            Returns "No file uploaded" when nothing was uploaded, the error
            message when the CSV cannot be read, and the report otherwise.
            """
            if uploaded_file is None:
                return "No file uploaded"
            try:
                audit_data = analyze_csv_file(uploaded_file)
            except ValueError as exc:
                # analyze_csv_file reports unreadable CSVs as ValueError; show
                # its message in the textbox rather than a generic UI error.
                return str(exc)
            return generate_gdpr_report(audit_data)

        check_compliance_btn = gr.Button("Run Compliance Checks")
        check_compliance_btn.click(run_compliance_checks, inputs=[csv_file], outputs=[gdpr_compliance])
        clear_btn = gr.Button("Clear")
        # Clearing only resets the report textbox; the uploaded file is kept.
        clear_btn.click(lambda: "", None, [gdpr_compliance])

demo.launch(share=True)