| """ | |
| Utility functions for the AIBOM Generator. | |
| """ | |
| import json | |
| import logging | |
| import os | |
| import re | |
| import uuid | |
| from typing import Dict, List, Optional, Any, Union, Tuple | |
| from enum import Enum | |
| logger = logging.getLogger(__name__) | |
| # Validation severity levels | |
| class ValidationSeverity(Enum): | |
| ERROR = "error" | |
| WARNING = "warning" | |
| INFO = "info" | |
| # Field classification based on documentation value (silently aligned with SPDX) | |
| FIELD_CLASSIFICATION = { | |
| # Critical fields (silently aligned with SPDX mandatory fields) | |
| "bomFormat": {"tier": "critical", "weight": 3, "category": "required_fields"}, | |
| "specVersion": {"tier": "critical", "weight": 3, "category": "required_fields"}, | |
| "serialNumber": {"tier": "critical", "weight": 3, "category": "required_fields"}, | |
| "version": {"tier": "critical", "weight": 3, "category": "required_fields"}, | |
| "name": {"tier": "critical", "weight": 4, "category": "component_basic"}, | |
| "downloadLocation": {"tier": "critical", "weight": 4, "category": "external_references"}, | |
| "primaryPurpose": {"tier": "critical", "weight": 3, "category": "metadata"}, | |
| "suppliedBy": {"tier": "critical", "weight": 4, "category": "metadata"}, | |
| # Important fields (aligned with key SPDX optional fields) | |
| "type": {"tier": "important", "weight": 2, "category": "component_basic"}, | |
| "purl": {"tier": "important", "weight": 4, "category": "component_basic"}, | |
| "description": {"tier": "important", "weight": 4, "category": "component_basic"}, | |
| "licenses": {"tier": "important", "weight": 4, "category": "component_basic"}, | |
| "energyConsumption": {"tier": "important", "weight": 3, "category": "component_model_card"}, | |
| "hyperparameter": {"tier": "important", "weight": 3, "category": "component_model_card"}, | |
| "limitation": {"tier": "important", "weight": 3, "category": "component_model_card"}, | |
| "safetyRiskAssessment": {"tier": "important", "weight": 3, "category": "component_model_card"}, | |
| "typeOfModel": {"tier": "important", "weight": 3, "category": "component_model_card"}, | |
| # Supplementary fields (aligned with remaining SPDX optional fields) | |
| "modelExplainability": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}, | |
| "standardCompliance": {"tier": "supplementary", "weight": 2, "category": "metadata"}, | |
| "domain": {"tier": "supplementary", "weight": 2, "category": "metadata"}, | |
| "energyQuantity": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}, | |
| "energyUnit": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}, | |
| "informationAboutTraining": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}, | |
| "informationAboutApplication": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}, | |
| "metric": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}, | |
| "metricDecisionThreshold": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}, | |
| "modelDataPreprocessing": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}, | |
| "autonomyType": {"tier": "supplementary", "weight": 1, "category": "metadata"}, | |
| "useSensitivePersonalInformation": {"tier": "supplementary", "weight": 2, "category": "component_model_card"} | |
| } | |
# Completeness profiles (silently aligned with SPDX requirements)
COMPLETENESS_PROFILES = {
    "basic": {
        "description": "Minimal fields required for identification",
        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name"],
        "minimum_score": 40
    },
    "standard": {
        "description": "Comprehensive fields for proper documentation",
        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name",
                            "downloadLocation", "primaryPurpose", "suppliedBy"],
        "minimum_score": 70
    },
    "advanced": {
        "description": "Extensive documentation for maximum transparency",
        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name",
                            "downloadLocation", "primaryPurpose", "suppliedBy",
                            "type", "purl", "description", "licenses", "hyperparameter", "limitation",
                            "energyConsumption", "safetyRiskAssessment", "typeOfModel"],
        "minimum_score": 85
    }
}

# Validation messages framed as best practices
VALIDATION_MESSAGES = {
    "name": {
        "missing": "Missing critical field: name - essential for model identification",
        "recommendation": "Add a descriptive name for the model"
    },
    "downloadLocation": {
        "missing": "Missing critical field: downloadLocation - needed for artifact retrieval",
        "recommendation": "Add information about where the model can be downloaded"
    },
    "primaryPurpose": {
        "missing": "Missing critical field: primaryPurpose - important for understanding model intent",
        "recommendation": "Add information about the primary purpose of this model"
    },
    "suppliedBy": {
        "missing": "Missing critical field: suppliedBy - needed for provenance tracking",
        "recommendation": "Add information about who supplied this model"
    },
    "energyConsumption": {
        "missing": "Missing important field: energyConsumption - helpful for environmental impact assessment",
        "recommendation": "Consider documenting energy consumption metrics for better transparency"
    },
    "hyperparameter": {
        "missing": "Missing important field: hyperparameter - valuable for reproducibility",
        "recommendation": "Document key hyperparameters used in training"
    },
    "limitation": {
        "missing": "Missing important field: limitation - important for responsible use",
        "recommendation": "Document known limitations of the model to guide appropriate usage"
    }
}
def setup_logging(level=logging.INFO):
    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )


def ensure_directory(directory_path):
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)
    return directory_path


def generate_uuid():
    return str(uuid.uuid4())
def normalize_license_id(license_text):
    license_mappings = {
        "mit": "MIT",
        "apache": "Apache-2.0",
        "apache 2": "Apache-2.0",
        "apache 2.0": "Apache-2.0",
        "apache-2": "Apache-2.0",
        "apache-2.0": "Apache-2.0",
        "gpl": "GPL-3.0-only",
        "gpl-3": "GPL-3.0-only",
        "gpl-3.0": "GPL-3.0-only",
        "gpl3": "GPL-3.0-only",
        "gpl v3": "GPL-3.0-only",
        "gpl-2": "GPL-2.0-only",
        "gpl-2.0": "GPL-2.0-only",
        "gpl2": "GPL-2.0-only",
        "gpl v2": "GPL-2.0-only",
        "lgpl": "LGPL-3.0-only",
        "lgpl-3": "LGPL-3.0-only",
        "lgpl-3.0": "LGPL-3.0-only",
        "bsd": "BSD-3-Clause",
        "bsd-3": "BSD-3-Clause",
        "bsd-3-clause": "BSD-3-Clause",
        "bsd-2": "BSD-2-Clause",
        "bsd-2-clause": "BSD-2-Clause",
        "cc": "CC-BY-4.0",
        "cc-by": "CC-BY-4.0",
        "cc-by-4.0": "CC-BY-4.0",
        "cc-by-sa": "CC-BY-SA-4.0",
        "cc-by-sa-4.0": "CC-BY-SA-4.0",
        "cc-by-nc": "CC-BY-NC-4.0",
        "cc-by-nc-4.0": "CC-BY-NC-4.0",
        "cc0": "CC0-1.0",
        "cc0-1.0": "CC0-1.0",
        "public domain": "CC0-1.0",
        "unlicense": "Unlicense",
        "proprietary": "NONE",
        "commercial": "NONE",
    }

    if not license_text:
        return None

    # Strip punctuation (keep word characters, whitespace, and hyphens)
    normalized = re.sub(r'[^\w\s-]', '', license_text.lower())

    if normalized in license_mappings:
        return license_mappings[normalized]

    # Fall back to substring matching, checking longer keys first so that,
    # for example, "lgpl-3" is matched before the shorter "gpl" key.
    for key in sorted(license_mappings, key=len, reverse=True):
        if key in normalized:
            return license_mappings[key]

    return license_text
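
# Illustrative behavior of normalize_license_id (example inputs are hypothetical;
# outputs follow the mapping and matching logic above):
#   normalize_license_id("MIT License")  -> "MIT"         (substring match on "mit")
#   normalize_license_id("Apache 2.0")   -> "Apache-2.0"  (punctuation stripped, then matched)
#   normalize_license_id("Custom EULA")  -> "Custom EULA" (no match, original text returned)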
def validate_spdx(license_entry):
    spdx_licenses = [
        "MIT", "Apache-2.0", "GPL-3.0-only", "GPL-2.0-only", "LGPL-3.0-only",
        "BSD-3-Clause", "BSD-2-Clause", "CC-BY-4.0", "CC-BY-SA-4.0", "CC0-1.0",
        "Unlicense", "NONE"
    ]
    if isinstance(license_entry, list):
        return all(lic in spdx_licenses for lic in license_entry)
    return license_entry in spdx_licenses
def check_field_in_aibom(aibom: Dict[str, Any], field: str) -> bool:
    """
    Check if a field is present in the AIBOM.

    Args:
        aibom: The AIBOM to check
        field: The field name to check

    Returns:
        True if the field is present, False otherwise
    """
    # Check at the root level
    if field in aibom:
        return True

    # Check in metadata
    if "metadata" in aibom:
        metadata = aibom["metadata"]
        if field in metadata:
            return True

        # Check in metadata properties
        if "properties" in metadata:
            for prop in metadata["properties"]:
                if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
                    return True

    # Check in components
    if "components" in aibom and aibom["components"]:
        component = aibom["components"][0]  # Use first component
        if field in component:
            return True

        # Check in component properties
        if "properties" in component:
            for prop in component["properties"]:
                if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
                    return True

        # Check in model card
        if "modelCard" in component:
            model_card = component["modelCard"]
            if field in model_card:
                return True

            # Check in model parameters
            if "modelParameters" in model_card:
                if field in model_card["modelParameters"]:
                    return True

                # Check in model parameters properties
                if "properties" in model_card["modelParameters"]:
                    for prop in model_card["modelParameters"]["properties"]:
                        if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
                            return True

            # Check in considerations
            if "considerations" in model_card:
                if field in model_card["considerations"]:
                    return True

                # Check in specific consideration sections
                for section in ["technicalLimitations", "ethicalConsiderations", "environmentalConsiderations"]:
                    if section in model_card["considerations"]:
                        if field == "limitation" and section == "technicalLimitations":
                            return True
                        if field == "safetyRiskAssessment" and section == "ethicalConsiderations":
                            return True
                        if field == "energyConsumption" and section == "environmentalConsiderations":
                            return True

    # Check in external references
    if field == "downloadLocation" and "externalReferences" in aibom:
        for ref in aibom["externalReferences"]:
            if ref.get("type") == "distribution":
                return True

    return False
def determine_completeness_profile(aibom: Dict[str, Any], score: float) -> Dict[str, Any]:
    """
    Determine which completeness profile the AIBOM satisfies.

    Args:
        aibom: The AIBOM to check
        score: The calculated score

    Returns:
        Dictionary with profile information
    """
    satisfied_profiles = []

    for profile_name, profile in COMPLETENESS_PROFILES.items():
        # Check if all required fields are present
        all_required_present = all(check_field_in_aibom(aibom, field) for field in profile["required_fields"])

        # Check if score meets minimum
        score_sufficient = score >= profile["minimum_score"]

        if all_required_present and score_sufficient:
            satisfied_profiles.append(profile_name)

    # Return the highest satisfied profile
    if "advanced" in satisfied_profiles:
        return {
            "name": "advanced",
            "description": COMPLETENESS_PROFILES["advanced"]["description"],
            "satisfied": True
        }
    elif "standard" in satisfied_profiles:
        return {
            "name": "standard",
            "description": COMPLETENESS_PROFILES["standard"]["description"],
            "satisfied": True
        }
    elif "basic" in satisfied_profiles:
        return {
            "name": "basic",
            "description": COMPLETENESS_PROFILES["basic"]["description"],
            "satisfied": True
        }
    else:
        return {
            "name": "incomplete",
            "description": "Does not satisfy any completeness profile",
            "satisfied": False
        }
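
# Illustrative example (hypothetical values): an AIBOM that contains the five
# "basic" required fields and scores 55 satisfies "basic" (minimum 40) but not
# "standard" (minimum 70), so the function returns
# {"name": "basic", "description": COMPLETENESS_PROFILES["basic"]["description"], "satisfied": True}.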
def apply_completeness_penalties(original_score: float, missing_fields: Dict[str, List[str]]) -> Dict[str, Any]:
    """
    Apply penalties based on missing critical fields.

    Args:
        original_score: The original calculated score
        missing_fields: Dictionary of missing fields by tier

    Returns:
        Dictionary with penalty information
    """
    # Count missing fields by tier
    missing_critical_count = len(missing_fields["critical"])
    missing_important_count = len(missing_fields["important"])

    # Calculate penalty based on missing critical fields
    if missing_critical_count > 3:
        penalty_factor = 0.8  # 20% penalty
        penalty_reason = "Multiple critical fields missing"
    elif missing_critical_count > 0:
        penalty_factor = 0.9  # 10% penalty
        penalty_reason = "Some critical fields missing"
    elif missing_important_count > 5:
        penalty_factor = 0.95  # 5% penalty
        penalty_reason = "Several important fields missing"
    else:
        # No penalty
        penalty_factor = 1.0
        penalty_reason = None

    adjusted_score = original_score * penalty_factor

    return {
        "adjusted_score": round(adjusted_score, 1),  # Round to 1 decimal place
        "penalty_applied": penalty_reason is not None,
        "penalty_reason": penalty_reason,
        "penalty_factor": penalty_factor
    }
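
# Worked example (hypothetical numbers): with an original score of 80.0 and two
# missing critical fields, penalty_factor is 0.9, so the adjusted score is 72.0:
#   apply_completeness_penalties(80.0, {"critical": ["name", "suppliedBy"],
#                                       "important": [], "supplementary": []})
#   -> {"adjusted_score": 72.0, "penalty_applied": True,
#       "penalty_reason": "Some critical fields missing", "penalty_factor": 0.9}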
def generate_field_recommendations(missing_fields: Dict[str, List[str]]) -> List[Dict[str, Any]]:
    """
    Generate recommendations for missing fields.

    Args:
        missing_fields: Dictionary of missing fields by tier

    Returns:
        List of recommendations
    """
    recommendations = []

    # Prioritize critical fields
    for field in missing_fields["critical"]:
        if field in VALIDATION_MESSAGES:
            recommendations.append({
                "priority": "high",
                "field": field,
                "message": VALIDATION_MESSAGES[field]["missing"],
                "recommendation": VALIDATION_MESSAGES[field]["recommendation"]
            })
        else:
            recommendations.append({
                "priority": "high",
                "field": field,
                "message": f"Missing critical field: {field}",
                "recommendation": f"Add {field} to improve documentation completeness"
            })

    # Then important fields
    for field in missing_fields["important"]:
        if field in VALIDATION_MESSAGES:
            recommendations.append({
                "priority": "medium",
                "field": field,
                "message": VALIDATION_MESSAGES[field]["missing"],
                "recommendation": VALIDATION_MESSAGES[field]["recommendation"]
            })
        else:
            recommendations.append({
                "priority": "medium",
                "field": field,
                "message": f"Missing important field: {field}",
                "recommendation": f"Consider adding {field} for better documentation"
            })

    # Finally supplementary fields (limit to top 5)
    supplementary_count = 0
    for field in missing_fields["supplementary"]:
        if supplementary_count >= 5:
            break
        recommendations.append({
            "priority": "low",
            "field": field,
            "message": f"Missing supplementary field: {field}",
            "recommendation": f"Consider adding {field} for comprehensive documentation"
        })
        supplementary_count += 1

    return recommendations
def _validate_ai_requirements(aibom: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Validate AI-specific requirements for an AIBOM.

    Args:
        aibom: The AIBOM to validate

    Returns:
        List of validation issues
    """
    issues = []
    issue_codes = set()

    # Check required fields
    for field in ["bomFormat", "specVersion", "serialNumber", "version"]:
        if field not in aibom:
            issues.append({
                "severity": ValidationSeverity.ERROR.value,
                "code": f"MISSING_{field.upper()}",
                "message": f"Missing required field: {field}",
                "path": f"$.{field}"
            })
            issue_codes.add(f"MISSING_{field.upper()}")

    # Check bomFormat
    if "bomFormat" in aibom and aibom["bomFormat"] != "CycloneDX":
        issues.append({
            "severity": ValidationSeverity.ERROR.value,
            "code": "INVALID_BOM_FORMAT",
            "message": f"Invalid bomFormat: {aibom['bomFormat']}. Must be 'CycloneDX'",
            "path": "$.bomFormat"
        })
        issue_codes.add("INVALID_BOM_FORMAT")

    # Check specVersion
    if "specVersion" in aibom and aibom["specVersion"] != "1.6":
        issues.append({
            "severity": ValidationSeverity.ERROR.value,
            "code": "INVALID_SPEC_VERSION",
            "message": f"Invalid specVersion: {aibom['specVersion']}. Must be '1.6'",
            "path": "$.specVersion"
        })
        issue_codes.add("INVALID_SPEC_VERSION")

    # Check serialNumber
    if "serialNumber" in aibom and not aibom["serialNumber"].startswith("urn:uuid:"):
        issues.append({
            "severity": ValidationSeverity.ERROR.value,
            "code": "INVALID_SERIAL_NUMBER",
            "message": f"Invalid serialNumber format: {aibom['serialNumber']}. Must start with 'urn:uuid:'",
            "path": "$.serialNumber"
        })
        issue_codes.add("INVALID_SERIAL_NUMBER")

    # Check version
    if "version" in aibom:
        if not isinstance(aibom["version"], int):
            issues.append({
                "severity": ValidationSeverity.ERROR.value,
                "code": "INVALID_VERSION_TYPE",
                "message": f"Invalid version type: {type(aibom['version'])}. Must be an integer",
                "path": "$.version"
            })
            issue_codes.add("INVALID_VERSION_TYPE")
        elif aibom["version"] <= 0:
            issues.append({
                "severity": ValidationSeverity.ERROR.value,
                "code": "INVALID_VERSION_VALUE",
                "message": f"Invalid version value: {aibom['version']}. Must be positive",
                "path": "$.version"
            })
            issue_codes.add("INVALID_VERSION_VALUE")

    # Check metadata
    if "metadata" not in aibom:
        issues.append({
            "severity": ValidationSeverity.ERROR.value,
            "code": "MISSING_METADATA",
            "message": "Missing metadata section",
            "path": "$.metadata"
        })
        issue_codes.add("MISSING_METADATA")
    else:
        metadata = aibom["metadata"]

        # Check timestamp
        if "timestamp" not in metadata:
            issues.append({
                "severity": ValidationSeverity.WARNING.value,
                "code": "MISSING_TIMESTAMP",
                "message": "Missing timestamp in metadata",
                "path": "$.metadata.timestamp"
            })
            issue_codes.add("MISSING_TIMESTAMP")

        # Check tools
        if "tools" not in metadata or not metadata["tools"] or len(metadata["tools"]) == 0:
            issues.append({
                "severity": ValidationSeverity.WARNING.value,
                "code": "MISSING_TOOLS",
                "message": "Missing tools in metadata",
                "path": "$.metadata.tools"
            })
            issue_codes.add("MISSING_TOOLS")

        # Check authors
        if "authors" not in metadata or not metadata["authors"] or len(metadata["authors"]) == 0:
            issues.append({
                "severity": ValidationSeverity.WARNING.value,
                "code": "MISSING_AUTHORS",
                "message": "Missing authors in metadata",
                "path": "$.metadata.authors"
            })
            issue_codes.add("MISSING_AUTHORS")
        else:
            # Check author properties
            for i, author in enumerate(metadata["authors"]):
                if "url" in author:
                    issues.append({
                        "severity": ValidationSeverity.ERROR.value,
                        "code": "INVALID_AUTHOR_PROPERTY",
                        "message": "Author objects should not contain 'url' property, use 'email' instead",
                        "path": f"$.metadata.authors[{i}].url"
                    })
                    issue_codes.add("INVALID_AUTHOR_PROPERTY")

        # Check properties
        if "properties" not in metadata or not metadata["properties"] or len(metadata["properties"]) == 0:
            issues.append({
                "severity": ValidationSeverity.INFO.value,
                "code": "MISSING_PROPERTIES",
                "message": "Missing properties in metadata",
                "path": "$.metadata.properties"
            })
            issue_codes.add("MISSING_PROPERTIES")

    # Check components
    if "components" not in aibom or not aibom["components"] or len(aibom["components"]) == 0:
        issues.append({
            "severity": ValidationSeverity.ERROR.value,
            "code": "MISSING_COMPONENTS",
            "message": "Missing components section or empty components array",
            "path": "$.components"
        })
        issue_codes.add("MISSING_COMPONENTS")
    else:
        components = aibom["components"]

        # Check first component (AI model)
        component = components[0]

        # Check type
        if "type" not in component:
            issues.append({
                "severity": ValidationSeverity.ERROR.value,
                "code": "MISSING_COMPONENT_TYPE",
                "message": "Missing type in first component",
                "path": "$.components[0].type"
            })
            issue_codes.add("MISSING_COMPONENT_TYPE")
        elif component["type"] != "machine-learning-model":
            issues.append({
                "severity": ValidationSeverity.ERROR.value,
                "code": "INVALID_COMPONENT_TYPE",
                "message": f"Invalid type in first component: {component['type']}. Must be 'machine-learning-model'",
                "path": "$.components[0].type"
            })
            issue_codes.add("INVALID_COMPONENT_TYPE")

        # Check name
        if "name" not in component or not component["name"]:
            issues.append({
                "severity": ValidationSeverity.ERROR.value,
                "code": "MISSING_COMPONENT_NAME",
                "message": "Missing name in first component",
                "path": "$.components[0].name"
            })
            issue_codes.add("MISSING_COMPONENT_NAME")

        # Check bom-ref
        if "bom-ref" not in component or not component["bom-ref"]:
            issues.append({
                "severity": ValidationSeverity.ERROR.value,
                "code": "MISSING_BOM_REF",
                "message": "Missing bom-ref in first component",
                "path": "$.components[0].bom-ref"
            })
            issue_codes.add("MISSING_BOM_REF")

        # Check purl
        if "purl" not in component or not component["purl"]:
            issues.append({
                "severity": ValidationSeverity.WARNING.value,
                "code": "MISSING_PURL",
                "message": "Missing purl in first component",
                "path": "$.components[0].purl"
            })
            issue_codes.add("MISSING_PURL")
        elif not component["purl"].startswith("pkg:"):
            issues.append({
                "severity": ValidationSeverity.ERROR.value,
                "code": "INVALID_PURL_FORMAT",
                "message": f"Invalid purl format: {component['purl']}. Must start with 'pkg:'",
                "path": "$.components[0].purl"
            })
            issue_codes.add("INVALID_PURL_FORMAT")
        elif "@" not in component["purl"]:
            issues.append({
                "severity": ValidationSeverity.WARNING.value,
                "code": "MISSING_VERSION_IN_PURL",
                "message": f"Missing version in purl: {component['purl']}. Should include version after '@'",
                "path": "$.components[0].purl"
            })
            issue_codes.add("MISSING_VERSION_IN_PURL")

        # Check description
        if "description" not in component or not component["description"]:
            issues.append({
                "severity": ValidationSeverity.WARNING.value,
                "code": "MISSING_DESCRIPTION",
                "message": "Missing description in first component",
                "path": "$.components[0].description"
            })
            issue_codes.add("MISSING_DESCRIPTION")
        elif len(component["description"]) < 20:
            issues.append({
                "severity": ValidationSeverity.INFO.value,
                "code": "SHORT_DESCRIPTION",
                "message": f"Description is too short: {len(component['description'])} characters. Recommended minimum is 20 characters",
                "path": "$.components[0].description"
            })
            issue_codes.add("SHORT_DESCRIPTION")

        # Check modelCard
        if "modelCard" not in component or not component["modelCard"]:
            issues.append({
                "severity": ValidationSeverity.WARNING.value,
                "code": "MISSING_MODEL_CARD",
                "message": "Missing modelCard in first component",
                "path": "$.components[0].modelCard"
            })
            issue_codes.add("MISSING_MODEL_CARD")
        else:
            model_card = component["modelCard"]

            # Check modelParameters
            if "modelParameters" not in model_card or not model_card["modelParameters"]:
                issues.append({
                    "severity": ValidationSeverity.WARNING.value,
                    "code": "MISSING_MODEL_PARAMETERS",
                    "message": "Missing modelParameters in modelCard",
                    "path": "$.components[0].modelCard.modelParameters"
                })
                issue_codes.add("MISSING_MODEL_PARAMETERS")

            # Check considerations
            if "considerations" not in model_card or not model_card["considerations"]:
                issues.append({
                    "severity": ValidationSeverity.WARNING.value,
                    "code": "MISSING_CONSIDERATIONS",
                    "message": "Missing considerations in modelCard",
                    "path": "$.components[0].modelCard.considerations"
                })
                issue_codes.add("MISSING_CONSIDERATIONS")

    return issues
def _generate_validation_recommendations(issues: List[Dict[str, Any]]) -> List[str]:
    """
    Generate recommendations based on validation issues.

    Args:
        issues: List of validation issues

    Returns:
        List of recommendations
    """
    recommendations = []
    issue_codes = set(issue["code"] for issue in issues)

    # Generate recommendations based on issue codes
    if "MISSING_COMPONENTS" in issue_codes:
        recommendations.append("Add at least one component to the AIBOM")

    if "MISSING_COMPONENT_TYPE" in issue_codes or "INVALID_COMPONENT_TYPE" in issue_codes:
        recommendations.append("Ensure all AI components have type 'machine-learning-model'")

    if "MISSING_PURL" in issue_codes or "INVALID_PURL_FORMAT" in issue_codes:
        recommendations.append("Ensure all components have a valid PURL starting with 'pkg:'")

    if "MISSING_VERSION_IN_PURL" in issue_codes:
        recommendations.append("Include version information in PURLs using '@' syntax (e.g., pkg:huggingface/org/model@version)")

    if "MISSING_MODEL_CARD" in issue_codes:
        recommendations.append("Add a model card section to AI components")

    if "MISSING_MODEL_PARAMETERS" in issue_codes:
        recommendations.append("Include model parameters in the model card section")

    if "MISSING_CONSIDERATIONS" in issue_codes:
        recommendations.append("Add ethical considerations, limitations, and risks to the model card")

    if "MISSING_METADATA" in issue_codes:
        recommendations.append("Add metadata section to the AIBOM")

    if "MISSING_TOOLS" in issue_codes:
        recommendations.append("Include tools information in the metadata section")

    if "MISSING_AUTHORS" in issue_codes:
        recommendations.append("Add authors information to the metadata section")

    if "MISSING_PROPERTIES" in issue_codes:
        recommendations.append("Include additional properties in the metadata section")

    if "INVALID_AUTHOR_PROPERTY" in issue_codes:
        recommendations.append("Remove 'url' property from author objects and use 'email' instead to comply with CycloneDX schema")

    return recommendations
def validate_aibom(aibom: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate an AIBOM against AI-specific requirements.

    Args:
        aibom: The AIBOM to validate

    Returns:
        Validation report with issues and recommendations
    """
    # Initialize validation report
    report = {
        "valid": True,
        "ai_valid": True,
        "issues": [],
        "recommendations": [],
        "summary": {
            "error_count": 0,
            "warning_count": 0,
            "info_count": 0
        }
    }

    # Validate AI-specific requirements
    ai_issues = _validate_ai_requirements(aibom)
    if ai_issues:
        report["ai_valid"] = False
        report["valid"] = False
        report["issues"].extend(ai_issues)

    # Generate recommendations
    report["recommendations"] = _generate_validation_recommendations(report["issues"])

    # Update summary counts
    for issue in report["issues"]:
        if issue["severity"] == ValidationSeverity.ERROR.value:
            report["summary"]["error_count"] += 1
        elif issue["severity"] == ValidationSeverity.WARNING.value:
            report["summary"]["warning_count"] += 1
        elif issue["severity"] == ValidationSeverity.INFO.value:
            report["summary"]["info_count"] += 1

    return report
def get_validation_summary(report: Dict[str, Any]) -> str:
    """
    Get a human-readable summary of the validation report.

    Args:
        report: Validation report

    Returns:
        Human-readable summary
    """
    if report["valid"]:
        summary = "✅ AIBOM is valid and complies with AI requirements.\n"
    else:
        summary = "❌ AIBOM validation failed.\n"

    summary += "\nSummary:\n"
    summary += f"- Errors: {report['summary']['error_count']}\n"
    summary += f"- Warnings: {report['summary']['warning_count']}\n"
    summary += f"- Info: {report['summary']['info_count']}\n"

    if not report["valid"]:
        summary += "\nIssues:\n"
        for issue in report["issues"]:
            severity = issue["severity"].upper()
            code = issue["code"]
            message = issue["message"]
            path = issue["path"]
            summary += f"- [{severity}] {code}: {message} (at {path})\n"

        summary += "\nRecommendations:\n"
        for i, recommendation in enumerate(report["recommendations"], 1):
            summary += f"{i}. {recommendation}\n"

    return summary
def calculate_industry_neutral_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
    """
    Calculate completeness score using industry best practices without explicit standard references.

    Args:
        aibom: The AIBOM to score

    Returns:
        Dictionary containing score and recommendations
    """
    field_checklist = {}
    max_scores = {
        "required_fields": 20,
        "metadata": 20,
        "component_basic": 20,
        "component_model_card": 30,
        "external_references": 10
    }

    # Track missing fields by tier
    missing_fields = {
        "critical": [],
        "important": [],
        "supplementary": []
    }

    # Score each field based on classification
    scores_by_category = {category: 0 for category in max_scores.keys()}
    max_possible_by_category = {category: 0 for category in max_scores.keys()}

    for field, classification in FIELD_CLASSIFICATION.items():
        tier = classification["tier"]
        weight = classification["weight"]
        category = classification["category"]

        # Add to max possible score for this category
        max_possible_by_category[category] += weight

        # Check if field is present
        is_present = check_field_in_aibom(aibom, field)

        if is_present:
            scores_by_category[category] += weight
        else:
            missing_fields[tier].append(field)

        # Add to field checklist with appropriate indicators
        importance_indicator = "★★★" if tier == "critical" else "★★" if tier == "important" else "★"
        field_checklist[field] = f"{'✔' if is_present else '✘'} {importance_indicator}"

    # Normalize category scores to max_scores
    normalized_scores = {}
    for category in scores_by_category:
        if max_possible_by_category[category] > 0:
            # Normalize to the max score for this category
            normalized_score = (scores_by_category[category] / max_possible_by_category[category]) * max_scores[category]
            normalized_scores[category] = min(normalized_score, max_scores[category])
        else:
            normalized_scores[category] = 0

    # Calculate total score. Each category score is already scaled to its maximum,
    # and the category maxima sum to 100, so summing them yields a 0-100 score.
    total_score = sum(normalized_scores.values())

    # Round to one decimal place
    total_score = round(total_score, 1)

    # Ensure score is between 0 and 100
    total_score = max(0, min(total_score, 100))

    # Determine completeness profile
    profile = determine_completeness_profile(aibom, total_score)

    # Apply penalties for missing critical fields
    penalty_result = apply_completeness_penalties(total_score, missing_fields)

    # Generate recommendations
    recommendations = generate_field_recommendations(missing_fields)

    return {
        "total_score": penalty_result["adjusted_score"],
        "section_scores": normalized_scores,
        "max_scores": max_scores,
        "field_checklist": field_checklist,
        "field_tiers": {field: info["tier"] for field, info in FIELD_CLASSIFICATION.items()},
        "missing_fields": missing_fields,
        "completeness_profile": profile,
        "penalty_applied": penalty_result["penalty_applied"],
        "penalty_reason": penalty_result["penalty_reason"],
        "recommendations": recommendations
    }
def calculate_completeness_score(aibom: Dict[str, Any], validate: bool = True, use_best_practices: bool = True) -> Dict[str, Any]:
    """
    Calculate completeness score for an AIBOM and optionally validate against AI requirements.
    Enhanced with industry best practices scoring.

    Args:
        aibom: The AIBOM to score and validate
        validate: Whether to perform validation
        use_best_practices: Whether to use enhanced industry best practices scoring

    Returns:
        Dictionary containing score and validation results
    """
    # If using best practices scoring, use the enhanced industry-neutral approach
    if use_best_practices:
        result = calculate_industry_neutral_score(aibom)

        # Add validation if requested
        if validate:
            validation_result = validate_aibom(aibom)
            result["validation"] = validation_result

            # Adjust score based on validation results
            if not validation_result["valid"]:
                # Count errors and warnings
                error_count = validation_result["summary"]["error_count"]
                warning_count = validation_result["summary"]["warning_count"]

                # Apply penalties to the score
                if error_count > 0:
                    # Severe penalty for errors (up to 50% reduction)
                    error_penalty = min(0.5, error_count * 0.1)
                    result["total_score"] = round(result["total_score"] * (1 - error_penalty), 1)
                    result["validation_penalty"] = f"-{int(error_penalty * 100)}% due to {error_count} schema errors"
                elif warning_count > 0:
                    # Minor penalty for warnings (up to 20% reduction)
                    warning_penalty = min(0.2, warning_count * 0.05)
                    result["total_score"] = round(result["total_score"] * (1 - warning_penalty), 1)
                    result["validation_penalty"] = f"-{int(warning_penalty * 100)}% due to {warning_count} schema warnings"

        return result

    # Otherwise, use the original scoring method
    field_checklist = {}
    max_scores = {
        "required_fields": 20,
        "metadata": 20,
        "component_basic": 20,
        "component_model_card": 30,
        "external_references": 10
    }

    # Required Fields (20 points max)
    required_fields = ["bomFormat", "specVersion", "serialNumber", "version"]
    required_score = sum([5 if aibom.get(field) else 0 for field in required_fields])
    for field in required_fields:
        field_checklist[field] = "✔" if aibom.get(field) else "✘"

    # Metadata (20 points max)
    metadata = aibom.get("metadata", {})
    metadata_fields = ["timestamp", "tools", "authors", "component"]
    metadata_score = sum([5 if metadata.get(field) else 0 for field in metadata_fields])
    for field in metadata_fields:
        field_checklist[f"metadata.{field}"] = "✔" if metadata.get(field) else "✘"

    # Component Basic Info (20 points max)
    components = aibom.get("components", [])
    component_score = 0
    if components:
        # Use the first component as specified in the design
        comp = components[0]
        comp_fields = ["type", "name", "bom-ref", "purl", "description", "licenses"]
        component_score = sum([
            2 if comp.get("type") else 0,
            4 if comp.get("name") else 0,
            2 if comp.get("bom-ref") else 0,
            4 if comp.get("purl") and re.match(r'^pkg:huggingface/.+', comp["purl"]) else 0,
            4 if comp.get("description") and len(comp["description"]) > 20 else 0,
            4 if comp.get("licenses") and validate_spdx(comp["licenses"]) else 0
        ])
        for field in comp_fields:
            field_checklist[f"component.{field}"] = "✔" if comp.get(field) else "✘"
            if field == "purl" and comp.get(field) and not re.match(r'^pkg:huggingface/.+', comp["purl"]):
                field_checklist[f"component.{field}"] = "✘"
            if field == "description" and comp.get(field) and len(comp["description"]) <= 20:
                field_checklist[f"component.{field}"] = "✘"
            if field == "licenses" and comp.get(field) and not validate_spdx(comp["licenses"]):
                field_checklist[f"component.{field}"] = "✘"

    # Model Card Section (30 points max)
    model_card_score = 0
    if components:
        # Use the first component's model card as specified in the design
        comp = components[0]
        card = comp.get("modelCard", {})
        card_fields = ["modelParameters", "quantitativeAnalysis", "considerations"]
        model_card_score = sum([
            10 if card.get("modelParameters") else 0,
            10 if card.get("quantitativeAnalysis") else 0,
            10 if card.get("considerations") and isinstance(card["considerations"], dict) and len(str(card["considerations"])) > 50 else 0
        ])
        for field in card_fields:
            field_checklist[f"modelCard.{field}"] = "✔" if field in card else "✘"
            if field == "considerations" and field in card and (not isinstance(card["considerations"], dict) or len(str(card["considerations"])) <= 50):
                field_checklist[f"modelCard.{field}"] = "✘"

    # External References (10 points max)
    ext_refs = []
    if components and components[0].get("externalReferences"):
        ext_refs = components[0].get("externalReferences")
    ext_score = 0
    for ref in ext_refs:
        url = ref.get("url", "").lower()
        if "modelcard" in url:
            ext_score += 4
        elif "huggingface.co" in url or "github.com" in url:
            ext_score += 3
        elif "dataset" in url:
            ext_score += 3
    ext_score = min(ext_score, 10)
    field_checklist["externalReferences"] = "✔" if ext_refs else "✘"

    # Calculate total score
    section_scores = {
        "required_fields": required_score,
        "metadata": metadata_score,
        "component_basic": component_score,
        "component_model_card": model_card_score,
        "external_references": ext_score
    }

    # Calculate weighted total score
    total_score = (
        (section_scores["required_fields"] / max_scores["required_fields"]) * 20 +
        (section_scores["metadata"] / max_scores["metadata"]) * 20 +
        (section_scores["component_basic"] / max_scores["component_basic"]) * 20 +
        (section_scores["component_model_card"] / max_scores["component_model_card"]) * 30 +
        (section_scores["external_references"] / max_scores["external_references"]) * 10
    )

    # Round to one decimal place
    total_score = round(total_score, 1)

    # Ensure score is between 0 and 100
    total_score = max(0, min(total_score, 100))

    result = {
        "total_score": total_score,
        "section_scores": section_scores,
        "max_scores": max_scores,
        "field_checklist": field_checklist
    }

    # Add validation if requested
    if validate:
        validation_result = validate_aibom(aibom)
        result["validation"] = validation_result

        # Adjust score based on validation results
        if not validation_result["valid"]:
            # Count errors and warnings
            error_count = validation_result["summary"]["error_count"]
            warning_count = validation_result["summary"]["warning_count"]

            # Apply penalties to the score
            if error_count > 0:
                # Severe penalty for errors (up to 50% reduction)
                error_penalty = min(0.5, error_count * 0.1)
                result["total_score"] = round(result["total_score"] * (1 - error_penalty), 1)
                result["validation_penalty"] = f"-{int(error_penalty * 100)}% due to {error_count} schema errors"
            elif warning_count > 0:
                # Minor penalty for warnings (up to 20% reduction)
                warning_penalty = min(0.2, warning_count * 0.05)
                result["total_score"] = round(result["total_score"] * (1 - warning_penalty), 1)
                result["validation_penalty"] = f"-{int(warning_penalty * 100)}% due to {warning_count} schema warnings"

    return result
def merge_metadata(primary: Dict[str, Any], secondary: Dict[str, Any]) -> Dict[str, Any]:
    result = secondary.copy()
    for key, value in primary.items():
        if value is not None:
            if key in result and isinstance(value, dict) and isinstance(result[key], dict):
                result[key] = merge_metadata(value, result[key])
            else:
                result[key] = value
    return result


def extract_model_id_parts(model_id: str) -> Dict[str, str]:
    parts = model_id.split("/")
    if len(parts) == 1:
        return {"owner": None, "name": parts[0]}
    return {"owner": parts[0], "name": "/".join(parts[1:])}


def create_purl(model_id: str) -> str:
    parts = extract_model_id_parts(model_id)
    if parts["owner"]:
        return f"pkg:huggingface/{parts['owner']}/{parts['name']}"
    return f"pkg:huggingface/{parts['name']}"