Spaces:
Sleeping
Sleeping
| """ | |
| Microsoft AI Model SBOM Generator | |
| This module handles the generation of AI SBOMs for Microsoft-hosted models. | |
| """ | |
| import requests | |
| import logging | |
| import json | |
| import uuid | |
| from datetime import datetime | |
| import os | |
| from cyclonedx.model import OrganizationalEntity, ExternalReference, ExternalReferenceType, Component, ComponentType | |
| from cyclonedx.model.bom import Bom | |
| from cyclonedx.output import get_instance | |
| from cyclonedx.version import Version | |
| from cyclonedx.factory.license import LicenseFactory | |
# Root logging setup for this module: INFO and above, with timestamp,
# logger name and level in front of every message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Base URLs used when building links for Microsoft models.
# MICROSOFT_DOC_BASE is the prefix of the documentation reference attached to
# each generated SBOM; MICROSOFT_API_BASE is declared for API access.
MICROSOFT_DOC_BASE = "https://learn.microsoft.com/en-us/azure/ai-services/"
MICROSOFT_API_BASE = "https://api.cognitive.microsoft.com/models/"
def generate_aibom_for_microsoft(model_id, include_inference=True, use_best_practices=True):
    """
    Generate an AI SBOM for a Microsoft hosted model.

    Builds a CycloneDX BOM whose metadata component describes the model,
    enriches it with the fields returned by ``get_microsoft_model_info``,
    attaches a documentation link, a license entry and a placeholder runtime
    component, then serializes the result as CycloneDX 1.6 JSON.

    Args:
        model_id (str): The Microsoft model ID, e.g., 'microsoft/phi-2'.
            When there is no "org/" prefix the organization defaults to
            "microsoft".
        include_inference (bool): Whether to use inference to enhance the SBOM.
            Accepted for interface compatibility; this implementation does
            not read it.
        use_best_practices (bool): Whether to use industry best practices for
            scoring. Accepted for interface compatibility; this implementation
            does not read it.

    Returns:
        dict | None: The generated AI SBOM in CycloneDX format, or None if
        any step fails (the error is logged together with its traceback).
    """
    logger.info("Generating AI SBOM for Microsoft model: %s", model_id)
    try:
        # Imported locally because the file-level imports do not bring
        # Property into scope.
        # NOTE(review): confirm the cyclonedx import paths used by this file
        # against the installed cyclonedx-python-lib version.
        from cyclonedx.model import Property

        # Extract organization and model name
        if "/" in model_id:
            org, model_name = model_id.split("/", 1)
        else:
            org = "microsoft"
            model_name = model_id

        # Initialize the SBOM structure with the model as its metadata component
        bom = Bom()
        model_component = Component(
            name=model_name,
            version="1.0.0",  # Default version if not available
            type=ComponentType.MACHINE_LEARNING_MODEL,
            supplier=OrganizationalEntity(name=org),
        )
        bom.metadata.component = model_component

        # Add description and model information
        model_info = get_microsoft_model_info(model_id)
        if model_info:
            if "description" in model_info:
                model_component.description = model_info["description"]
            if "version" in model_info:
                model_component.version = model_info["version"]
            # Every remaining field becomes a name/value property. The
            # properties collection holds Property objects, so each pair has
            # to be wrapped; passing key and value as two separate arguments
            # raises TypeError, which previously made this function always
            # fall through to the error path and return None.
            for key, value in model_info.items():
                if key not in ("name", "description", "version"):
                    model_component.properties.add(
                        Property(name=key, value=str(value))
                    )

        # Add external references
        doc_url = f"{MICROSOFT_DOC_BASE}openai/concepts/models#{model_name.lower()}"
        model_component.external_references.add(
            ExternalReference(
                type=ExternalReferenceType.DOCUMENTATION,
                url=doc_url,
            )
        )

        # Add license information (fixed placeholder name, not looked up per model)
        license_factory = LicenseFactory()
        model_component.licenses.add(
            license_factory.make_from_string("Microsoft AI License")
        )

        # Placeholder dependency - a real implementation would extract the
        # model's actual components here.
        runtime_component = Component(
            name="microsoft-ai-runtime",
            version="1.0",
            type=ComponentType.APPLICATION,
        )
        bom.components.add(runtime_component)

        # Serialize to CycloneDX JSON, then parse so callers receive a dict
        output_format = get_instance(Version.V1_6, "json")
        output = output_format.output_as_string(bom)
        return json.loads(output)
    except Exception as e:
        # Top-level boundary: failures are reported to the caller as None.
        logger.exception("Error generating AI SBOM for Microsoft model: %s", e)
        return None
def get_microsoft_model_info(model_id):
    """
    Get information about a Microsoft model.

    No API calls are made: the result is placeholder metadata, with hard-coded
    overrides for model names that mention the "phi" or "florence" family.
    A family name only counts when it is not embedded in a longer run of
    letters, so "phi-2" and "Phi3-mini" match while "dolphin" does not.

    Args:
        model_id (str): The Microsoft model ID, with or without an "org/"
            prefix.

    Returns:
        dict: Information about the model, with the keys "name",
        "description", "version", "created", "framework", "model_type",
        "parameters" and "context_length".
    """
    # Imported locally because the module does not import re at file level.
    import re

    # Extract model name from ID (everything after the first "/")
    if "/" in model_id:
        _, model_name = model_id.split("/", 1)
    else:
        model_name = model_id

    # This is a placeholder - in a production implementation,
    # you would fetch actual data from Microsoft's APIs
    model_info = {
        "name": model_name,
        "description": f"Microsoft AI model: {model_name}",
        "version": "1.0",
        # Time of this call, not the model's real creation date.
        "created": datetime.now().isoformat(),
        "framework": "Microsoft AI",
        "model_type": "Unknown",  # Would be determined from API
        "parameters": "Unknown",  # Would be determined from API
        "context_length": "Unknown",  # Would be determined from API
    }

    # Overrides for common Microsoft model families, checked in order; the
    # first family that matches wins.
    # NOTE(review): the same values are applied to every name in a family,
    # whatever the variant - confirm they suit the models actually used.
    family_overrides = (
        ("phi", {
            "description": "Phi is a small language model developed by Microsoft Research",
            "model_type": "Language Model",
            "parameters": "2.7B",
            "context_length": "2048",
        }),
        ("florence", {
            "description": "Florence is a foundation vision model by Microsoft",
            "model_type": "Vision Model",
            "parameters": "Unknown",
            "context_length": "N/A",
        }),
    )
    lowered_name = model_name.lower()
    for family, overrides in family_overrides:
        # A letter directly before or after means the family name is just a
        # fragment of another word (e.g. "dolphin"), so it must not match.
        if re.search(rf"(?<![a-z]){family}(?![a-z])", lowered_name):
            model_info.update(overrides)
            break

    return model_info