aibom / src /aibom_generator /microsoft_generate.py
nothuman2718's picture
Add Support for Microsoft models
f87bac8
"""
Microsoft AI Model SBOM Generator
This module handles the generation of AI SBOMs for Microsoft-hosted models.
"""
import requests
import logging
import json
import uuid
from datetime import datetime
import os
from cyclonedx.model import OrganizationalEntity, ExternalReference, ExternalReferenceType, Component, ComponentType
from cyclonedx.model.bom import Bom
from cyclonedx.output import get_instance
from cyclonedx.version import Version
from cyclonedx.factory.license import LicenseFactory
# Logging setup: INFO threshold, records rendered as
# "<time> - <logger name> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Microsoft API endpoints (base URLs; callers append the specific path).
MICROSOFT_API_BASE = "https://api.cognitive.microsoft.com/models/"
MICROSOFT_DOC_BASE = "https://learn.microsoft.com/en-us/azure/ai-services/"


def generate_aibom_for_microsoft(model_id, include_inference=True, use_best_practices=True):
    """
    Generate an AI SBOM for a Microsoft hosted model.

    Args:
        model_id (str): The Microsoft model ID, e.g., 'microsoft/phi-2'. When the
            ID contains no '/', the organization defaults to 'microsoft'.
        include_inference (bool): Whether to use inference to enhance the SBOM.
            Accepted for interface compatibility; the current implementation
            does not consult it.
        use_best_practices (bool): Whether to use industry best practices for
            scoring. Accepted for interface compatibility; the current
            implementation does not consult it.

    Returns:
        dict: The generated AI SBOM in CycloneDX format (the serialized JSON
        parsed back into a dict), or None if any step fails. Failures are
        logged with a traceback rather than raised.
    """
    logger.info("Generating AI SBOM for Microsoft model: %s", model_id)
    try:
        # Local import keeps this function self-contained; both names come from
        # the cyclonedx.model package that the module already imports from.
        from cyclonedx.model import Property, XsUri

        # Extract organization and model name
        if "/" in model_id:
            org, model_name = model_id.split("/", 1)
        else:
            org, model_name = "microsoft", model_id

        # Initialize the SBOM structure and describe the model itself as the
        # BOM's metadata component.
        bom = Bom()
        model_component = Component(
            name=model_name,
            version="1.0.0",  # Default version if not available
            type=ComponentType.MACHINE_LEARNING_MODEL,
            supplier=OrganizationalEntity(name=org),
        )
        bom.metadata.component = model_component

        # Add description and model information
        model_info = get_microsoft_model_info(model_id)
        if model_info:
            if "description" in model_info:
                model_component.description = model_info["description"]
            if "version" in model_info:
                model_component.version = model_info["version"]

            # Every remaining field becomes a name/value property. The
            # properties collection holds Property objects, so each entry must
            # be added as a single Property (not as two positional arguments).
            promoted_fields = ("name", "description", "version")
            for key, value in model_info.items():
                if key not in promoted_fields:
                    model_component.properties.add(
                        Property(name=key, value=str(value))
                    )

        # Add external references. The URL is wrapped in XsUri, the URI type
        # ExternalReference is declared to carry.
        doc_url = f"{MICROSOFT_DOC_BASE}openai/concepts/models#{model_name.lower()}"
        model_component.external_references.add(
            ExternalReference(
                type=ExternalReferenceType.DOCUMENTATION,
                url=XsUri(doc_url),
            )
        )

        # Add license information. "Microsoft AI License" is a fixed
        # placeholder name, not a value looked up per model.
        license_factory = LicenseFactory()
        model_component.licenses.add(
            license_factory.make_from_string("Microsoft AI License")
        )

        # Add components - would need to be expanded with actual components.
        # This is a placeholder - in a real implementation, you'd extract dependencies.
        runtime_component = Component(
            name="microsoft-ai-runtime",
            version="1.0",
            type=ComponentType.APPLICATION,
        )
        bom.components.add(runtime_component)

        # Generate the final JSON.
        # NOTE(review): the call shape below (schema version plus a "json"
        # string, then output_as_string(bom)) is kept exactly as written;
        # confirm it against the outputter API of the installed
        # cyclonedx-python-lib release.
        output_format = get_instance(Version.V1_6, "json")
        output = output_format.output_as_string(bom)

        # Parse the output as JSON and return
        return json.loads(output)
    except Exception as e:
        # Best-effort boundary: callers receive None instead of an exception,
        # and the traceback is preserved in the log.
        logger.exception("Error generating AI SBOM for Microsoft model: %s", e)
        return None


def get_microsoft_model_info(model_id):
    """
    Get information about a Microsoft model.

    In a production environment, this would make API calls to Microsoft's model
    registry or other relevant services. For this implementation, we return
    basic placeholder info, enriched for a few known model families.

    Family detection matches the family name on name boundaries, so unrelated
    names that merely contain the letters (e.g. 'dolphin-2') are not labelled
    as Phi. Parameter count and context length are only filled in for Phi-2;
    other Phi variants keep 'Unknown' rather than inheriting Phi-2's figures.

    Args:
        model_id (str): The Microsoft model ID, with or without an
            'organization/' prefix.

    Returns:
        dict: Information about the model, with the keys 'name', 'description',
        'version', 'created', 'framework', 'model_type', 'parameters' and
        'context_length'. All values are strings.
    """
    import re
    from datetime import timezone

    # Everything after the first "/" is the model name; IDs without an
    # organization prefix are used unchanged.
    model_name = model_id.split("/", 1)[-1]
    normalized = model_name.lower()

    # This is a placeholder - in a production implementation,
    # you would fetch actual data from Microsoft's APIs
    model_info = {
        "name": model_name,
        "description": f"Microsoft AI model: {model_name}",
        "version": "1.0",
        # Placeholder: time of this call (timezone-aware UTC), not the
        # model's real creation date.
        "created": datetime.now(timezone.utc).isoformat(),
        "framework": "Microsoft AI",
        "model_type": "Unknown",  # Would be determined from API
        "parameters": "Unknown",  # Would be determined from API
        "context_length": "Unknown",  # Would be determined from API
    }

    # Add additional fields for common Microsoft models. The lookarounds
    # require the family name to start a name segment and not run on into
    # further letters.
    if re.search(r"(?<![a-z0-9])phi(?![a-z])", normalized):
        model_info.update({
            "description": "Phi is a small language model developed by Microsoft Research",
            "model_type": "Language Model",
        })
        # Size and context figures below describe Phi-2 specifically.
        if re.search(r"(?<![a-z0-9])phi[-_ ]?2(?![0-9])", normalized):
            model_info.update({
                "parameters": "2.7B",
                "context_length": "2048",
            })
    elif re.search(r"(?<![a-z0-9])florence(?![a-z])", normalized):
        model_info.update({
            "description": "Florence is a foundation vision model by Microsoft",
            "model_type": "Vision Model",
            "parameters": "Unknown",
            "context_length": "N/A",
        })

    return model_info