File size: 5,467 Bytes
6165ba9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""

License utility functions for normalising and verifying SPDX license IDs.

"""
import logging
from typing import Optional, Dict

logger = logging.getLogger(__name__)

# Known license IDs mapped to a URL for the license text or landing page.
# The keys double as the module's list of "known" IDs: normalize_license_id()
# matches input against them (exactly, then case-insensitively) and
# get_license_url() resolves URLs from them.
# NOTE(review): "nvidia-open-model-license" does not look like an identifier
# from the SPDX license list — confirm before treating every key as SPDX.
LICENSE_URLS: Dict[str, str] = {
    "Apache-2.0": "https://www.apache.org/licenses/LICENSE-2.0.txt",
    "MIT": "https://opensource.org/licenses/MIT",
    "BSD-3-Clause": "https://opensource.org/licenses/BSD-3-Clause",
    "BSD-2-Clause": "https://opensource.org/licenses/BSD-2-Clause",
    "GPL-3.0-only": "https://www.gnu.org/licenses/gpl-3.0.txt",
    "GPL-2.0-only": "https://www.gnu.org/licenses/gpl-2.0.txt",
    "LGPL-3.0-only": "https://www.gnu.org/licenses/lgpl-3.0.txt",
    "CC-BY-4.0": "https://creativecommons.org/licenses/by/4.0/legalcode",
    "CC-BY-SA-4.0": "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
    "CC-BY-NC-4.0": "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
    "CC-BY-ND-4.0": "https://creativecommons.org/licenses/by-nd/4.0/legalcode",
    "CC-BY-NC-SA-4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
    "CC-BY-NC-ND-4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
    "CC0-1.0": "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
    "MPL-2.0": "https://www.mozilla.org/en-US/MPL/2.0/",
    "Unlicense": "https://unlicense.org/",
    "nvidia-open-model-license": "https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/",
}

# Common license names and aliases mapped to the canonical ID used as a key in
# LICENSE_URLS. Keys must be lowercase: normalize_license_id() looks up the
# lowercased input here.
LICENSE_MAPPING: Dict[str, str] = {
    "apache license 2.0": "Apache-2.0",
    "apache-2.0": "Apache-2.0",
    "mit": "MIT",
    "mit license": "MIT",
    "bsd-3-clause": "BSD-3-Clause",
    "cc-by-4.0": "CC-BY-4.0",
    "cc-by-nc-4.0": "CC-BY-NC-4.0",
    "cc0-1.0": "CC0-1.0",
    # Deprecated bare GNU identifiers map to their "-only" successors so they
    # are not passed through unnormalised.
    "gpl-3.0": "GPL-3.0-only",
    "gpl-2.0": "GPL-2.0-only",
    "lgpl-3.0": "LGPL-3.0-only",
    "nvidia open model license agreement": "nvidia-open-model-license",
    # Add more as needed
}

def normalize_license_id(license_id: str) -> Optional[str]:
    """Normalize a license string to a known or plausible license ID.

    Surrounding whitespace is ignored. Resolution order:

    1. exact match against the keys of ``LICENSE_URLS``;
    2. lowercased lookup in ``LICENSE_MAPPING`` (common names and aliases);
    3. case-insensitive match against the keys of ``LICENSE_URLS``;
    4. heuristic pass-through: a token shorter than 50 characters that
       contains no whitespace is returned as-is, unverified.

    Args:
        license_id: Raw license name or identifier.

    Returns:
        The normalized ID, or ``None`` when the input is empty or blank, or
        when no rule above applies.
    """
    if not license_id:
        return None

    # Strip first so surrounding whitespace cannot defeat the lookups below
    # or leak through the pass-through heuristic (e.g. " MIT" or "MIT\n").
    candidate = license_id.strip()
    if not candidate:
        return None

    # Exact match in our known list.
    if candidate in LICENSE_URLS:
        return candidate

    lowered = candidate.lower()

    # Known alias or human-readable name.
    if lowered in LICENSE_MAPPING:
        return LICENSE_MAPPING[lowered]

    # Known ID written with different casing.
    for known_id in LICENSE_URLS:
        if known_id.lower() == lowered:
            return known_id

    # Heuristic: a short single token looks like an ID. It may or may not be
    # valid, so return it and rely on downstream validation warnings.
    if len(candidate) < 50 and not any(ch.isspace() for ch in candidate):
        return candidate

    return None

def get_license_url(license_id: Optional[str], fallback: bool = True) -> Optional[str]:
    """Return the URL for a license based on its ID.

    The ID is looked up in ``LICENSE_URLS``, first exactly and then
    case-insensitively.

    Args:
        license_id: License identifier to resolve. ``None`` or an empty
            string yields ``None``.
        fallback: When true (the default), an ID that is not in the known
            list resolves to ``https://spdx.org/licenses/<license_id>.html``.
            When false, such an ID yields ``None``.

    Returns:
        The license URL, or ``None`` when there is nothing to resolve.
    """
    # Guard against a missing ID (e.g. the None that normalize_license_id()
    # can return): otherwise None crashes on .lower() and "" produces the
    # meaningless URL "https://spdx.org/licenses/.html".
    if not license_id:
        return None

    if license_id in LICENSE_URLS:
        return LICENSE_URLS[license_id]

    # Case-insensitive match against the known IDs.
    lowered = license_id.lower()
    for known_id, url in LICENSE_URLS.items():
        if known_id.lower() == lowered:
            return url

    if not fallback:
        return None
    return f"https://spdx.org/licenses/{license_id}.html"

# Licensing object from license-expression, created on first use and then
# reused by every later call.
_licensing = None

def is_valid_spdx_license_id(license_id: str) -> bool:
    """Report whether ``license_id`` is a single, valid SPDX license ID.

    The ID is first validated with the SPDX licensing object from the
    ``license_expression`` package; any reported error means ``False``.
    It is then parsed, and accepted only when the parsed object has a ``key``
    attribute and no ``children`` attribute. The intent is to accept a simple
    ID such as "MIT" and reject a compound expression such as
    "MIT OR Apache-2.0", because only simple IDs are wanted for the CycloneDX
    ``id`` field.
    NOTE(review): this presumes a simple ID parses to a symbol exposing
    ``key`` while AND/OR/WITH expressions do not — confirm against the
    installed license-expression version.

    Returns:
        ``True`` for a valid simple ID. Also ``True`` (with a warning logged)
        when ``license_expression`` cannot be imported, because validation is
        skipped. ``False`` when validation reports errors, when the parsed
        object is not a simple symbol, or when any other exception is raised
        (logged at debug level).
    """
    global _licensing
    try:
        from license_expression import get_spdx_licensing

        if _licensing is None:
            _licensing = get_spdx_licensing()

        # Step 1: the string must validate as an SPDX expression at all.
        validation = _licensing.validate(license_id)
        error_count = len(validation.errors)
        if error_count > 0:
            return False

        # Step 2: it must parse to a single symbol rather than a compound.
        expression = _licensing.parse(license_id)
        if not hasattr(expression, "key"):
            return False
        return not hasattr(expression, "children")
    except ImportError:
        logger.warning("license-expression library not found, skipping validation")
        return True
    except Exception as exc:
        logger.debug(f"License validation error: {exc}")
        return False