File size: 5,788 Bytes
2bb1e37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env bash
#
# generate-sbom.sh — Generate a clean CycloneDX SBOM using Syft
#
# Produces a single SBOM from resolved manifests only — no directory scanning,
# no venv pollution, no local state. Works identically locally and in CI.
#
# How it works:
#   1. Python: uv pip compile resolves all transitive deps from requirements.txt
#   2. JavaScript: package-lock.json already contains the full resolved tree
#   3. Syft scans these resolved files, not the filesystem
#
# Usage:
#   ./scripts/generate-sbom.sh              # generate sbom.cdx.json from manifests
#   ./scripts/generate-sbom.sh docker       # generate from Docker image (best license coverage)
#   ./scripts/generate-sbom.sh docker IMG   # generate from a specific image
#   ./scripts/generate-sbom.sh validate     # validate existing SBOM
#
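# Requirements:
#   - syft    (brew install syft)
#   - uv      (brew install uv) — only needed for the default manifest-based target
#   - python3 (reads package.json and prints/validates the SBOM summary)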
#

# Strict mode: abort on errors, on unset variables, and on failures anywhere in a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory holding this script, and of its parent directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"

# ANSI escape sequences, stored as literal backslash text; they only become
# real escape characters when a printing helper expands them.
RESET='\033[0m'
BOLD='\033[1m'
DIM='\033[2m'
GREEN='\033[0;32m'
RED='\033[0;31m'

# Logging helpers. Each takes the message as $1 and expands backslash escapes
# in both the colour codes and the message (same semantics as `echo -e`).
# NOTE(review): nothing is printed between the colour codes and RESET in
# info/warn — a marker glyph may have been lost from this file; confirm.

# info: status message on stdout.
info()  { printf '%b\n' "${BOLD}${GREEN}${RESET} ${1:-}"; }
# warn: warning/error message on stderr, so it never mixes into captured or piped output.
warn()  { printf '%b\n' "${BOLD}${RED}${RESET} ${1:-}" >&2; }
# dim: indented, dimmed detail line on stdout.
dim()   { printf '%b\n' "${DIM}  ${1:-}${RESET}"; }

# Path the SBOM is written to and read back from by every target.
OUTPUT="${ROOT_DIR}/sbom.cdx.json"

#######################################
# Verify the external tools needed by the manifest-based target are on PATH.
# python3 is included because generate() invokes it immediately after this
# check; without it the user would only see a raw "command not found".
# Outputs: the syft and uv versions via dim(); a warning naming every missing tool.
# Exits:   1 if any tool is missing.
#######################################
check_deps() {
    local tool
    local -a missing=()
    for tool in syft uv python3; do
        command -v "$tool" &>/dev/null || missing+=("$tool")
    done
    if (( ${#missing[@]} > 0 )); then
        warn "Missing: ${missing[*]}. Install with: brew install ${missing[*]}"
        exit 1
    fi
    dim "Using $(syft --version), $(uv --version)"
}

#######################################
# Build the SBOM from resolved dependency manifests.
# Steps: read the version from package.json, resolve backend/requirements.txt
# with `uv pip compile` into a temporary directory, copy package-lock.json and
# package.json next to it (when the lockfile exists), scan that directory with
# syft, then print a short summary of the result.
# Globals:  ROOT_DIR (read), OUTPUT (read; file is written by syft)
# Outputs:  progress and summary lines on stdout
# Exits:    non-zero if a dependency is missing or resolution fails; other
#           command failures abort the script via `set -e`.
#######################################
generate() {
    info "Generating SBOM from resolved manifests..."
    check_deps

    # The path is passed as an argument rather than spliced into the Python
    # source, so quotes or backslashes in ROOT_DIR cannot break the snippet.
    local version
    version="$(python3 -c \
        'import json, sys; print(json.load(open(sys.argv[1], encoding="utf-8"))["version"])' \
        "$ROOT_DIR/package.json")"

    local work_dir
    work_dir="$(mktemp -d)"
    # Clean up on EXIT, not RETURN: a RETURN trap is skipped when `set -e`
    # aborts the script mid-function, which would leak the directory.
    # The path is expanded now because work_dir is local and out of scope at exit.
    # shellcheck disable=SC2064
    trap "rm -rf -- $(printf '%q' "$work_dir")" EXIT

    # --- Python: resolve all transitive deps without installing ---
    dim "Resolving Python transitive deps (uv pip compile)..."
    # stderr is left visible so a resolution failure explains itself.
    if ! uv pip compile "$ROOT_DIR/backend/requirements.txt" \
        --python-version 3.11 \
        --quiet \
        > "$work_dir/requirements-resolved.txt"; then
        warn "uv pip compile failed for backend/requirements.txt"
        exit 1
    fi

    # --- JavaScript: package-lock.json is already fully resolved ---
    if [[ -f "$ROOT_DIR/package-lock.json" ]]; then
        cp "$ROOT_DIR/package-lock.json" "$work_dir/package-lock.json"
        # Syft needs package.json alongside the lockfile
        cp "$ROOT_DIR/package.json" "$work_dir/package.json"
    else
        warn "package-lock.json not found — JS deps will be skipped"
    fi

    # --- Scan only the resolved files ---
    dim "Scanning resolved manifests with Syft..."
    syft scan "dir:$work_dir" \
        --output "cyclonedx-json=$OUTPUT" \
        --source-name open-webui \
        --source-version "$version" \
        --quiet

    # The manifests are no longer needed; remove them now and drop the trap.
    rm -rf -- "$work_dir"
    trap - EXIT

    # Print summary. Ecosystems are matched on the purl type prefix so that a
    # package whose *name* contains "pypi" or "npm" is not miscounted.
    python3 - "$OUTPUT" <<'PY'
import json
import sys

with open(sys.argv[1], encoding='utf-8') as f:
    data = json.load(f)
comps = data.get('components', [])
py = [c for c in comps if (c.get('purl') or '').startswith('pkg:pypi/')]
js = [c for c in comps if (c.get('purl') or '').startswith('pkg:npm/')]
with_lic = sum(1 for c in comps if c.get('licenses'))
print(f'  {len(comps)} total ({len(py)} Python, {len(js)} JavaScript)')
print(f'  {with_lic}/{len(comps)} with license info')
print(f'  Serial: {data.get("serialNumber", "none")}')
print(f'  Timestamp: {data.get("metadata", {}).get("timestamp", "none")}')
PY

    info "SBOM written → sbom.cdx.json"
}

#######################################
# Build the SBOM by scanning a Docker image with syft, then print a summary.
# Globals:   OUTPUT (read; file is written by syft)
# Arguments: $1 - image reference; defaults to
#                 ghcr.io/open-webui/open-webui:latest when empty or omitted
# Outputs:   progress and summary lines on stdout
# Exits:     1 if syft is not installed.
#######################################
generate_docker() {
    local image="${1:-ghcr.io/open-webui/open-webui:latest}"
    info "Generating SBOM from Docker image: $image"

    if ! command -v syft &>/dev/null; then
        warn "syft is not installed. Install with: brew install syft"
        exit 1
    fi

    dim "Pulling and scanning image..."
    syft scan "docker:$image" \
        --output "cyclonedx-json=$OUTPUT" \
        --quiet

    # The SBOM path is handed over as an argument instead of being spliced
    # into the Python source, so quotes or backslashes in it are harmless.
    python3 - "$OUTPUT" <<'PY'
import json
import sys

with open(sys.argv[1], encoding='utf-8') as f:
    data = json.load(f)
comps = data.get('components', [])
with_lic = sum(1 for c in comps if c.get('licenses'))
print(f'  {len(comps)} total components')
print(f'  {with_lic}/{len(comps)} with license info ({round(with_lic/max(len(comps),1)*100)}%)')
PY

    info "SBOM written → sbom.cdx.json"
}

#######################################
# Sanity-check the SBOM at $OUTPUT.
# Checks: bomFormat is CycloneDX, specVersion and serialNumber are present,
# and no component carries an external reference pointing at a local
# file:// path under /Users/.
# Globals: OUTPUT (read)
# Outputs: a one-line verdict plus one line per issue, on stdout
# Returns: non-zero (exits under `set -e`) when the file is missing, is not
#          valid JSON, or any check fails.
#######################################
validate() {
    info "Validating SBOM..."

    # The SBOM path is passed as an argument rather than interpolated into
    # the Python source, so special characters in it cannot break the snippet.
    python3 - "$OUTPUT" <<'PY'
import json
import sys

try:
    with open(sys.argv[1], encoding='utf-8') as f:
        data = json.load(f)
except FileNotFoundError:
    print('  ✗ sbom.cdx.json not found — run ./scripts/generate-sbom.sh first')
    sys.exit(1)
except json.JSONDecodeError as exc:
    # Report a truncated or corrupt file as a validation failure, not a traceback.
    print(f'  ✗ sbom.cdx.json is not valid JSON: {exc}')
    sys.exit(1)

issues = []
if data.get('bomFormat') != 'CycloneDX':
    issues.append('Not CycloneDX format')
if not data.get('specVersion'):
    issues.append('Missing specVersion')
if not data.get('serialNumber'):
    issues.append('Missing serial number')

components = data.get('components', [])

# Check for phantom local packages.
# NOTE(review): only /Users/ paths are matched; local paths elsewhere
# (e.g. /home/) are not flagged — confirm whether that is intended.
phantoms = []
for c in components:
    for ref in c.get('externalReferences', []):
        url = ref.get('url', '')
        if 'file://' in url and '/Users/' in url:
            phantoms.append(c.get('name', '<unnamed>'))
            break  # one entry per component, however many local refs it has
if phantoms:
    issues.append(f'Phantom local packages: {phantoms}')

with_lic = sum(1 for c in components if c.get('licenses'))
lic_pct = round(with_lic / max(len(components), 1) * 100)

if issues:
    print(f'  ✗ {len(components)} components, {lic_pct}% licensed')
    for i in issues:
        print(f'    ✗ {i}')
    sys.exit(1)
else:
    print(f'  ✓ {len(components)} components, {lic_pct}% licensed — PASS')
PY
}

# --- Main ---
# Work from the repository root. The first argument selects the target and
# defaults to "generate"; for "docker" the optional second argument is the image.
cd "$ROOT_DIR"
TARGET="${1:-generate}"

if [[ "$TARGET" == "generate" ]]; then
    generate
elif [[ "$TARGET" == "docker" ]]; then
    generate_docker "${2:-}"
elif [[ "$TARGET" == "validate" ]]; then
    validate
else
    warn "Unknown target: $TARGET"
    printf '%s\n' "Usage: $0 [generate|docker [IMAGE]|validate]"
    exit 1
fi

printf '\n'
info "Done."