Update src/aibom-generator/generator.py
Browse files- src/aibom-generator/generator.py +115 -145
src/aibom-generator/generator.py
CHANGED
|
@@ -81,12 +81,6 @@ class AIBOMGenerator:
|
|
| 81 |
# Calculate final score with industry-neutral approach if enabled
|
| 82 |
final_score = calculate_completeness_score(aibom, validate=True, use_best_practices=use_best_practices)
|
| 83 |
|
| 84 |
-
# Ensure metadata.properties exists
|
| 85 |
-
if "metadata" in aibom and "properties" not in aibom["metadata"]:
|
| 86 |
-
aibom["metadata"]["properties"] = []
|
| 87 |
-
|
| 88 |
-
# Note: Quality score information is no longer added to the AIBOM metadata
|
| 89 |
-
# This was removed as requested by the user
|
| 90 |
|
| 91 |
if output_file:
|
| 92 |
with open(output_file, 'w') as f:
|
|
@@ -214,16 +208,17 @@ class AIBOMGenerator:
|
|
| 214 |
]
|
| 215 |
}
|
| 216 |
|
| 217 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
if metadata and "commit_url" in metadata:
|
| 219 |
-
# Add external reference for downloadLocation
|
| 220 |
-
if "externalReferences" not in aibom:
|
| 221 |
-
aibom["externalReferences"] = []
|
| 222 |
-
|
| 223 |
aibom["externalReferences"].append({
|
| 224 |
-
"type": "
|
| 225 |
-
"url":
|
| 226 |
-
})
|
| 227 |
|
| 228 |
return aibom
|
| 229 |
|
|
@@ -234,22 +229,30 @@ class AIBOMGenerator:
|
|
| 234 |
model_card: Optional[ModelCard],
|
| 235 |
) -> Dict[str, Any]:
|
| 236 |
metadata = {}
|
| 237 |
-
|
| 238 |
if model_info:
|
| 239 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
metadata.update({
|
| 241 |
-
"name":
|
| 242 |
-
"author":
|
| 243 |
-
"tags":
|
| 244 |
-
"pipeline_tag":
|
| 245 |
-
"downloads":
|
| 246 |
-
"last_modified":
|
| 247 |
-
"commit": model_info
|
| 248 |
-
"commit_url": f"https://huggingface.co/{model_id}/commit/{model_info.sha}" if
|
| 249 |
})
|
| 250 |
except Exception as e:
|
| 251 |
print(f"Error extracting model info metadata: {e}")
|
| 252 |
-
|
| 253 |
if model_card and hasattr(model_card, "data") and model_card.data:
|
| 254 |
try:
|
| 255 |
card_data = model_card.data.to_dict() if hasattr(model_card.data, "to_dict") else {}
|
|
@@ -267,104 +270,35 @@ class AIBOMGenerator:
|
|
| 267 |
metadata["eval_results"] = model_card.data.eval_results
|
| 268 |
except Exception as e:
|
| 269 |
print(f"Error extracting model card metadata: {e}")
|
| 270 |
-
|
| 271 |
metadata["ai:type"] = "Transformer"
|
| 272 |
metadata["ai:task"] = metadata.get("pipeline_tag", "Text Generation")
|
| 273 |
metadata["ai:framework"] = "PyTorch" if "transformers" in metadata.get("library_name", "") else "Unknown"
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
|
|
|
|
|
|
|
|
|
| 280 |
metadata["typeOfModel"] = metadata.get("ai:type", "Transformer")
|
| 281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
return {k: v for k, v in metadata.items() if v is not None}
|
|
|
|
| 283 |
|
| 284 |
def _extract_unstructured_metadata(self, model_card: Optional[ModelCard], model_id: str) -> Dict[str, Any]:
|
| 285 |
"""
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
In a real implementation, this would:
|
| 290 |
-
1. Extract text from model card
|
| 291 |
-
2. Use BERT to identify key information
|
| 292 |
-
3. Structure the extracted information
|
| 293 |
-
|
| 294 |
-
For now, we'll simulate this with some basic extraction logic.
|
| 295 |
"""
|
| 296 |
-
|
| 297 |
|
| 298 |
-
# In a real implementation, we would use a BERT model here
|
| 299 |
-
# Since we can't install the required libraries due to space constraints,
|
| 300 |
-
# we'll simulate the enhancement with a placeholder implementation
|
| 301 |
-
|
| 302 |
-
if model_card and hasattr(model_card, "text") and model_card.text:
|
| 303 |
-
try:
|
| 304 |
-
card_text = model_card.text
|
| 305 |
-
|
| 306 |
-
# Simulate BERT extraction with basic text analysis
|
| 307 |
-
# In reality, this would be done with NLP models
|
| 308 |
-
|
| 309 |
-
# Extract description if missing
|
| 310 |
-
if card_text and "description" not in enhanced_metadata:
|
| 311 |
-
# Take first paragraph that's longer than 20 chars as description
|
| 312 |
-
paragraphs = [p.strip() for p in card_text.split('\n\n')]
|
| 313 |
-
for p in paragraphs:
|
| 314 |
-
if len(p) > 20 and not p.startswith('#'):
|
| 315 |
-
enhanced_metadata["description"] = p
|
| 316 |
-
break
|
| 317 |
-
|
| 318 |
-
# Extract limitations if present
|
| 319 |
-
if "limitations" not in enhanced_metadata:
|
| 320 |
-
if "## Limitations" in card_text:
|
| 321 |
-
limitations_section = card_text.split("## Limitations")[1].split("##")[0].strip()
|
| 322 |
-
if limitations_section:
|
| 323 |
-
enhanced_metadata["limitations"] = limitations_section
|
| 324 |
-
|
| 325 |
-
# Extract ethical considerations if present
|
| 326 |
-
if "ethical_considerations" not in enhanced_metadata:
|
| 327 |
-
for heading in ["## Ethical Considerations", "## Ethics", "## Bias"]:
|
| 328 |
-
if heading in card_text:
|
| 329 |
-
section = card_text.split(heading)[1].split("##")[0].strip()
|
| 330 |
-
if section:
|
| 331 |
-
enhanced_metadata["ethical_considerations"] = section
|
| 332 |
-
break
|
| 333 |
-
|
| 334 |
-
# Extract risks if present
|
| 335 |
-
if "risks" not in enhanced_metadata:
|
| 336 |
-
if "## Risks" in card_text:
|
| 337 |
-
risks_section = card_text.split("## Risks")[1].split("##")[0].strip()
|
| 338 |
-
if risks_section:
|
| 339 |
-
enhanced_metadata["risks"] = risks_section
|
| 340 |
-
|
| 341 |
-
# Extract datasets if present
|
| 342 |
-
if "datasets" not in enhanced_metadata:
|
| 343 |
-
datasets = []
|
| 344 |
-
if "## Dataset" in card_text or "## Datasets" in card_text:
|
| 345 |
-
dataset_section = ""
|
| 346 |
-
if "## Dataset" in card_text:
|
| 347 |
-
dataset_section = card_text.split("## Dataset")[1].split("##")[0].strip()
|
| 348 |
-
elif "## Datasets" in card_text:
|
| 349 |
-
dataset_section = card_text.split("## Datasets")[1].split("##")[0].strip()
|
| 350 |
-
|
| 351 |
-
if dataset_section:
|
| 352 |
-
# Simple parsing to extract dataset names
|
| 353 |
-
lines = dataset_section.split("\n")
|
| 354 |
-
for line in lines:
|
| 355 |
-
if line.strip() and not line.startswith("#"):
|
| 356 |
-
datasets.append({
|
| 357 |
-
"type": "dataset",
|
| 358 |
-
"name": line.strip().split()[0] if line.strip().split() else "Unknown",
|
| 359 |
-
"description": line.strip()
|
| 360 |
-
})
|
| 361 |
-
|
| 362 |
-
if datasets:
|
| 363 |
-
enhanced_metadata["datasets"] = datasets
|
| 364 |
-
except Exception as e:
|
| 365 |
-
print(f"Error extracting unstructured metadata: {e}")
|
| 366 |
-
|
| 367 |
-
return enhanced_metadata
|
| 368 |
|
| 369 |
def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
| 370 |
timestamp = datetime.datetime.utcnow().isoformat() + "Z"
|
|
@@ -419,10 +353,25 @@ class AIBOMGenerator:
|
|
| 419 |
# Add copyright
|
| 420 |
component["copyright"] = "NOASSERTION"
|
| 421 |
|
| 422 |
-
# Create properties array for additional metadata
|
| 423 |
properties = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
for key, value in metadata.items():
|
| 425 |
-
if key not in
|
| 426 |
if isinstance(value, (list, dict)):
|
| 427 |
if not isinstance(value, str):
|
| 428 |
value = json.dumps(value)
|
|
@@ -432,12 +381,10 @@ class AIBOMGenerator:
|
|
| 432 |
metadata_section = {
|
| 433 |
"timestamp": timestamp,
|
| 434 |
"tools": tools,
|
| 435 |
-
"component": component
|
|
|
|
| 436 |
}
|
| 437 |
|
| 438 |
-
if properties:
|
| 439 |
-
metadata_section["properties"] = properties
|
| 440 |
-
|
| 441 |
return metadata_section
|
| 442 |
|
| 443 |
def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -465,18 +412,29 @@ class AIBOMGenerator:
|
|
| 465 |
"purl": purl
|
| 466 |
}
|
| 467 |
|
| 468 |
-
#
|
| 469 |
-
if "license" in metadata:
|
| 470 |
component["licenses"] = [{
|
| 471 |
"license": {
|
| 472 |
"id": metadata["license"],
|
| 473 |
"url": self._get_license_url(metadata["license"])
|
| 474 |
}
|
| 475 |
}]
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
|
| 481 |
# Add external references
|
| 482 |
external_refs = [{
|
|
@@ -490,17 +448,18 @@ class AIBOMGenerator:
|
|
| 490 |
})
|
| 491 |
component["externalReferences"] = external_refs
|
| 492 |
|
| 493 |
-
#
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
component["
|
|
|
|
| 497 |
component["supplier"] = {
|
| 498 |
-
"name":
|
| 499 |
-
"url": [f"https://huggingface.co/{
|
| 500 |
}
|
| 501 |
component["manufacturer"] = {
|
| 502 |
-
"name":
|
| 503 |
-
"url": [f"https://huggingface.co/{
|
| 504 |
}
|
| 505 |
|
| 506 |
# Add copyright
|
|
@@ -593,19 +552,30 @@ class AIBOMGenerator:
|
|
| 593 |
def _get_license_url(self, license_id: str) -> str:
|
| 594 |
"""Get the URL for a license based on its SPDX ID."""
|
| 595 |
license_urls = {
|
| 596 |
-
"
|
| 597 |
-
"
|
| 598 |
-
"
|
| 599 |
-
"
|
| 600 |
-
"
|
| 601 |
-
"
|
| 602 |
-
"
|
| 603 |
-
"
|
| 604 |
-
"
|
| 605 |
-
"
|
| 606 |
-
"
|
| 607 |
-
"
|
| 608 |
}
|
| 609 |
|
| 610 |
-
return license_urls.get(license_id, "https://spdx.org/licenses/")
|
| 611 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
# Calculate final score with industry-neutral approach if enabled
|
| 82 |
final_score = calculate_completeness_score(aibom, validate=True, use_best_practices=use_best_practices)
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
if output_file:
|
| 86 |
with open(output_file, 'w') as f:
|
|
|
|
| 208 |
]
|
| 209 |
}
|
| 210 |
|
| 211 |
+
# ALWAYS add root-level external references
|
| 212 |
+
aibom["externalReferences"] = [{
|
| 213 |
+
"type": "distribution",
|
| 214 |
+
"url": f"https://huggingface.co/{model_id}"
|
| 215 |
+
}]
|
| 216 |
+
|
| 217 |
if metadata and "commit_url" in metadata:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
aibom["externalReferences"].append({
|
| 219 |
+
"type": "vcs",
|
| 220 |
+
"url": metadata["commit_url"]
|
| 221 |
+
} )
|
| 222 |
|
| 223 |
return aibom
|
| 224 |
|
|
|
|
| 229 |
model_card: Optional[ModelCard],
|
| 230 |
) -> Dict[str, Any]:
|
| 231 |
metadata = {}
|
| 232 |
+
|
| 233 |
if model_info:
|
| 234 |
try:
|
| 235 |
+
author = getattr(model_info, "author", None)
|
| 236 |
+
if not author or author.strip() == "":
|
| 237 |
+
parts = model_id.split("/")
|
| 238 |
+
author = parts[0] if len(parts) > 1 else "unknown"
|
| 239 |
+
print(f"DEBUG: Fallback author used: {author}")
|
| 240 |
+
else:
|
| 241 |
+
print(f"DEBUG: Author from model_info: {author}")
|
| 242 |
+
|
| 243 |
metadata.update({
|
| 244 |
+
"name": getattr(model_info, "modelId", model_id).split("/")[-1],
|
| 245 |
+
"author": author,
|
| 246 |
+
"tags": getattr(model_info, "tags", []),
|
| 247 |
+
"pipeline_tag": getattr(model_info, "pipeline_tag", None),
|
| 248 |
+
"downloads": getattr(model_info, "downloads", 0),
|
| 249 |
+
"last_modified": getattr(model_info, "lastModified", None),
|
| 250 |
+
"commit": getattr(model_info, "sha", None)[:7] if getattr(model_info, "sha", None) else None,
|
| 251 |
+
"commit_url": f"https://huggingface.co/{model_id}/commit/{model_info.sha}" if getattr(model_info, "sha", None) else None,
|
| 252 |
})
|
| 253 |
except Exception as e:
|
| 254 |
print(f"Error extracting model info metadata: {e}")
|
| 255 |
+
|
| 256 |
if model_card and hasattr(model_card, "data") and model_card.data:
|
| 257 |
try:
|
| 258 |
card_data = model_card.data.to_dict() if hasattr(model_card.data, "to_dict") else {}
|
|
|
|
| 270 |
metadata["eval_results"] = model_card.data.eval_results
|
| 271 |
except Exception as e:
|
| 272 |
print(f"Error extracting model card metadata: {e}")
|
| 273 |
+
|
| 274 |
metadata["ai:type"] = "Transformer"
|
| 275 |
metadata["ai:task"] = metadata.get("pipeline_tag", "Text Generation")
|
| 276 |
metadata["ai:framework"] = "PyTorch" if "transformers" in metadata.get("library_name", "") else "Unknown"
|
| 277 |
+
|
| 278 |
+
metadata["primaryPurpose"] = metadata.get("ai:task", "text-generation")
|
| 279 |
+
|
| 280 |
+
# Use model owner as fallback for suppliedBy if no author
|
| 281 |
+
if not metadata.get("author"):
|
| 282 |
+
parts = model_id.split("/")
|
| 283 |
+
metadata["author"] = parts[0] if len(parts) > 1 else "unknown"
|
| 284 |
+
|
| 285 |
+
metadata["suppliedBy"] = metadata.get("author", "unknown")
|
| 286 |
metadata["typeOfModel"] = metadata.get("ai:type", "Transformer")
|
| 287 |
+
|
| 288 |
+
print(f"DEBUG: Final metadata['author'] = {metadata.get('author')}")
|
| 289 |
+
print(f"DEBUG: Adding primaryPurpose = {metadata.get('ai:task', 'Text Generation')}")
|
| 290 |
+
print(f"DEBUG: Adding suppliedBy = {metadata.get('suppliedBy')}")
|
| 291 |
+
|
| 292 |
return {k: v for k, v in metadata.items() if v is not None}
|
| 293 |
+
|
| 294 |
|
| 295 |
def _extract_unstructured_metadata(self, model_card: Optional[ModelCard], model_id: str) -> Dict[str, Any]:
|
| 296 |
"""
|
| 297 |
+
Placeholder for future AI enhancement.
|
| 298 |
+
Currently returns empty dict since AI enhancement is not implemented.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
"""
|
| 300 |
+
return {}
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
| 304 |
timestamp = datetime.datetime.utcnow().isoformat() + "Z"
|
|
|
|
| 353 |
# Add copyright
|
| 354 |
component["copyright"] = "NOASSERTION"
|
| 355 |
|
| 356 |
+
# Create properties array for additional metadata (ALWAYS include critical fields)
|
| 357 |
properties = []
|
| 358 |
+
|
| 359 |
+
# ALWAYS add critical fields for scoring
|
| 360 |
+
critical_fields = {
|
| 361 |
+
"primaryPurpose": metadata.get("primaryPurpose", metadata.get("ai:task", "text-generation")),
|
| 362 |
+
"suppliedBy": metadata.get("suppliedBy", metadata.get("author", "unknown")),
|
| 363 |
+
"typeOfModel": metadata.get("ai:type", "transformer")
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
# Add critical fields first
|
| 367 |
+
for key, value in critical_fields.items():
|
| 368 |
+
if value and value != "unknown":
|
| 369 |
+
properties.append({"name": key, "value": str(value)})
|
| 370 |
+
|
| 371 |
+
# Add other metadata fields (excluding basic component fields)
|
| 372 |
+
excluded_fields = ["name", "author", "license", "description", "commit", "primaryPurpose", "suppliedBy", "typeOfModel"]
|
| 373 |
for key, value in metadata.items():
|
| 374 |
+
if key not in excluded_fields and value is not None:
|
| 375 |
if isinstance(value, (list, dict)):
|
| 376 |
if not isinstance(value, str):
|
| 377 |
value = json.dumps(value)
|
|
|
|
| 381 |
metadata_section = {
|
| 382 |
"timestamp": timestamp,
|
| 383 |
"tools": tools,
|
| 384 |
+
"component": component,
|
| 385 |
+
"properties": properties # ALWAYS include properties
|
| 386 |
}
|
| 387 |
|
|
|
|
|
|
|
|
|
|
| 388 |
return metadata_section
|
| 389 |
|
| 390 |
def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
| 412 |
"purl": purl
|
| 413 |
}
|
| 414 |
|
| 415 |
+
# ALWAYS add licenses (use default if not available)
|
| 416 |
+
if metadata and "license" in metadata and metadata["license"]:
|
| 417 |
component["licenses"] = [{
|
| 418 |
"license": {
|
| 419 |
"id": metadata["license"],
|
| 420 |
"url": self._get_license_url(metadata["license"])
|
| 421 |
}
|
| 422 |
}]
|
| 423 |
+
else:
|
| 424 |
+
# Add default license structure for consistency
|
| 425 |
+
component["licenses"] = [{
|
| 426 |
+
"license": {
|
| 427 |
+
"id": "unknown",
|
| 428 |
+
"url": "https://spdx.org/licenses/"
|
| 429 |
+
}
|
| 430 |
+
}]
|
| 431 |
+
# Debug
|
| 432 |
+
print(f"DEBUG: License in metadata: {'license' in metadata}" )
|
| 433 |
+
if "license" in metadata:
|
| 434 |
+
print(f"DEBUG: Adding licenses = {metadata['license']}")
|
| 435 |
+
|
| 436 |
+
# ALWAYS add description
|
| 437 |
+
component["description"] = metadata.get("description", f"AI model {model_id}")
|
| 438 |
|
| 439 |
# Add external references
|
| 440 |
external_refs = [{
|
|
|
|
| 448 |
})
|
| 449 |
component["externalReferences"] = external_refs
|
| 450 |
|
| 451 |
+
# ALWAYS add author information (use model owner if not available )
|
| 452 |
+
author_name = metadata.get("author", group if group else "unknown")
|
| 453 |
+
if author_name and author_name != "unknown":
|
| 454 |
+
component["authors"] = [{"name": author_name}]
|
| 455 |
+
component["publisher"] = author_name
|
| 456 |
component["supplier"] = {
|
| 457 |
+
"name": author_name,
|
| 458 |
+
"url": [f"https://huggingface.co/{author_name}"]
|
| 459 |
}
|
| 460 |
component["manufacturer"] = {
|
| 461 |
+
"name": author_name,
|
| 462 |
+
"url": [f"https://huggingface.co/{author_name}"]
|
| 463 |
}
|
| 464 |
|
| 465 |
# Add copyright
|
|
|
|
| 552 |
def _get_license_url(self, license_id: str) -> str:
|
| 553 |
"""Get the URL for a license based on its SPDX ID."""
|
| 554 |
license_urls = {
|
| 555 |
+
"apache-2.0": "https://www.apache.org/licenses/LICENSE-2.0",
|
| 556 |
+
"mit": "https://opensource.org/licenses/MIT",
|
| 557 |
+
"bsd-3-clause": "https://opensource.org/licenses/BSD-3-Clause",
|
| 558 |
+
"gpl-3.0": "https://www.gnu.org/licenses/gpl-3.0.en.html",
|
| 559 |
+
"cc-by-4.0": "https://creativecommons.org/licenses/by/4.0/",
|
| 560 |
+
"cc-by-sa-4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
|
| 561 |
+
"cc-by-nc-4.0": "https://creativecommons.org/licenses/by-nc/4.0/",
|
| 562 |
+
"cc-by-nd-4.0": "https://creativecommons.org/licenses/by-nd/4.0/",
|
| 563 |
+
"cc-by-nc-sa-4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
|
| 564 |
+
"cc-by-nc-nd-4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/",
|
| 565 |
+
"lgpl-3.0": "https://www.gnu.org/licenses/lgpl-3.0.en.html",
|
| 566 |
+
"mpl-2.0": "https://www.mozilla.org/en-US/MPL/2.0/",
|
| 567 |
}
|
| 568 |
|
| 569 |
+
return license_urls.get(license_id.lower(), "https://spdx.org/licenses/" )
|
| 570 |
|
| 571 |
+
def _fetch_with_retry(self, fetch_func, *args, max_retries=3, **kwargs):
|
| 572 |
+
"""Fetch data with retry logic for network failures."""
|
| 573 |
+
for attempt in range(max_retries):
|
| 574 |
+
try:
|
| 575 |
+
return fetch_func(*args, **kwargs)
|
| 576 |
+
except Exception as e:
|
| 577 |
+
if attempt == max_retries - 1:
|
| 578 |
+
logger.warning(f"Failed to fetch after {max_retries} attempts: {e}")
|
| 579 |
+
return None
|
| 580 |
+
time.sleep(1 * (attempt + 1)) # Exponential backoff
|
| 581 |
+
return None
|