artificialguybr-demo-lora / scripts /update_loras_catalog.py
artificialguybr's picture
Support multi-model LoRA generation and auto-build catalog with trigger words
b63bce5
#!/usr/bin/env python3
"""Build a LoRA catalog for a Hugging Face user.
Usage:
    python scripts/update_loras_catalog.py --author artificialguybr --output loras.json
"""
from __future__ import annotations
import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import requests
HF_API_MODELS = "https://huggingface.co/api/models"
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".webp")
@dataclass
class LoraEntry:
    """One row of the generated LoRA catalog.

    The field comments describe the values as populated by ``build_catalog``.
    """

    title: str  # human-readable name derived from the repo id
    repo: str  # model id as returned by the models API
    trigger_word: str  # prompt trigger; empty string when none is known
    family: str  # model family label such as "sdxl", "flux" or "other"
    base_model: str  # base model id taken from the repo tags; may be empty
    image: str  # cover image URL; may be empty
    weight_name: str  # chosen .safetensors filename; may be empty

    def as_dict(self) -> dict[str, Any]:
        """Return the entry as a plain dict keyed by field name.

        Keys follow the field declaration order, which is the order in which
        they end up in the JSON output.
        """
        return {name: getattr(self, name) for name in self.__dataclass_fields__}
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the catalog builder.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, so existing zero-argument calls behave as
            before; passing a list allows programmatic use.

    Returns:
        Namespace with ``author`` (required) and ``output`` (defaults to
        ``"loras.json"``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--author", required=True, help="HF username/org")
    parser.add_argument("--output", default="loras.json", help="Output JSON path")
    return parser.parse_args(argv)
def load_existing_triggers(path: Path) -> dict[str, str]:
    """Read trigger words from a previously written catalog file.

    Args:
        path: Location of an existing catalog JSON file (a list of objects
            with ``repo`` and ``trigger_word`` keys).

    Returns:
        Mapping of repo id to trigger word for every entry where both values
        are non-empty. An empty dict is returned when the file is missing,
        unreadable, not valid JSON, or not a JSON list.
    """
    try:
        content = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(content, list):
        return {}

    triggers: dict[str, str] = {}
    for item in content:
        if not isinstance(item, dict):
            continue
        # `or ""` keeps a JSON null from becoming the literal string "None".
        repo = str(item.get("repo") or "").strip()
        trigger = str(item.get("trigger_word") or "").strip()
        if repo and trigger:
            triggers[repo] = trigger
    return triggers
def paginated_models(author: str) -> list[dict[str, Any]]:
    """Fetch every model listed for *author*, following API pagination.

    Args:
        author: Hugging Face username or organisation.

    Returns:
        The concatenated JSON items of all result pages.

    Raises:
        requests.HTTPError: If any page responds with an error status.
    """
    models: list[dict[str, Any]] = []
    url: str | None = HF_API_MODELS
    # Query parameters are only sent with the first request; the "next" URL
    # taken from the Link header is requested as-is.
    params: dict[str, Any] | None = {"author": author, "full": "true", "limit": 100}
    while url:
        response = requests.get(url, params=params, timeout=60)
        response.raise_for_status()
        models.extend(response.json())
        # `response.links` is the parsed Link header keyed by rel, so the
        # right URL is picked even when the header lists several links.
        url = response.links.get("next", {}).get("url")
        params = None
    return models
def extract_base_model(tags: list[str]) -> str:
    """Return the base model named in *tags*, or ``""`` if there is none.

    A tag starting with ``base_model:adapter:`` anywhere in the list takes
    precedence over a tag that merely starts with ``base_model:``. Within
    each kind the first matching tag is used, with the prefix removed.
    """
    for prefix in ("base_model:adapter:", "base_model:"):
        hit = next((tag for tag in tags if tag.startswith(prefix)), None)
        if hit is not None:
            return hit[len(prefix):]
    return ""
# Ordered (family, markers) table. Order is precedence: the first family with
# a marker present in the inspected text wins.
_FAMILY_MARKERS = (
    ("sdxl", ("stable-diffusion-xl", "sdxl")),
    ("sd15", ("stable-diffusion-v1-5", "sd 1.5", "sd1.5", "sd-1-5")),
    ("qwen-image", ("qwen-image", "qwen image")),
    ("z-image", ("z-image", "zimage")),
    ("flux", ("flux",)),
)


def _family_from_text(text: str) -> str:
    """Return the first family whose marker occurs in lowercase *text*, else ``""``."""
    for family, markers in _FAMILY_MARKERS:
        if any(marker in text for marker in markers):
            return family
    return ""


def detect_family(base_model: str, repo_id: str, tags: list[str]) -> str:
    """Classify a LoRA into a model family.

    The base model name is inspected first. Only when it matches no family
    are the repo id and tags (joined with spaces) inspected with the same
    rules. All matching is case-insensitive substring matching.

    Args:
        base_model: Base model id; may be empty.
        repo_id: Repository id of the LoRA.
        tags: Tags of the repository.

    Returns:
        One of ``"sdxl"``, ``"sd15"``, ``"qwen-image"``, ``"z-image"``,
        ``"flux"``, or ``"other"`` when nothing matches.
    """
    fallback_text = " ".join([repo_id.lower(), *(tag.lower() for tag in tags)])
    return (
        _family_from_text(base_model.lower())
        or _family_from_text(fallback_text)
        or "other"
    )
def is_t2i_lora(model: dict[str, Any]) -> bool:
    """Tell whether a model record describes a text-to-image LoRA.

    Args:
        model: One model record; ``pipeline_tag`` and ``tags`` are read.

    Returns:
        ``True`` when ``pipeline_tag`` is ``"text-to-image"`` and at least
        one tag (compared case-insensitively) contains ``"lora"`` or
        ``"base_model:adapter:"``; ``False`` otherwise.
    """
    if model.get("pipeline_tag") != "text-to-image":
        return False
    # `or []` also covers a record whose "tags" key is present but null.
    tags = [str(tag).lower() for tag in model.get("tags") or []]
    return any("lora" in tag or "base_model:adapter:" in tag for tag in tags)
def infer_title(repo_id: str) -> str:
    """Derive a display title from a repository id.

    Everything up to and including the first ``/`` is dropped, underscores
    and hyphens become word separators, and each word is passed through
    ``str.capitalize`` (first character upper-cased, the rest lower-cased).
    """
    _owner, slash, rest = repo_id.partition("/")
    name = rest if slash else repo_id
    words = name.translate(str.maketrans("_-", "  ")).split()
    return " ".join(word.capitalize() for word in words)
def pick_cover_image(repo_id: str, siblings: list[dict[str, Any]]) -> str:
    """Return the URL of the first image file listed for the repo.

    Args:
        repo_id: Repository id used to build the URL.
        siblings: File records; each record's ``rfilename`` is inspected.

    Returns:
        A ``resolve/main`` URL for the first file whose name ends with one of
        ``IMAGE_EXTENSIONS`` (case-insensitive) and does not start with a
        dot, or ``""`` when no file qualifies.
    """
    filenames = (str(entry.get("rfilename", "")) for entry in siblings)
    cover = next(
        (
            name
            for name in filenames
            if not name.lower().startswith(".")
            and name.lower().endswith(IMAGE_EXTENSIONS)
        ),
        None,
    )
    if cover is None:
        return ""
    return f"https://huggingface.co/{repo_id}/resolve/main/{cover}"
def pick_weight_name(siblings: list[dict[str, Any]]) -> str:
    """Choose the ``.safetensors`` file to record as the LoRA weight.

    Files whose path contains ``comfyui/`` (case-insensitive) are ignored.
    First choice are files whose name starts with ``adapter_model`` or that
    have no ``/`` in their path; every other candidate is second choice.
    The alphabetically smallest name of the best non-empty group is returned.

    Args:
        siblings: File records; each record's ``rfilename`` is inspected.

    Returns:
        The selected filename, or ``""`` when there is no candidate.
    """
    first_choice: list[str] = []
    second_choice: list[str] = []
    for entry in siblings:
        name = str(entry.get("rfilename", ""))
        lowered = name.lower()
        if not lowered.endswith(".safetensors") or "comfyui/" in lowered:
            continue
        top_pick = lowered.startswith("adapter_model") or "/" not in name
        (first_choice if top_pick else second_choice).append(name)

    for bucket in (first_choice, second_choice):
        if bucket:
            return min(bucket)
    return ""
def normalize_trigger(text: str) -> str:
    """Clean up a raw trigger-word candidate.

    Whitespace runs are collapsed to single spaces, and surrounding spaces,
    quotes and ``, ; .`` punctuation are removed. Placeholder values
    (``-``, ``none``, ``n/a`` in any letter case) are treated as "no trigger".

    Args:
        text: Raw text captured from a README.

    Returns:
        The cleaned trigger, or ``""`` when nothing meaningful remains.
    """
    cleaned = re.sub(r"\s+", " ", text).strip()
    # Quotes and punctuation are stripped in a single pass so that mixed
    # endings such as `"foo",` do not leave a stray quote behind.
    cleaned = cleaned.strip(" ,;.\"'")
    if cleaned.lower() in {"-", "none", "n/a"}:
        return ""
    return cleaned
def extract_trigger_from_readme(readme: str) -> str:
    """Find a trigger word in the text of a model card.

    Key/value patterns (``instance_prompt:``, ``trigger_word:`` and similar)
    are tried first against the front matter, i.e. the text between the first
    two ``---`` markers, or against the whole document when no complete front
    matter is present. Free-text patterns are then tried against the whole
    document. Patterns are tried in order; the first capture that survives
    ``normalize_trigger`` is returned.

    Args:
        readme: Full README text.

    Returns:
        The normalized trigger word, or ``""`` when none is found.
    """
    frontmatter = readme
    if readme.startswith("---"):
        pieces = readme.split("---", 2)
        if len(pieces) >= 3:
            frontmatter = pieces[1]

    key_patterns = (
        r"(?im)^\s*instance_prompt\s*:\s*(.+?)\s*$",
        r"(?im)^\s*trigger_word\s*:\s*(.+?)\s*$",
        r"(?im)^\s*activation[_ ]token\s*:\s*(.+?)\s*$",
        r"(?im)^\s*trigger[_ ]phrase\s*:\s*(.+?)\s*$",
        r"(?im)^\s*token\s*:\s*(.+?)\s*$",
    )
    prose_patterns = (
        r"(?im)trigger word\s*[:\-]\s*`?([^`\n]+)`?",
        r"(?im)activation token\s*[:\-]\s*`?([^`\n]+)`?",
        r"(?im)use\s+`([^`]+)`\s+in your prompt",
        r"(?im)you can use\s+([^.\n]+)",
    )

    # Each pattern group is paired with the text it is searched in.
    for haystack, patterns in ((frontmatter, key_patterns), (readme, prose_patterns)):
        for pattern in patterns:
            found = re.search(pattern, haystack)
            if found is None:
                continue
            trigger = normalize_trigger(found.group(1))
            if trigger:
                return trigger
    return ""
def fetch_trigger_word(repo_id: str, session: requests.Session) -> str:
    """Download a repo's README and extract its trigger word (best effort).

    Args:
        repo_id: Repository id whose ``README.md`` on ``main`` is fetched.
        session: Session used for the HTTP request.

    Returns:
        The extracted trigger word, or ``""`` when the request fails, the
        response status is not 200, or the README contains no trigger.
    """
    readme_url = f"https://huggingface.co/{repo_id}/raw/main/README.md"
    try:
        response = session.get(readme_url, timeout=30)
    except requests.RequestException:
        # A network problem for one repo should not abort the whole run.
        # Only request errors are swallowed, so parsing bugs still surface.
        return ""
    if response.status_code != 200:
        return ""
    return extract_trigger_from_readme(response.text)
def build_catalog(
    models: list[dict[str, Any]], existing_triggers: dict[str, str]
) -> list[dict[str, Any]]:
    """Turn raw model records into sorted catalog rows.

    Records that are not text-to-image LoRAs are skipped. For each remaining
    record the trigger word is taken from the repo's README, falling back to
    the previously stored trigger for that repo.

    Args:
        models: Model records; ``id``, ``tags`` and ``siblings`` are read.
        existing_triggers: Repo id to trigger word, used as fallback.

    Returns:
        Catalog rows as dicts, sorted by family and then by lower-cased title.

    Raises:
        KeyError: If a LoRA record has no ``id``.
    """
    entries: list[LoraEntry] = []
    # The context manager closes the session's connections once all READMEs
    # have been fetched.
    with requests.Session() as session:
        for model in models:
            if not is_t2i_lora(model):
                continue
            repo_id = model["id"]
            # `or []` also covers records whose value is present but null.
            tags = [str(tag) for tag in model.get("tags") or []]
            siblings = model.get("siblings") or []
            base_model = extract_base_model(tags)
            trigger_word = (
                fetch_trigger_word(repo_id, session)
                or existing_triggers.get(repo_id, "")
            )
            entries.append(
                LoraEntry(
                    title=infer_title(repo_id),
                    repo=repo_id,
                    trigger_word=trigger_word,
                    family=detect_family(base_model, repo_id, tags),
                    base_model=base_model,
                    image=pick_cover_image(repo_id, siblings),
                    weight_name=pick_weight_name(siblings),
                )
            )
    entries.sort(key=lambda entry: (entry.family, entry.title.lower()))
    return [entry.as_dict() for entry in entries]
def main() -> None:
    """Build the catalog for the requested author and write it to disk.

    Reads trigger words from the existing output file (if any), downloads the
    author's model list, writes the new catalog as indented UTF-8 JSON and
    prints a short summary with per-family counts.
    """
    args = parse_args()
    output_path = Path(args.output)

    # The old triggers are read before the file is overwritten below.
    known_triggers = load_existing_triggers(output_path)
    catalog = build_catalog(paginated_models(args.author), known_triggers)

    payload = json.dumps(catalog, indent=2, ensure_ascii=False) + "\n"
    output_path.write_text(payload, encoding="utf-8")

    family_counts: dict[str, int] = {}
    for row in catalog:
        family_counts.setdefault(row["family"], 0)
        family_counts[row["family"]] += 1
    filled = len([row for row in catalog if row.get("trigger_word")])

    print(f"Saved {len(catalog)} LoRAs to {output_path}")
    print(f"Trigger words filled: {filled}")
    print("Family counts:")
    for family, count in sorted(family_counts.items()):
        print(f" - {family}: {count}")
# Run the catalog build only when this file is executed as a script.
if __name__ == "__main__":
    main()