"""Inference pipeline: images -> LLM -> validated JSON -> summary.""" from __future__ import annotations import json import logging import re from pathlib import Path from typing import Any, Dict, List, Sequence from llm_clients import analyze from prompt_loader import load_system_prompt from schemas import ValidationError, build_summary, validate_trend_payload from settings import Settings LOGGER = logging.getLogger("pipeline") DEFAULT_USER_PROMPT = "Analyze this garment image and output the micro-trend JSON per your schema." def _extract_json(text: str) -> Dict[str, Any]: """Parse JSON; if raw text contains extra prose, grab the first JSON object.""" try: return json.loads(text) except json.JSONDecodeError: pass match = re.search(r"\{.*\}", text, flags=re.S) if not match: raise json.JSONDecodeError("No JSON object found", text, 0) return json.loads(match.group(0)) def process_images( images: Sequence[bytes], model_choice: str, settings: Settings, system_prompt_path: Path | str | None = None, user_prompt: str = DEFAULT_USER_PROMPT, ) -> Dict[str, Any]: system_prompt = load_system_prompt(system_prompt_path) if system_prompt_path else load_system_prompt() raw_text = analyze(images, system_prompt, user_prompt, model_choice, settings) payload = _extract_json(raw_text) validated = validate_trend_payload(payload) summary = build_summary(validated) return {"trends": validated, "summary": summary}