shahkushan1 committed
Commit 2948ced · 0 parents

Add Gradio micro-trend app with LLM integrations and prompt loading

- add system prompt file under prompts/ and loader utilities
- implement settings loader with env overrides and example env file
- create schema validator/summary builder and LLM client wrappers (OpenAI/Gemini)
- build inference pipeline and Gradio UI (multi-image upload, previews, JSON + formatted summary)
- add requirements, README instructions, and ignore local config/secrets
- improve logging, error handling, and bullet formatting; support Gemini auth modes

.gitignore ADDED
@@ -0,0 +1,31 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *.pyo
+ *.pyd
+ .Python
+ env/
+ venv/
+ .venv/
+ ENV/
+
+ # Packaging / build
+ build/
+ dist/
+ *.egg-info/
+
+ # IDE / editor
+ .idea/
+ .vscode/
+
+ # OS
+ .DS_Store
+
+ # Local config/secrets
+ .env
+ settings.json
+ .env.example
+
+ # Gradio cache
+ gradio_cached_examples/
+ gradio_processed_images/
README.md ADDED
@@ -0,0 +1,36 @@
+ # Micro-Trend Detection Gradio UI
+
+ Gradio app that ingests garment images, calls GPT-5/GPT-5 mini or Gemini 3 vision models, and returns the micro-trend JSON plus a bullet summary.
+
+ ## Setup
+ - Python 3.11+ recommended.
+ - Install deps: `pip install -r requirements.txt`
+ - Configure secrets via environment variables, `settings.json` (keys mirror `sample_code/settings.json`), or `.env`:
+   - `OPENAI_API_KEY`, `GEMINI_API_KEY`
+   - `OPENAI_MODEL` (default `gpt-5-mini`), `OPENAI_REASONING_EFFORT`
+   - `GOOGLE_GENAI_USE_VERTEXAI`, `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION`
+ - The system prompt lives in `prompts/micro-trend-prompt.md` and is loaded automatically.
+
+ ## Run
+ ```bash
+ python app.py
+ ```
+ Use the `GRADIO_SERVER_NAME`/`GRADIO_SERVER_PORT` env vars if you need custom binding (Gradio honors them).
+
+ ## How it works
+ - `app.py` builds the Gradio UI (multi-image upload, model dropdown, optional downscale).
+ - `pipeline.py` calls the unified LLM client, extracts/validates the JSON, and derives the summary bullets.
+ - `llm_clients.py` wraps the OpenAI Responses API and Gemini 3 vision.
+ - `schemas.py` provides structural validation and the summary helper.
+ - `settings.py` loads config with env overrides.
+ - The prompt is read from `prompts/micro-trend-prompt.md` unchanged.
+
+ ## Gemini auth notes
+ - Two modes:
+   - Vertex (default): set `GOOGLE_GENAI_USE_VERTEXAI=true` and ensure ADC is available (e.g., `gcloud auth application-default login`) plus `GOOGLE_CLOUD_PROJECT`/`GOOGLE_CLOUD_LOCATION`. Confirm the chosen model exists in your Vertex region.
+   - API key (HuggingFace / Studio): set `GOOGLE_GENAI_USE_VERTEXAI=false` and provide `GEMINI_API_KEY`.
+ - The default Gemini model name is `gemini-3-pro-preview` (multimodal, text-out). Adjust to a region-available model if needed.
+
+ ## Notes
+ - Testing is deferred for now; add unit tests later for schema validation and the summary builder.
+ - The downscale checkbox reduces images to 1024px for lower cost/latency. If downscaling fails, the original bytes are used.
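
For reference, the flow in "How it works" can also be driven without the UI. A minimal sketch, assuming the repo root as the working directory (so the default prompt path resolves) and valid API keys; `garment.jpg` is a placeholder:

```python
# Hypothetical programmatic use of the pipeline; not part of the committed files.
from pathlib import Path

from pipeline import DEFAULT_USER_PROMPT, process_images
from settings import load_settings

settings = load_settings()  # env + settings.json overrides
images = [Path("garment.jpg").read_bytes()]  # any number of raw image byte strings

result = process_images(images, "gpt-5-mini", settings, user_prompt=DEFAULT_USER_PROMPT)
print(result["trends"])   # validated micro-trend JSON (dict)
print(result["summary"])  # derived bullet strings
```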
app.py ADDED
@@ -0,0 +1,119 @@
+ from __future__ import annotations
+
+ import io
+ import logging
+ from pathlib import Path
+ from typing import List
+
+ import gradio as gr
+ from PIL import Image
+
+ from llm_clients import GEMINI_3_VISION, OPENAI_GPT5, OPENAI_GPT5_MINI
+ from pipeline import DEFAULT_USER_PROMPT, process_images
+ from settings import load_settings
+
+ logging.basicConfig(level=logging.INFO)
+ LOGGER = logging.getLogger("app")
+
+
+ def _load_images(files: List[gr.File], downscale: bool) -> List[bytes]:
+     images: List[bytes] = []
+     for f in files or []:
+         data = Path(f.name).read_bytes()
+         if downscale:
+             try:
+                 img = Image.open(io.BytesIO(data)).convert("RGB")
+                 img.thumbnail((1024, 1024))
+                 buf = io.BytesIO()
+                 img.save(buf, format="PNG")
+                 data = buf.getvalue()
+             except Exception as exc:  # noqa: BLE001
+                 LOGGER.warning("Downscale failed for %s: %s; using original", f.name, exc)
+         images.append(data)
+     return images
+
+
+ def make_interface():
+     settings = load_settings()
+     settings.require_api_keys()
+
+     def _infer(files, model, creativity, downscale_images):
+         images = _load_images(files, downscale_images)
+         if not images:
+             raise gr.Error("Please upload at least one image.")
+
+         try:
+             result = process_images(
+                 images,
+                 model,
+                 settings,
+                 system_prompt_path=None,
+                 user_prompt=DEFAULT_USER_PROMPT,
+             )
+         except Exception as exc:  # noqa: BLE001
+             LOGGER.exception("Inference failed")
+             raise gr.Error(str(exc))
+
+         trends = result["trends"]
+         bullets = result["summary"]
+         md = "\n\n".join(f"- {b}" for b in bullets) if bullets else "No summary available."
+         return trends, md
+
+     def _on_files_change(files):
+         """Update preview and clear outputs when files are removed."""
+         if not files:
+             return [], None, ""
+         return files, gr.update(), gr.update()
+
+     with gr.Blocks(title="Garment Micro-Trend Detector") as demo:
+         gr.Markdown(
+             "Upload garment image(s), pick a model, and get structured micro-trend JSON plus a bullet summary."
+         )
+
+         with gr.Row():
+             image_input = gr.Files(file_count="multiple", label="Garment images")
+             with gr.Column():
+                 model_choices = [OPENAI_GPT5, OPENAI_GPT5_MINI, GEMINI_3_VISION]
+                 default_model = settings.openai_model if settings.openai_model in model_choices else OPENAI_GPT5_MINI
+                 model_dd = gr.Dropdown(
+                     choices=model_choices,
+                     value=default_model,
+                     label="Model",
+                     allow_custom_value=True,  # allow custom OpenAI model overrides like gpt-5.1
+                 )
+                 creativity = gr.Slider(
+                     minimum=0.0,
+                     maximum=1.0,
+                     step=0.1,
+                     value=0.2,
+                     label="Creativity (temperature hint)",
+                     info="Not all models use this directly; for now it is informational.",
+                 )
+                 downscale_chk = gr.Checkbox(value=True, label="Downscale images to 1024px for speed/cost")
+                 run_btn = gr.Button("Analyze", variant="primary")
+
+         preview = gr.Gallery(
+             label="Preview",
+             show_label=True,
+             object_fit="contain",  # preserve aspect ratio
+             height="auto",
+         )
+         json_out = gr.JSON(label="Micro-trend JSON")
+         summary_md = gr.Markdown(label="Summary")
+
+         image_input.change(_on_files_change, inputs=image_input, outputs=[preview, json_out, summary_md], queue=False)
+
+         run_btn.click(
+             _infer,
+             inputs=[image_input, model_dd, creativity, downscale_chk],
+             outputs=[json_out, summary_md],
+             queue=True,
+         )
+
+     return demo
+
+
+ if __name__ == "__main__":
+     app = make_interface()
+     app.queue()
+     app.launch()
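
`settings.py` is imported above but its hunk is not shown in this commit view. A rough sketch of the interface that `app.py` and `llm_clients.py` rely on, inferred from the call sites (the field and method names come from usage; the bodies are assumptions):

```python
# Hypothetical sketch of settings.py inferred from its call sites; not the shipped file.
from __future__ import annotations

import os
from dataclasses import dataclass


@dataclass
class Settings:
    openai_api_key: str | None = None
    gemini_api_key: str | None = None
    openai_model: str = "gpt-5-mini"
    openai_reasoning_effort: str | None = None
    google_genai_use_vertexai: bool = False
    google_cloud_project: str | None = None
    google_cloud_location: str | None = None

    def require_api_keys(self) -> None:
        # At least one provider must be usable.
        if not (self.openai_api_key or self.gemini_api_key or self.google_genai_use_vertexai):
            raise RuntimeError("Set OPENAI_API_KEY or GEMINI_API_KEY (or enable Vertex mode).")


def load_settings() -> Settings:
    # The real loader also reads settings.json; env-only resolution shown here.
    return Settings(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        gemini_api_key=os.getenv("GEMINI_API_KEY"),
        openai_model=os.getenv("OPENAI_MODEL", "gpt-5-mini"),
        openai_reasoning_effort=os.getenv("OPENAI_REASONING_EFFORT"),
        google_genai_use_vertexai=os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower() == "true",
        google_cloud_project=os.getenv("GOOGLE_CLOUD_PROJECT"),
        google_cloud_location=os.getenv("GOOGLE_CLOUD_LOCATION"),
    )
```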
docs/plan.md ADDED
@@ -0,0 +1,48 @@
+ # Gradio Micro-Trend Detector — Build Plan
+
+ - **Use the provided prompt verbatim**: The system prompt in `docs/problem-statement.md` must be used as-is for all providers (OpenAI + Gemini). Only attach a minimal user prompt per request.
+ - **Reuse the settings shape**: Follow the `sample_code/settings.json` structure for all configurable keys (API keys, model names, reasoning effort, project/location flags).
+ - **Reference samples**: Mirror integration patterns shown in `sample_code/llm_client.py` (OpenAI Responses API) and any other helpers in `sample_code/` for payloads, retries, and settings resolution.
+
+ ## Delivery Steps
+ 1) **Requirements & schema**
+    - Extract the output JSON contract from `docs/problem-statement.md` and codify it (Pydantic/TypedDict) for validation and downstream parsing.
+    - Decide on the response envelope: `{ "trends": <validated JSON>, "summary": <bullet list> }` (see the sketch after this plan).
+
+ 2) **Configuration layer**
+    - Implement a `settings` loader that reads `settings.json` (and env overrides) using the same keys as `sample_code/settings.json` (`OPENAI_API_KEY`, `GEMINI_API_KEY`, `OPENAI_MODEL`, `OPENAI_REASONING_EFFORT`, `GOOGLE_GENAI_USE_VERTEXAI`, `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION`).
+    - Provide `.env.example` and document the required vars in the `README`.
+
+ 3) **Model abstraction**
+    - Create a unified `llm_clients.py` with `analyze(images: list[bytes], model: str) -> dict`.
+    - Providers: OpenAI GPT-5 and GPT-5 mini via the Responses API; Gemini 3 vision endpoint with safety params aligned to the sample.
+    - Shared concerns: timeouts, retries/backoff, logging, optional temperature/max_tokens, deterministic defaults.
+
+ 4) **Prompting strategy**
+    - System prompt = the exact content from `docs/problem-statement.md` (no edits).
+    - User prompt per call: a short instruction to analyze the attached garment image(s) and emit only the specified JSON.
+    - Enforce “JSON first” responses; consider a post-parse repair/reprompt path if the JSON is invalid.
+
+ 5) **Inference pipeline**
+    - Image intake: validate file types, normalize to RGB, optional downscale/compress for cost and latency.
+    - Call the model abstraction; parse and validate JSON against the schema; if invalid, attempt a regex extract or auto-reprompt the model with the error included.
+    - Derive the bullet-point summary from the validated JSON (or accept a model-provided summary if valid).
+
+ 6) **Gradio UI**
+    - Inputs: `gr.Files` (multiple images), model dropdown (`GPT-5`, `GPT-5-mini`, `Gemini 3`), creativity/temperature slider, optional “downscale images” checkbox.
+    - Outputs: `gr.JSON` for the structured trends, `gr.Markdown` for the bullet summary; error banner for validation issues; loading indicator/queue enabled.
+    - Add helper text describing acceptable formats and latency expectations; optional “Download JSON” button.
+
+ 7) **Observability & performance**
+    - Log per-request latency, model used, image count/size, and validation outcomes.
+    - Default to GPT-5 mini to control cost; allow overrides via settings or the UI.
+    - Optional image downscaling knob; consider concurrency limits via the Gradio queue.
+
+ 8) **Packaging & run**
+    - Add `requirements.txt`/`pyproject` entries (gradio, openai>=1.x, google-genai/vertex client, pydantic, pillow).
+    - Document `python app.py --settings settings.json` (or env-only) startup, including PORT/HOST env handling for deployment.
+
+ 9) **Acceptance checklist**
+    - Gradio UI renders, accepts multiple images, selects among the three models, and returns validated JSON + a bullet summary.
+    - The prompt from `docs/problem-statement.md` is used unchanged.
+    - Settings follow the `sample_code/settings.json` shape; README and `.env.example` supplied.
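
The response envelope from step 1, codified as a quick sketch (a TypedDict here; the shipped `schemas.py` may use Pydantic instead):

```python
# Sketch only: the step-1 response envelope as a TypedDict.
from typing import Any, Dict, List, TypedDict


class TrendResult(TypedDict):
    trends: Dict[str, Any]  # validated micro-trend JSON from the model
    summary: List[str]      # derived bullet strings for the UI
```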
docs/problem-statement.md ADDED
@@ -0,0 +1,195 @@
+ Here’s a ready-to-paste meta-prompt you can drop into a Gemini Gem’s Instructions field to turn it into a garment micro-trend extractor (especially focused on print type + placement).
+ Name: Garment-MicroTrend-JSON
+ Description: Converts garment images into structured JSON capturing print/placement micro-trends.
+ Instructions (System Prompt):
+ You are MicroTrendStruct, an advanced Fashion Vision & Micro-Trend Serialization Engine. Your sole purpose is to ingest visual input (garment images) and transcode every discernible print, pattern, and placement detail into a rigorous, machine-readable JSON format suitable for micro-trend analysis.
+ ROLE & OBJECTIVE
+ Your focus is garments, not generic scenes.
+ Your primary objective is to:
+ Detect whether the garment has any print/pattern/graphic/texture beyond a flat solid.
+ Describe the type of print, motifs, scale, density, layout, and print technique (if inferable).
+ Describe where the print lives on the garment (placement zones, coverage, orientation, engineered vs all-over).
+ Normalize these details into micro-trend tags that can be aggregated across large datasets.
+ You are not describing art; you are building a fashion trend database row from pixels.
+ CORE DIRECTIVE
+ Do not summarize in prose outside the JSON schema.
+ Do not offer high-level commentary unless it is in the dedicated fields for micro-trend tags or short “feel” strings.
+ If a detail exists in pixels and is relevant to print, pattern, color story, or placement, it should appear somewhere in your JSON.
+ If something is not visible or genuinely ambiguous, keep the field but set the value to null and lower confidence for that field. Do not silently omit fields.
+ ANALYSIS PROTOCOL (SILENT)
+ Before generating JSON, perform a silent multi-pass visual sweep (do not output this):
+ Garment Sweep
+ Count visible garments. Identify primary garment(s).
+ Determine approximate category (dress, shirt, tee, blouse, skirt, jeans, trouser, co-ord top, co-ord bottom, jacket, etc.).
+ Note view type (front, back, side, 3/4, flatlay, detail shot, runway/street on model).
+ Print & Pattern Sweep
+ Detect whether the garment is solid, textured, or printed.
+ If printed/graphic, identify print families (floral, geometric, stripe, check, polka, animal, abstract, logo, slogan, photo, etc.).
+ Check for multiple print layers (e.g., ditsy floral over a stripe, border prints, panel prints).
+ Placement Sweep
+ Map where prints appear: overall/all-over vs specific zones (chest, hem, sleeves, collar, yoke, side panels, back only, etc.).
+ Estimate coverage percentage in each zone and whether the print is engineered/placed or repeated all-over.
+ Micro-Trend Sweep
+ Translate observable features into normalized micro-trend tags: e.g. ditsy_floral, oversized_floral, border_print_at_hem, front_chest_slogan, allover_logo, psychedelic_swirl, warped_stripes, photo_real_graphic, tonal_neutral_print, neon_accent_on_black, etc.
+ OUTPUT FORMAT (STRICT)
+ You must return ONLY a single valid JSON object.
+ Do not include markdown fences (no ```json).
+ Do not include any conversational text before or after the JSON.
+ Use this schema (and expand arrays as needed):
+ {
+   "meta": {
+     "image_quality": "Low/Medium/High",
+     "image_type": "Photo/Illustration/Flatlay/Runway/Street/etc",
+     "view_type": "Front/Back/Side/3_4/Flatlay/Detail/Full_body_on_model",
+     "num_visible_garments": 1
+   },
+   "global_scene": {
+     "setting": "Studio_white_bg/Studio_colored_bg/Street/Runway/Store/etc",
+     "model_present": true,
+     "occlusions_or_crops": "Brief note about parts of the garment that are cut off, hidden or overlapped, or null if none"
+   },
+   "garments": [
+     {
+       "id": "garment_001",
+       "role": "primary/secondary/background",
+       "category": "Dress/Top/Tee/Shirt/Blouse/Skirt/Jeans/Trouser/Jacket/Co_ord_top/Co_ord_bottom/Other",
+       "sub_category": "Free-text subcategory, e.g. 'bodycon mini dress', 'oversized graphic tee'",
+       "silhouette_summary": "Short description of silhouette, e.g. 'relaxed tee', 'A-line midi dress', or null",
+       "base_fabric_impression": "Woven/Knit/Denim/Satin/Jersey/Sheer/Lace/Leather/Unknown",
+       "base_color_main": "Main ground color name, e.g. 'black', 'off-white'",
+       "base_color_secondary": [
+         "Other ground/solid areas if any, else empty array"
+       ],
+
+       "print_presence": "none/subtle/medium/dominant",
+
+       "print_overview": {
+         "has_print_or_graphic": true,
+         "primary_print_family": "Floral/Geometric/Stripe/Check/Plaid/Polka/Animal_skin/Camouflage/Abstract/Logo/Monogram/Slogan/Text/Photo/Texture/Other/Unknown",
+         "secondary_print_families": [
+           "Additional families if visible, else []"
+         ],
+         "print_technique_estimate": "Surface_print/Embroidery/Jacquard/Yarn_dyed/Knit_pattern/Applique/Heat_transfer/Unknown",
+         "print_style_tags": [
+           "Hand_drawn/Watercolor/Outline_only/Line_art/Photoreal/Pixelated/Retro_70s/Retro_90s/Y2K/etc"
+         ]
+       },
+
+       "print_placement": [
+         {
+           "zone": "Overall_allover/Front_bodice/Front_chest/Center_front/Front_hem/Back_panel/Back_yoke/Back_only/Sleeves_full/Sleeve_upper/Sleeve_cuff/Collar/Placket/Side_panels/Waistband/Pockets/Hood/Other",
+           "side": "Front/Back/Both/Side/All_around",
+           "coverage_percent_of_zone": 80,
+           "orientation": "Vertical/Horizontal/Diagonal/Radial/Omni_directional/One_way/Engineered_motif",
+           "alignment_with_garment": "Engineered_to_seams/Follows_stripes_or_checks/Random_repeat/Unknown",
+           "notes": "Short note for unusual placement like 'single oversized motif across front chest', or null"
+         }
+       ],
+
+       "motif_atoms": [
+         {
+           "motif_type": "Flower/Leaf/Fruit/Star/Heart/Logo_letter/Word/Number/Animal/Animal_skin/Geo_shape/Stripe/Check/Dot/Swirl/Icon/Character/Other",
+           "motif_description": "1–2 line concise description, e.g. 'small white daisies with yellow centers'",
+           "scale": "micro/small/medium/large/oversized",
+           "density": "very_sparse/sparse/medium/dense/very_dense",
+           "spacing_pattern": "Even/Random/Clustered/Gradient/Border",
+           "edge_treatment": "Outline_only/Filled/Shadowed/3D_effect/Flat",
+           "colorways": "Short description of motif vs ground, e.g. 'navy flowers with white outline on beige ground'"
+         }
+       ],
+
+       "color_story": {
+         "ground_color": "Main background/solid color under the print",
+         "print_colors": [
+           "Key print colors in simple words"
+         ],
+         "contrast_behavior": "Low/Medium/High",
+         "colorblocking_or_panels": "Description if different colored panels/blocks exist, else null"
+       },
+
+       "construction_interaction": {
+         "print_cutoff_or_misalignment": "yes/no/uncertain",
+         "placed_around_features": [
+           "Neckline/Placket/Pockets/Side_seams/Waist/Hem/etc where the print clearly interacts, else []"
+         ],
+         "border_and_trim_details": [
+           "e.g. 'floral border at skirt hem', 'side tape stripe with logo repeat', or []"
+         ]
+       },
+
+       "text_and_logo_details": {
+         "has_text_or_logo": true,
+         "text_samples": [
+           "Exact or approximate words seen, case-sensitive if legible"
+         ],
+         "placement": [
+           "Center_chest/Left_chest/Back_center/Sleeve/Allover/Label_area/etc"
+         ],
+         "style": "Block/Handwriting/Graffiti/College/Retro/Stencil/Minimal/Unknown",
+         "logo_repetition_style": "Single/Scattered_repeat/Allover_monogram/None_or_unknown"
+       },
+
+       "micro_trend_inferences": {
+         "print_micro_trend_tags": [
+           "Normalized tags like 'ditsy_floral', 'large_floral', 'warped_stripes', 'psychedelic_swirl', 'allover_animal_skin', 'photo_real_graphic', 'allover_logo_monogram'"
+         ],
+         "placement_micro_trend_tags": [
+           "e.g. 'engineered_front_motif', 'border_print_at_hem', 'back_only_graphic', 'side_stripe_leg', 'chest_slogan'"
+         ],
+         "color_micro_trend_tags": [
+           "e.g. 'high_contrast_black_neon', 'tonal_neutrals', 'pastel_duo', 'primary_color_triad'"
+         ],
+         "other_detail_micro_trend_tags": [
+           "e.g. 'mixed_scale_florals', 'print_on_sheer', 'print_blocked_sleeves', 'print_yoke_with_solid_body'"
+         ],
+         "overall_trend_feel": "1 sentence, e.g. 'Y2K graphic tee', 'cottagecore ditsy floral midi dress', 'sportswear stripe legging', or null"
+       },
+
+       "confidence": {
+         "overall": "Low/Medium/High",
+         "print_family": "Low/Medium/High",
+         "placement": "Low/Medium/High",
+         "motif_details": "Low/Medium/High",
+         "color_story": "Low/Medium/High"
+       }
+     }
+
+   ],
+   "image_level_micro_trends": {
+     "deduplicated_tags": [
+       "Set-like union of all micro_trend_inferences tags across garments"
+     ],
+     "summary_comment": "Optional 1–2 line objective summary of the key print/placement micro-trend signals observed, or null"
+   }
+ }
+ CRITICAL CONSTRAINTS
+ Granularity:
+ Do not say “floral dress” and stop. Break it down into motif atoms, placement zones, scale, density, and normalized tags.
+ Null Values:
+ If any field is not applicable or not visible, keep the key and set value to null (or an empty array for list fields). Do not drop keys.
+ No Prose Outside JSON:
+ Your final response for each image must be only the JSON object described above, with double-quoted keys and values suitable for strict JSON parsing. No extra text, no Markdown, no explanations.
+
+ 1. System prompt (Gem Instructions)
+ Everything that defines the role, objective, schema, and rules for the model lives in the System / Instructions field.
+ Concretely, for the prompt I gave you, the System prompt is:
+ The identity + objective
+ “You are MicroTrendStruct, an advanced Fashion Vision & Micro-Trend Serialization Engine…”
+ The analysis protocol
+ Garment Sweep / Print & Pattern Sweep / Placement Sweep / Micro-Trend Sweep
+ The strict JSON schema description
+ The full meta, global_scene, garments[], motif_atoms[], micro_trend_inferences, etc.
+ The critical constraints
+ Granularity, null values, no prose outside JSON, etc.
+ In practice, you paste all of that into Gemini’s “Instructions” box as the System prompt.
+ You do not paste the image or ask a question there — it’s just behavior + schema + rules.
+ If you want a clean version of what to treat as System, it starts from:
+ “You are MicroTrendStruct, an advanced Fashion Vision & Micro-Trend Serialization Engine…”
+ and goes all the way through the JSON schema and “CRITICAL CONSTRAINTS”.
+ 2. User prompt (per request / per image)
+ Once the Gem is configured with that System prompt, each time you call it you only need a very small user prompt alongside the image, for example:
+ User prompt (per call):
+ “Here is an image of a garment. Analyze the visible garment(s) and return only the JSON object as specified in your instructions, with all micro-trend fields filled as far as the pixels allow.”
+ Or even shorter, once the Gem is stable:
+ “Analyze this garment image and output the micro-trend JSON per your schema.”
+ Then attach the image.
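
Mapped onto an API call, the system/user split described above looks roughly like this. A sketch using the google-genai SDK, where `system_instruction` carries the Gem Instructions and the short user prompt plus the image go in `contents`; the model name and file paths are placeholders:

```python
# Sketch of the system/user prompt split (illustrative, not the app's code path).
from pathlib import Path

from google import genai
from google.genai import types

client = genai.Client()  # picks up GEMINI_API_KEY / GOOGLE_API_KEY from the environment

system_prompt = Path("prompts/micro-trend-prompt.md").read_text(encoding="utf-8")
user_prompt = "Analyze this garment image and output the micro-trend JSON per your schema."
image = types.Part.from_bytes(data=Path("garment.jpg").read_bytes(), mime_type="image/jpeg")

response = client.models.generate_content(
    model="gemini-3-pro-preview",
    contents=[image, user_prompt],
    config=types.GenerateContentConfig(system_instruction=system_prompt),
)
print(response.text)  # should be the single JSON object
```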
llm_clients.py ADDED
@@ -0,0 +1,148 @@
+ """LLM provider wrappers (OpenAI + Gemini 3) with a unified analyze interface."""
+
+ from __future__ import annotations
+
+ import base64
+ import json
+ import logging
+ from typing import List, Sequence
+
+ from openai import OpenAI
+ from google import genai
+ from google.genai import types as genai_types
+ from google.genai import errors as genai_errors
+
+ from settings import Settings
+
+ LOGGER = logging.getLogger("llm")
+
+ # Model identifiers exposed to the UI
+ OPENAI_GPT5 = "gpt-5"
+ OPENAI_GPT5_MINI = "gpt-5-mini"
+ # Gemini 3 multimodal text-out model (supports image+text input, text output)
+ GEMINI_3_VISION = "gemini-3-pro-preview"
+
+
+ class LLMError(RuntimeError):
+     pass
+
+
+ def _encode_image_to_data_url(image_bytes: bytes, mime: str = "image/png") -> str:
+     b64 = base64.b64encode(image_bytes).decode("utf-8")
+     return f"data:{mime};base64,{b64}"
+
+
+ def _collect_openai_messages(system_prompt: str, user_prompt: str, images: Sequence[bytes]):
+     system = {"role": "system", "content": [{"type": "input_text", "text": system_prompt}]}
+     user_content = [{"type": "input_text", "text": user_prompt}]
+     for img in images:
+         user_content.append({"type": "input_image", "image_url": _encode_image_to_data_url(img)})
+     user = {"role": "user", "content": user_content}
+     return [system, user]
+
+
+ def run_openai(
+     images: Sequence[bytes],
+     system_prompt: str,
+     user_prompt: str,
+     model: str,
+     settings: Settings,
+ ) -> str:
+     if not settings.openai_api_key:
+         raise LLMError("OPENAI_API_KEY is missing")
+
+     client = OpenAI(api_key=settings.openai_api_key)
+     messages = _collect_openai_messages(system_prompt, user_prompt, images)
+
+     kwargs = {}
+     if settings.openai_reasoning_effort:
+         kwargs["reasoning"] = {"effort": settings.openai_reasoning_effort}
+
+     LOGGER.info(
+         "Calling OpenAI model=%s reasoning=%s images=%s total_bytes=%s",
+         model,
+         settings.openai_reasoning_effort,
+         len(images),
+         sum(len(i) for i in images),
+     )
+     resp = client.responses.create(model=model, input=messages, **kwargs)
+     text = getattr(resp, "output_text", None) or str(resp)
+     LOGGER.info("OpenAI response (truncated 500 chars): %s", text[:500])
+     return text
+
+
+ def run_gemini(
+     images: Sequence[bytes],
+     system_prompt: str,
+     user_prompt: str,
+     model: str,
+     settings: Settings,
+ ) -> str:
+     # Two modes:
+     # - Vertex (preferred when GOOGLE_GENAI_USE_VERTEXAI=True): uses ADC / gcloud auth
+     # - API key (Studio): uses GEMINI_API_KEY
+     if settings.google_genai_use_vertexai:
+         client = genai.Client(
+             vertexai=True,
+             project=settings.google_cloud_project,
+             location=settings.google_cloud_location or "us-central1",
+         )
+     else:
+         if not settings.gemini_api_key:
+             raise LLMError("GEMINI_API_KEY is missing and vertex mode is disabled")
+         client = genai.Client(api_key=settings.gemini_api_key)
+
+     parts: List[genai_types.Part | str] = [system_prompt]
+     for img in images:
+         parts.append(genai_types.Part.from_bytes(data=img, mime_type="image/png"))
+     parts.append(user_prompt)
+
+     LOGGER.info(
+         "Calling Gemini model=%s vertex=%s images=%s total_bytes=%s",
+         model,
+         settings.google_genai_use_vertexai,
+         len(images),
+         sum(len(i) for i in images),
+     )
+     try:
+         response = client.models.generate_content(
+             model=model,
+             contents=parts,
+             config=genai_types.GenerateContentConfig(response_modalities=["text"]),
+         )
+     except genai_errors.ClientError as exc:
+         # Provide clearer guidance for common auth/model issues.
+         raise LLMError(
+             "Gemini request failed. "
+             "If using Vertex, ensure the model exists in your project/location and ADC is active (`gcloud auth application-default login`). "
+             "If using Studio/API key (e.g., on HuggingFace), set GOOGLE_GENAI_USE_VERTEXAI=false and provide GEMINI_API_KEY. "
+             f"Details: {exc}"
+         ) from exc
+
+     # Prefer `.text`; fall back to concatenated text parts,
+     # then to the string form of the raw response.
+     text = getattr(response, "text", None)
+     if not text and getattr(response, "parts", None):
+         text_parts = [p.text for p in response.parts if getattr(p, "text", None)]
+         if text_parts:
+             text = "\n".join(text_parts)
+     if not text:
+         text = str(response)
+
+     LOGGER.info("Gemini response (truncated 500 chars): %s", text[:500])
+     return text
+
+
+ def analyze(
+     images: Sequence[bytes],
+     system_prompt: str,
+     user_prompt: str,
+     model_choice: str,
+     settings: Settings,
+ ) -> str:
+     """Dispatch to the correct provider based on model_choice."""
+     if model_choice in {OPENAI_GPT5, OPENAI_GPT5_MINI}:
+         return run_openai(images, system_prompt, user_prompt, model_choice, settings)
+     if model_choice.startswith("gemini"):
+         return run_gemini(images, system_prompt, user_prompt, model_choice, settings)
+     raise LLMError(f"Unsupported model choice: {model_choice}")
pipeline.py ADDED
@@ -0,0 +1,48 @@
+ """Inference pipeline: images -> LLM -> validated JSON -> summary."""
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ import re
+ from pathlib import Path
+ from typing import Any, Dict, List, Sequence
+
+ from llm_clients import analyze
+ from prompt_loader import load_system_prompt
+ from schemas import ValidationError, build_summary, validate_trend_payload
+ from settings import Settings
+
+ LOGGER = logging.getLogger("pipeline")
+
+ DEFAULT_USER_PROMPT = "Analyze this garment image and output the micro-trend JSON per your schema."
+
+
+ def _extract_json(text: str) -> Dict[str, Any]:
+     """Parse JSON; if raw text contains extra prose, grab the first JSON object."""
+     try:
+         return json.loads(text)
+     except json.JSONDecodeError:
+         pass
+
+     match = re.search(r"\{.*\}", text, flags=re.S)
+     if not match:
+         raise json.JSONDecodeError("No JSON object found", text, 0)
+     return json.loads(match.group(0))
+
+
+ def process_images(
+     images: Sequence[bytes],
+     model_choice: str,
+     settings: Settings,
+     system_prompt_path: Path | str | None = None,
+     user_prompt: str = DEFAULT_USER_PROMPT,
+ ) -> Dict[str, Any]:
+     system_prompt = load_system_prompt(system_prompt_path) if system_prompt_path else load_system_prompt()
+
+     raw_text = analyze(images, system_prompt, user_prompt, model_choice, settings)
+     payload = _extract_json(raw_text)
+     validated = validate_trend_payload(payload)
+     summary = build_summary(validated)
+
+     return {"trends": validated, "summary": summary}
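
`schemas.py` is imported above but its hunk is not part of this view. A minimal sketch of the interface `process_images` depends on (the names come from the import line; the bodies are assumptions):

```python
# Hypothetical sketch of the schemas.py interface used by pipeline.py; not the shipped file.
from typing import Any, Dict, List


class ValidationError(ValueError):
    """Raised when the model payload does not match the expected structure."""


def validate_trend_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    # The real validator checks the full schema; requiring the top-level keys is the minimum.
    for key in ("meta", "garments", "image_level_micro_trends"):
        if key not in payload:
            raise ValidationError(f"missing top-level key: {key}")
    return payload


def build_summary(payload: Dict[str, Any]) -> List[str]:
    # One bullet per garment from its normalized micro-trend tags (illustrative derivation).
    bullets: List[str] = []
    for garment in payload.get("garments", []):
        tags = garment.get("micro_trend_inferences", {}).get("print_micro_trend_tags") or []
        if tags:
            bullets.append(f"{garment.get('category', 'Garment')}: {', '.join(tags)}")
    return bullets
```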
prompt_loader.py ADDED
@@ -0,0 +1,14 @@
+ """Utilities to load the system prompt from the prompts directory."""
+
+ from __future__ import annotations
+
+ from pathlib import Path
+
+ DEFAULT_PROMPT_PATH = Path("prompts/micro-trend-prompt.md")
+
+
+ def load_system_prompt(path: Path | str = DEFAULT_PROMPT_PATH) -> str:
+     prompt_path = Path(path)
+     if not prompt_path.exists():
+         raise FileNotFoundError(f"Prompt file not found at {prompt_path}")
+     return prompt_path.read_text(encoding="utf-8")
prompts/micro-trend-prompt.md ADDED
@@ -0,0 +1,194 @@
+ Name: Garment-MicroTrend-JSON
+ Description: Converts garment images into structured JSON capturing print/placement micro-trends.
+ Instructions (System Prompt):
+ You are MicroTrendStruct, an advanced Fashion Vision & Micro-Trend Serialization Engine. Your sole purpose is to ingest visual input (garment images) and transcode every discernible print, pattern, and placement detail into a rigorous, machine-readable JSON format suitable for micro-trend analysis.
+ ROLE & OBJECTIVE
+ Your focus is garments, not generic scenes.
+ Your primary objective is to:
+ Detect whether the garment has any print/pattern/graphic/texture beyond a flat solid.
+ Describe the type of print, motifs, scale, density, layout, and print technique (if inferable).
+ Describe where the print lives on the garment (placement zones, coverage, orientation, engineered vs all-over).
+ Normalize these details into micro-trend tags that can be aggregated across large datasets.
+ You are not describing art; you are building a fashion trend database row from pixels.
+ CORE DIRECTIVE
+ Do not summarize in prose outside the JSON schema.
+ Do not offer high-level commentary unless it is in the dedicated fields for micro-trend tags or short “feel” strings.
+ If a detail exists in pixels and is relevant to print, pattern, color story, or placement, it should appear somewhere in your JSON.
+ If something is not visible or genuinely ambiguous, keep the field but set the value to null and lower confidence for that field. Do not silently omit fields.
+ ANALYSIS PROTOCOL (SILENT)
+ Before generating JSON, perform a silent multi-pass visual sweep (do not output this):
+ Garment Sweep
+ Count visible garments. Identify primary garment(s).
+ Determine approximate category (dress, shirt, tee, blouse, skirt, jeans, trouser, co-ord top, co-ord bottom, jacket, etc.).
+ Note view type (front, back, side, 3/4, flatlay, detail shot, runway/street on model).
+ Print & Pattern Sweep
+ Detect whether the garment is solid, textured, or printed.
+ If printed/graphic, identify print families (floral, geometric, stripe, check, polka, animal, abstract, logo, slogan, photo, etc.).
+ Check for multiple print layers (e.g., ditsy floral over a stripe, border prints, panel prints).
+ Placement Sweep
+ Map where prints appear: overall/all-over vs specific zones (chest, hem, sleeves, collar, yoke, side panels, back only, etc.).
+ Estimate coverage percentage in each zone and whether the print is engineered/placed or repeated all-over.
+ Micro-Trend Sweep
+ Translate observable features into normalized micro-trend tags: e.g. ditsy_floral, oversized_floral, border_print_at_hem, front_chest_slogan, allover_logo, psychedelic_swirl, warped_stripes, photo_real_graphic, tonal_neutral_print, neon_accent_on_black, etc.
+ OUTPUT FORMAT (STRICT)
+ You must return ONLY a single valid JSON object.
+ Do not include markdown fences (no ```json).
+ Do not include any conversational text before or after the JSON.
+ Use this schema (and expand arrays as needed):
+ {
+   "meta": {
+     "image_quality": "Low/Medium/High",
+     "image_type": "Photo/Illustration/Flatlay/Runway/Street/etc",
+     "view_type": "Front/Back/Side/3_4/Flatlay/Detail/Full_body_on_model",
+     "num_visible_garments": 1
+   },
+   "global_scene": {
+     "setting": "Studio_white_bg/Studio_colored_bg/Street/Runway/Store/etc",
+     "model_present": true,
+     "occlusions_or_crops": "Brief note about parts of the garment that are cut off, hidden or overlapped, or null if none"
+   },
+   "garments": [
+     {
+       "id": "garment_001",
+       "role": "primary/secondary/background",
+       "category": "Dress/Top/Tee/Shirt/Blouse/Skirt/Jeans/Trouser/Jacket/Co_ord_top/Co_ord_bottom/Other",
+       "sub_category": "Free-text subcategory, e.g. 'bodycon mini dress', 'oversized graphic tee'",
+       "silhouette_summary": "Short description of silhouette, e.g. 'relaxed tee', 'A-line midi dress', or null",
+       "base_fabric_impression": "Woven/Knit/Denim/Satin/Jersey/Sheer/Lace/Leather/Unknown",
+       "base_color_main": "Main ground color name, e.g. 'black', 'off-white'",
+       "base_color_secondary": [
+         "Other ground/solid areas if any, else empty array"
+       ],
+
+       "print_presence": "none/subtle/medium/dominant",
+
+       "print_overview": {
+         "has_print_or_graphic": true,
+         "primary_print_family": "Floral/Geometric/Stripe/Check/Plaid/Polka/Animal_skin/Camouflage/Abstract/Logo/Monogram/Slogan/Text/Photo/Texture/Other/Unknown",
+         "secondary_print_families": [
+           "Additional families if visible, else []"
+         ],
+         "print_technique_estimate": "Surface_print/Embroidery/Jacquard/Yarn_dyed/Knit_pattern/Applique/Heat_transfer/Unknown",
+         "print_style_tags": [
+           "Hand_drawn/Watercolor/Outline_only/Line_art/Photoreal/Pixelated/Retro_70s/Retro_90s/Y2K/etc"
+         ]
+       },
+
+       "print_placement": [
+         {
+           "zone": "Overall_allover/Front_bodice/Front_chest/Center_front/Front_hem/Back_panel/Back_yoke/Back_only/Sleeves_full/Sleeve_upper/Sleeve_cuff/Collar/Placket/Side_panels/Waistband/Pockets/Hood/Other",
+           "side": "Front/Back/Both/Side/All_around",
+           "coverage_percent_of_zone": 80,
+           "orientation": "Vertical/Horizontal/Diagonal/Radial/Omni_directional/One_way/Engineered_motif",
+           "alignment_with_garment": "Engineered_to_seams/Follows_stripes_or_checks/Random_repeat/Unknown",
+           "notes": "Short note for unusual placement like 'single oversized motif across front chest', or null"
+         }
+       ],
+
+       "motif_atoms": [
+         {
+           "motif_type": "Flower/Leaf/Fruit/Star/Heart/Logo_letter/Word/Number/Animal/Animal_skin/Geo_shape/Stripe/Check/Dot/Swirl/Icon/Character/Other",
+           "motif_description": "1–2 line concise description, e.g. 'small white daisies with yellow centers'",
+           "scale": "micro/small/medium/large/oversized",
+           "density": "very_sparse/sparse/medium/dense/very_dense",
+           "spacing_pattern": "Even/Random/Clustered/Gradient/Border",
+           "edge_treatment": "Outline_only/Filled/Shadowed/3D_effect/Flat",
+           "colorways": "Short description of motif vs ground, e.g. 'navy flowers with white outline on beige ground'"
+         }
+       ],
+
+       "color_story": {
+         "ground_color": "Main background/solid color under the print",
+         "print_colors": [
+           "Key print colors in simple words"
+         ],
+         "contrast_behavior": "Low/Medium/High",
+         "colorblocking_or_panels": "Description if different colored panels/blocks exist, else null"
+       },
+
+       "construction_interaction": {
+         "print_cutoff_or_misalignment": "yes/no/uncertain",
+         "placed_around_features": [
+           "Neckline/Placket/Pockets/Side_seams/Waist/Hem/etc where the print clearly interacts, else []"
+         ],
+         "border_and_trim_details": [
+           "e.g. 'floral border at skirt hem', 'side tape stripe with logo repeat', or []"
+         ]
+       },
+
+       "text_and_logo_details": {
+         "has_text_or_logo": true,
+         "text_samples": [
+           "Exact or approximate words seen, case-sensitive if legible"
+         ],
+         "placement": [
+           "Center_chest/Left_chest/Back_center/Sleeve/Allover/Label_area/etc"
+         ],
+         "style": "Block/Handwriting/Graffiti/College/Retro/Stencil/Minimal/Unknown",
+         "logo_repetition_style": "Single/Scattered_repeat/Allover_monogram/None_or_unknown"
+       },
+
+       "micro_trend_inferences": {
+         "print_micro_trend_tags": [
+           "Normalized tags like 'ditsy_floral', 'large_floral', 'warped_stripes', 'psychedelic_swirl', 'allover_animal_skin', 'photo_real_graphic', 'allover_logo_monogram'"
+         ],
+         "placement_micro_trend_tags": [
+           "e.g. 'engineered_front_motif', 'border_print_at_hem', 'back_only_graphic', 'side_stripe_leg', 'chest_slogan'"
+         ],
+         "color_micro_trend_tags": [
+           "e.g. 'high_contrast_black_neon', 'tonal_neutrals', 'pastel_duo', 'primary_color_triad'"
+         ],
+         "other_detail_micro_trend_tags": [
+           "e.g. 'mixed_scale_florals', 'print_on_sheer', 'print_blocked_sleeves', 'print_yoke_with_solid_body'"
+         ],
+         "overall_trend_feel": "1 sentence, e.g. 'Y2K graphic tee', 'cottagecore ditsy floral midi dress', 'sportswear stripe legging', or null"
+       },
+
+       "confidence": {
+         "overall": "Low/Medium/High",
+         "print_family": "Low/Medium/High",
+         "placement": "Low/Medium/High",
+         "motif_details": "Low/Medium/High",
+         "color_story": "Low/Medium/High"
+       }
+     }
+
+   ],
+   "image_level_micro_trends": {
+     "deduplicated_tags": [
+       "Set-like union of all micro_trend_inferences tags across garments"
+     ],
+     "summary_comment": "Optional 1–2 line objective summary of the key print/placement micro-trend signals observed, or null"
+   }
+ }
+ CRITICAL CONSTRAINTS
+ Granularity:
+ Do not say “floral dress” and stop. Break it down into motif atoms, placement zones, scale, density, and normalized tags.
+ Null Values:
+ If any field is not applicable or not visible, keep the key and set value to null (or an empty array for list fields). Do not drop keys.
+ No Prose Outside JSON:
+ Your final response for each image must be only the JSON object described above, with double-quoted keys and values suitable for strict JSON parsing. No extra text, no Markdown, no explanations.
+
+ 1. System prompt (Gem Instructions)
+ Everything that defines the role, objective, schema, and rules for the model lives in the System / Instructions field.
+ Concretely, for the prompt I gave you, the System prompt is:
+ The identity + objective
+ “You are MicroTrendStruct, an advanced Fashion Vision & Micro-Trend Serialization Engine…”
+ The analysis protocol
+ Garment Sweep / Print & Pattern Sweep / Placement Sweep / Micro-Trend Sweep
+ The strict JSON schema description
+ The full meta, global_scene, garments[], motif_atoms[], micro_trend_inferences, etc.
+ The critical constraints
+ Granularity, null values, no prose outside JSON, etc.
+ In practice, you paste all of that into Gemini’s “Instructions” box as the System prompt.
+ You do not paste the image or ask a question there — it’s just behavior + schema + rules.
+ If you want a clean version of what to treat as System, it starts from:
+ “You are MicroTrendStruct, an advanced Fashion Vision & Micro-Trend Serialization Engine…”
+ and goes all the way through the JSON schema and “CRITICAL CONSTRAINTS”.
+ 2. User prompt (per request / per image)
+ Once the Gem is configured with that System prompt, each time you call it you only need a very small user prompt alongside the image, for example:
+ User prompt (per call):
+ “Here is an image of a garment. Analyze the visible garment(s) and return only the JSON object as specified in your instructions, with all micro-trend fields filled as far as the pixels allow.”
+ Or even shorter, once the Gem is stable:
+ “Analyze this garment image and output the micro-trend JSON per your schema.”
+ Then attach the image.
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ gradio>=4.44.0
+ openai>=1.45.0
+ google-genai>=0.4.0
+ pillow>=10.3.0
sample_code/generate_images.py ADDED
@@ -0,0 +1,679 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import json
5
+ import logging
6
+ import mimetypes
7
+ from datetime import datetime
8
+ import shutil
9
+ from pathlib import Path
10
+ from typing import List, Dict, Any
11
+
12
+ import os
13
+
14
+ from google import genai
15
+ from google.genai import types, errors as genai_errors
16
+
17
+ from constants import (
18
+ ROOT,
19
+ PLAN_PATH,
20
+ DEFAULT_SETTINGS,
21
+ GEMINI_SETTINGS_KEYS,
22
+ LOG_NAME,
23
+ STYLE_VIEW_ORDER,
24
+ )
25
+
26
+
27
+ class PromptTask(Dict[str, Any]):
28
+ """Typed mapping representing a single prompt item (slide, filename, prompt, order)."""
29
+ slide: str
30
+ filename: str
31
+ prompt: str
32
+ order: int
33
+
34
+
35
+ def slugify(text: str) -> str:
36
+ """Convert a slide label to a filesystem-friendly slug."""
37
+ text = text.lower()
38
+ text = re.sub(r"[^a-z0-9]+", "-", text)
39
+ text = text.strip("-")
40
+ return text or "slide"
41
+
42
+
43
+ def output_root(brand: str, collection: str) -> Path:
44
+ """Base directory for images under outputs/<brand>/collection/<collection>/images."""
45
+ return ROOT / "outputs" / slugify(brand) / "collection" / slugify(collection) / "images"
46
+
47
+
48
+ def parse_plan(plan_path: Path) -> List[PromptTask]:
49
+ """Pull every FILENAME/PROMPT pair from plan.md, keeping slide context."""
50
+ lines = plan_path.read_text(encoding="utf-8").splitlines()
51
+ tasks: List[PromptTask] = []
52
+ current_slide = "slide"
53
+ order = 0
54
+ i = 0
55
+ while i < len(lines):
56
+ line = lines[i].strip()
57
+
58
+ # Capture slide headers (e.g., "Slide 6", "Slide 6A", "Slides 8–19")
59
+ slide_match = re.match(r"slide[s]?\s+([\w–-]+)", line, re.IGNORECASE)
60
+ if slide_match:
61
+ current_slide = line
62
+ i += 1
63
+ continue
64
+
65
+ file_match = re.match(r"FILENAME:\s*(.+)", line, re.IGNORECASE)
66
+ if file_match:
67
+ filename = file_match.group(1).strip()
68
+
69
+ # Advance to the PROMPT line
70
+ j = i + 1
71
+ while j < len(lines) and not lines[j].strip().lower().startswith("prompt:"):
72
+ j += 1
73
+ if j >= len(lines):
74
+ raise ValueError(f"PROMPT missing for {filename}")
75
+
76
+ prompt_line = lines[j].strip()
77
+ prompt = prompt_line.split("PROMPT:", 1)[1].strip()
78
+
79
+ # Capture any prompt continuation lines until the next FILENAME/Slide header
80
+ k = j + 1
81
+ continuation: List[str] = []
82
+ while k < len(lines):
83
+ next_line = lines[k].strip()
84
+ if next_line == "":
85
+ k += 1
86
+ continue
87
+ if re.match(r"(FILENAME:|Slide[s]?\s+|< Text Content)", next_line, re.IGNORECASE):
88
+ break
89
+ continuation.append(next_line)
90
+ k += 1
91
+
92
+ if continuation:
93
+ prompt = " ".join([prompt] + continuation)
94
+
95
+ tasks.append(
96
+ {
97
+ "slide": current_slide,
98
+ "filename": filename,
99
+ "prompt": prompt,
100
+ "order": order,
101
+ }
102
+ )
103
+ order += 1
104
+ i = k
105
+ continue
106
+
107
+ i += 1
108
+
109
+ return tasks
110
+
111
+
112
+ def setup_logging(out_root: Path, mode: str, level: str = "INFO", log_path: Path | None = None) -> logging.Logger:
113
+ """Configure stdout + file logging; file goes under outputs/<brand>/collection/<collection>/images/<mode>/run.log."""
114
+ out_dir = out_root / mode
115
+ out_dir.mkdir(parents=True, exist_ok=True)
116
+
117
+ log_file = log_path or out_dir / "run.log"
118
+ numeric_level = getattr(logging, level.upper(), logging.INFO)
119
+
120
+ formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
121
+
122
+ handlers: list[logging.Handler] = [logging.StreamHandler()]
123
+ handlers[0].setFormatter(formatter)
124
+ file_handler = logging.FileHandler(log_file, encoding="utf-8")
125
+ file_handler.setFormatter(formatter)
126
+ handlers.append(file_handler)
127
+
128
+ logging.basicConfig(level=numeric_level, handlers=handlers, force=True)
129
+ logger = logging.getLogger(LOG_NAME)
130
+ logger.setLevel(numeric_level)
131
+ logger.info("Logging initialized (mode=%s, file=%s, level=%s)", mode, log_file, level.upper())
132
+ return logger
133
+
134
+
135
+ def clean_output_dir(out_root: Path, mode: str, logger: logging.Logger | None = None) -> None:
136
+ """Remove all files under the given mode folder to start from a clean slate."""
137
+ target = out_root / mode
138
+ if target.exists():
139
+ if logger:
140
+ logger.info("Cleaning output directory %s", target)
141
+ shutil.rmtree(target)
142
+
143
+
144
+ def anchor_part(prompt: str, logger: logging.Logger) -> tuple[None, None]:
145
+ """Anchor images via folder are removed; function retained for signature compatibility."""
146
+ return None, None
147
+
148
+
149
+ def part_from_path(path: Path) -> types.Part:
150
+ """Load an image file as a genai Part with an inferred MIME type."""
151
+ mime, _ = mimetypes.guess_type(path)
152
+ if not mime:
153
+ mime = "image/jpeg"
154
+ data = path.read_bytes()
155
+ return types.Part.from_bytes(data=data, mime_type=mime)
156
+
157
+
158
+ def detect_style_view(filename: str) -> tuple[str, str] | None:
159
+ """Return (style_code, view) for style view images; else None."""
160
+ m = re.match(r"^(MG-[A-Z]-SS\d{2}-\d{3})_(hero|front|back)\.", filename, re.IGNORECASE)
161
+ if not m:
162
+ return None
163
+ style_code, view = m.group(1), m.group(2).lower()
164
+ return style_code, view
165
+
166
+
167
+ def reorder_tasks_for_styles(tasks: List[PromptTask]) -> List[PromptTask]:
168
+ """Group style views and order front->back->hero; keep non-style in original positions."""
169
+ style_map: dict[str, list[PromptTask]] = {}
170
+ for t in tasks:
171
+ sv = detect_style_view(t["filename"])
172
+ if sv:
173
+ code, view = sv
174
+ style_map.setdefault(code, []).append(t | {"_style_view": view})
175
+ final: list[PromptTask] = []
176
+ processed: set[str] = set()
177
+
178
+ for t in tasks:
179
+ sv = detect_style_view(t["filename"])
180
+ if not sv:
181
+ final.append(t)
182
+ continue
183
+
184
+ code, _ = sv
185
+ if code in processed:
186
+ continue
187
+ processed.add(code)
188
+ grouped = style_map.get(code, [])
189
+ grouped.sort(key=lambda x: (STYLE_VIEW_ORDER.get(x.get("_style_view", "other"), 99), x["order"]))
190
+ # remove helper key before returning
191
+ for g in grouped:
192
+ g.pop("_style_view", None)
193
+ final.append(g)
194
+
195
+ return final
196
+
197
+
198
+ def load_settings(settings_path: Path | None) -> dict[str, str]:
199
+ """Load settings JSON (if present) limited to known keys."""
200
+ path = settings_path or DEFAULT_SETTINGS
201
+ if not path.exists():
202
+ return {}
203
+ try:
204
+ data = json.loads(path.read_text(encoding="utf-8"))
205
+ return {k: v for k, v in data.items() if k in GEMINI_SETTINGS_KEYS and v}
206
+ except json.JSONDecodeError as exc: # noqa: BLE001
207
+ raise SystemExit(f"settings file {path} is not valid JSON: {exc}")
208
+
209
+
210
+ def resolve_api_key(settings: dict[str, str]) -> str:
211
+ """Get API key from env first, then settings file; env wins."""
212
+ if os.environ.get("GEMINI_API_KEY"):
213
+ return os.environ["GEMINI_API_KEY"]
214
+ if os.environ.get("GOOGLE_API_KEY"):
215
+ return os.environ["GOOGLE_API_KEY"]
216
+
217
+ key = settings.get("GEMINI_API_KEY") or settings.get("GOOGLE_API_KEY")
218
+ if key:
219
+ return key
220
+
221
+ raise SystemExit(
222
+ "GEMINI_API_KEY/GOOGLE_API_KEY is not set. Set the env var or create settings.json (see settings.example.json)."
223
+ )
224
+
225
+
226
+ def generate_images(
227
+ tasks: List[PromptTask],
228
+ mode: str,
229
+ limit: int | None,
230
+ api_key: str,
231
+ logger: logging.Logger,
232
+ out_root: Path,
233
+ timestamp: str,
234
+ ) -> None:
235
+ """Generate images for the provided tasks list and write a manifest."""
236
+ client = genai.Client(api_key=api_key)
237
+
238
+ to_run = tasks if mode == "full" else tasks[: limit or 2]
239
+ logger.info("Starting generation: %s tasks (mode=%s)", len(to_run), mode)
240
+
241
+ style_state: dict[str, dict[str, Path]] = {}
242
+ manifest = []
243
+ for task in to_run:
244
+ slide_slug = slugify(task["slide"])
245
+ out_dir = out_root / mode / slide_slug
246
+ out_dir.mkdir(parents=True, exist_ok=True)
247
+ out_path = out_dir / task["filename"]
248
+
249
+ logger.info("Generating %s (slide: %s)", task["filename"], task["slide"])
250
+
251
+ style_view = detect_style_view(task["filename"])
252
+ anchor, anchor_code = anchor_part(task["prompt"], logger)
253
+ anchor_used = None
254
+
255
+ contents: list[types.Part | str] = []
256
+
257
+ if style_view:
258
+ style_code, view = style_view
259
+ state = style_state.get(style_code, {})
260
+ preferred_path: Path | None = None
261
+ if view == "hero":
262
+ preferred_path = None # first in chain, prompt-only
263
+ elif view == "front":
264
+ preferred_path = state.get("hero")
265
+ anchor_used = "hero" if preferred_path else None
266
+ elif view == "back":
267
+ preferred_path = state.get("front") or state.get("hero")
268
+ anchor_used = "front" if state.get("front") else ("hero" if state.get("hero") else None)
269
+
270
+ if preferred_path and preferred_path.exists():
271
+ try:
272
+ contents.append(part_from_path(preferred_path))
273
+ anchor_used = anchor_used or "previous"
274
+ except Exception as exc: # noqa: BLE001
275
+ logger.exception("Failed to load prior view %s as anchor: %s", preferred_path, exc)
276
+
277
+ if not contents and anchor:
278
+ contents.append(anchor)
279
+ anchor_used = anchor_used or (f"face:{anchor_code}" if anchor_code else "face")
280
+
281
+ contents.append(task["prompt"])
282
+
283
+ try:
284
+ response = client.models.generate_content(
285
+ model="gemini-2.5-flash-image",
286
+ contents=contents,
287
+ config=types.GenerateContentConfig(
288
+ response_modalities=["image"],
289
+ ),
290
+ )
291
+ except genai_errors.ClientError as exc: # noqa: BLE001
292
+ if exc.status_code == 401:
293
+ logger.error(
294
+ "401 Unauthorized. This usually means the key is missing, the wrong key type (use Google AI Studio key), or Vertex mode requires OAuth."
295
+ )
296
+ logger.exception("Generation failed for %s: %s", task["filename"], exc)
297
+ continue
298
+ except Exception as exc: # noqa: BLE001
299
+ logger.exception("Generation failed for %s: %s", task["filename"], exc)
300
+ continue
301
+
302
+ parts = getattr(response, "parts", None)
303
+ if not parts:
304
+ logger.warning("Response had no parts for %s; skipping", task["filename"])
305
+ continue
306
+ image_part = next((p for p in parts if getattr(p, "inline_data", None)), None)
307
+ if not image_part:
308
+ logger.warning("No image part returned for %s; skipping", task["filename"])
309
+ continue
310
+
311
+ try:
312
+ image = image_part.as_image()
313
+ image.save(out_path)
314
+ logger.info("Saved %s", out_path)
315
+ except Exception as exc: # noqa: BLE001
316
+ logger.exception("Failed to save %s: %s", out_path, exc)
317
+ continue
318
+
319
+ if style_view:
320
+ style_code, view = style_view
321
+ style_state.setdefault(style_code, {})[view] = out_path
322
+
323
+ manifest.append(
324
+ {
325
+ "slide": task["slide"],
326
+ "filename": task["filename"],
327
+ "prompt": task["prompt"],
328
+ "path": str(out_path.relative_to(ROOT)),
329
+ "anchor": anchor_used,
330
+ "anchor_face": anchor_code,
331
+ }
332
+ )
333
+
334
+ if manifest:
335
+ manifest_path = out_root / mode / f"manifest_{timestamp}.json"
336
+ manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
337
+ logger.info("Manifest written to %s", manifest_path)
338
+ else:
339
+ logger.warning("No images were generated; manifest not written")
340
+
341
+
342
+ def run_generation(
343
+ mode: str = "full",
344
+ limit: int | None = None,
345
+ settings_path: Path | None = None,
346
+ brand: str = "mango",
347
+ collection: str = "hot-summer-ss26",
348
+ log_level: str = "INFO",
349
+ clean: bool = False,
350
+ ) -> None:
351
+ """Programmatic entrypoint to parse plan.md and generate Gemini images."""
352
+ tasks = parse_plan(PLAN_PATH)
353
+ tasks = reorder_tasks_for_styles(tasks)
354
+ if not tasks:
355
+ raise SystemExit("No prompts found in plan.md")
356
+
357
+ if mode == "sample" and limit is not None and limit <= 0:
358
+ raise SystemExit("limit must be positive for sample mode")
359
+
360
+ out_root = output_root(brand, collection)
361
+
362
+ if clean:
363
+ clean_output_dir(out_root, mode)
364
+
365
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
366
+
367
+ logger = setup_logging(out_root, mode, log_level)
368
+ settings = load_settings(settings_path)
369
+
370
+ if "GOOGLE_GENAI_USE_VERTEXAI" in settings and "GOOGLE_GENAI_USE_VERTEXAI" not in os.environ:
371
+ os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = str(settings["GOOGLE_GENAI_USE_VERTEXAI"]).lower()
372
+
373
+ api_key = resolve_api_key(settings)
374
+
375
+ prompts_dir = ROOT / "outputs" / slugify(brand) / "collection" / slugify(collection) / "prompts"
376
+ prompts_dir.mkdir(parents=True, exist_ok=True)
377
+ prompts_path = prompts_dir / f"images_prompts_{timestamp}.json"
378
+ prompts_payload = [{"slide": t["slide"], "filename": t["filename"], "prompt": t["prompt"], "order": t["order"]} for t in tasks]
379
+ prompts_path.write_text(json.dumps(prompts_payload, indent=2), encoding="utf-8")
380
+ logger.info("Prompts saved to %s", prompts_path)
381
+
382
+ generate_images(tasks, mode, limit, api_key, logger, out_root, timestamp)
383
+
384
+
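A minimal driver sketch for the entrypoint above (not part of the commit): the module name `generate_images` and the argument values are assumptions for illustration only.

```python
# Hypothetical driver: regenerate a small sample with a clean output dir.
# Assumes this file is importable as `generate_images` at the repo root.
from pathlib import Path

from generate_images import run_generation  # module name is an assumption

run_generation(
    mode="sample",                        # sample mode requires a positive limit
    limit=3,
    settings_path=Path("settings.json"),  # env vars still take precedence
    brand="mango",
    collection="hot-summer-ss26",
    log_level="DEBUG",
    clean=True,                           # wipe images/<mode> before writing
)
```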
+ # ---------------- Reusable runner for external callers ---------------- #
+
+
+ def run_prompt_list(
+     prompt_items: List[Dict[str, Any]],
+     brand: str,
+     collection: str,
+     mode: str,
+     api_key: str | None,
+     logger: logging.Logger,
+ ) -> List[Dict[str, Any]]:
+     """
+     Run a list of prompts (each dict: prompt, filename) through Gemini and save to images/<mode>.
+     Returns manifest entries.
+     Includes simple anchoring for style views (hero -> front -> back) using previously
+     generated images for the same style code. See the usage sketch after this function.
+     """
+     out_root = output_root(brand, collection) / mode
+     out_root.mkdir(parents=True, exist_ok=True)
+     # Auth resolution: prefer explicit api_key, else settings.json (no env reliance)
+     settings_path = ROOT / "settings.json"
+     settings = {}
+     if settings_path.exists():
+         try:
+             settings = json.loads(settings_path.read_text(encoding="utf-8"))
+         except Exception:
+             settings = {}
+
+     use_vertex = str(settings.get("GOOGLE_GENAI_USE_VERTEXAI", "")).lower() == "true"
+
+     if not api_key:
+         api_key = settings.get("GEMINI_API_KEY") or settings.get("GOOGLE_API_KEY")
+
+     project = (
+         settings.get("GOOGLE_VERTEX_PROJECT")
+         or settings.get("GOOGLE_CLOUD_PROJECT")
+         or settings.get("GCLOUD_PROJECT")
+     )
+     location = settings.get("GOOGLE_VERTEX_LOCATION") or settings.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
+
+     logger.info(
+         "[gemini] auth resolution: api_key=%s use_vertex=%s project=%s location=%s",
+         "yes" if api_key else "no",
+         use_vertex,
+         project or "none",
+         location,
+     )
+
+     if use_vertex:
+         if not project:
+             raise SystemExit(
+                 "Gemini Vertex auth missing project. Set GOOGLE_VERTEX_PROJECT or GOOGLE_CLOUD_PROJECT in settings.json."
+             )
+         client = genai.Client(vertexai={"project": project, "location": location})
+         logger.info("[gemini] using Vertex ADC project=%s location=%s", project, location)
+     elif api_key:
+         client = genai.Client(api_key=api_key)
+         logger.info("[gemini] using API key auth")
+     else:
+         raise SystemExit(
+             "Gemini auth missing: set GEMINI_API_KEY/GOOGLE_API_KEY in settings.json or set GOOGLE_GENAI_USE_VERTEXAI=true with GOOGLE_CLOUD_PROJECT in settings.json"
+         )
+     manifest = []
+     style_state: dict[str, dict[str, Path]] = {}
+     for item in prompt_items:
+         prompt = item["prompt"]
+         filename = item.get("filename") or f"prompt_{len(manifest)+1}.png"
+         slide_slug = slugify(item.get("slide", "adhoc"))
+         if item.get("out_path"):
+             out_path = (ROOT / item["out_path"]).resolve() if not Path(item["out_path"]).is_absolute() else Path(item["out_path"])
+             out_path.parent.mkdir(parents=True, exist_ok=True)
+         else:
+             out_dir = out_root / slide_slug
+             out_dir.mkdir(parents=True, exist_ok=True)
+             out_path = out_dir / filename
+         logger.info("[run_prompt_list] %s -> %s", filename, out_path)
+         contents: list[types.Part | str] = []
+
+         # Style chaining: if the filename matches a style view, attach the prior image
+         anchor_used = None
+         style_view = detect_style_view(filename)
+         if style_view:
+             code, view = style_view
+             state = style_state.get(code, {})
+             preferred_path: Path | None = None
+             if view == "hero":
+                 preferred_path = None
+             elif view == "front":
+                 preferred_path = state.get("hero")
+                 anchor_used = "hero" if preferred_path else None
+             elif view == "back":
+                 preferred_path = state.get("front") or state.get("hero")
+                 anchor_used = "front" if state.get("front") else ("hero" if state.get("hero") else None)
+
+             if preferred_path and preferred_path.exists():
+                 try:
+                     contents.append(part_from_path(preferred_path))
+                     anchor_used = anchor_used or "previous"
+                 except Exception as exc:  # noqa: BLE001
+                     logger.exception("Failed to load prior view %s as anchor: %s", preferred_path, exc)
+
+         contents.append(prompt)
+         try:
+             resp = client.models.generate_content(
+                 model="gemini-2.5-flash-image",
+                 contents=contents,
+                 config=types.GenerateContentConfig(response_modalities=["image"]),
+             )
+             image_part = None
+             if hasattr(resp, "parts") and resp.parts:
+                 image_part = next((p for p in resp.parts if getattr(p, "inline_data", None)), None)
+             if not image_part and hasattr(resp, "candidates"):
+                 for cand in resp.candidates or []:
+                     content = getattr(cand, "content", None)
+                     parts = getattr(content, "parts", []) if content else []
+                     for part in parts or []:
+                         if getattr(part, "inline_data", None):
+                             image_part = part
+                             break
+                     if image_part:
+                         break
+             if not image_part:
+                 logger.warning("[run_prompt_list] no image returned for %s", filename)
+                 manifest.append({"filename": filename, "status": "no_image"})
+                 continue
+             image = image_part.as_image()
+             image.save(out_path)
+             manifest.append({"filename": filename, "status": "ok", "path": str(out_path.relative_to(ROOT)), "anchor": anchor_used})
+             if style_view:
+                 code, view = style_view
+                 style_state.setdefault(code, {})[view] = out_path
+         except Exception as exc:  # noqa: BLE001
+             logger.exception("[run_prompt_list] failed for %s: %s", filename, exc)
+             manifest.append({"filename": filename, "status": f"error:{exc}"})
+     return manifest
+
+
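A usage sketch for `run_prompt_list` (illustrative, not part of the commit). The `ST01_hero`/`ST01_front` filenames assume `detect_style_view` (defined earlier in this file) recognizes per-style hero/front suffixes; treat the pattern as hypothetical.

```python
# Hypothetical ad-hoc run: two views of one style; the second prompt is
# anchored on the first image via detect_style_view/style_state.
import logging

from generate_images import run_prompt_list  # module name is an assumption

logging.basicConfig(level=logging.INFO)
manifest = run_prompt_list(
    prompt_items=[
        {"slide": "slide-01", "filename": "ST01_hero.png", "prompt": "Hero shot of style ST01 ..."},
        {"slide": "slide-01", "filename": "ST01_front.png", "prompt": "Front view of the same garment ..."},
    ],
    brand="mango",
    collection="hot-summer-ss26",
    mode="sample",
    api_key=None,  # falls back to GEMINI_API_KEY/GOOGLE_API_KEY in settings.json
    logger=logging.getLogger("adhoc"),
)
print([entry["status"] for entry in manifest])
```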
+ def run_prompt_list_vertex_chain(
+     prompt_items: List[Dict[str, Any]],
+     brand: str,
+     collection: str,
+     mode: str,
+     logger: logging.Logger,
+     temp: float = 1.0,
+     top_p: float = 0.95,
+ ) -> List[Dict[str, Any]]:
+     """
+     Multi-turn Vertex image chain per style (hero → front → back) with image feedback.
+
+     End-to-end flow:
+     1) Group prompts by style/slide so each style runs as one mini-session.
+     2) HERO: call Gemini Vertex with the hero prompt (no anchors). Save the returned image.
+     3) FRONT: send the original hero prompt as a user turn, the hero image as a *model* turn,
+        then the front prompt as a user turn. Generate and save the front image.
+     4) BACK: send hero prompt + hero image (model turn) + front prompt + front image (model turn),
+        then the back prompt. Generate and save the back image.
+     5) Persist outputs under `outputs/<brand>/collection/<collection>/images/<mode>/...`
+        and record a manifest entry per view.
+
+     Notes:
+     - Uses the Vertex client with explicit safety + image config (1:1, 1K) and temperature/top_p controls.
+     - If any of the three views fails, the function logs an error for that view and continues to the next style.
+     """
+     from collections import defaultdict
+
+     out_root = output_root(brand, collection) / mode
+     out_root.mkdir(parents=True, exist_ok=True)
+
+     client = genai.Client(vertexai=True)
+
+     cfg = types.GenerateContentConfig(
+         temperature=temp,
+         top_p=top_p,
+         max_output_tokens=32768,
+         response_modalities=["TEXT", "IMAGE"],
+         safety_settings=[
+             types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
+             types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"),
+             types.SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"),
+             types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
+         ],
+         image_config=types.ImageConfig(
+             aspect_ratio="1:1",
+             image_size="1K",
+             output_mime_type="image/png",
+         ),
+     )
+
+     def to_model_image_content(img_bytes: bytes) -> types.Content:
+         """Wrap prior image bytes as a model-role content part for chaining."""
+         # The model turn carries only the image; no text part is needed.
+         return types.Content(
+             role="model",
+             parts=[types.Part.from_bytes(data=img_bytes, mime_type="image/png")],
+         )
+
+     def extract_first_image(resp) -> bytes | None:
+         """Extract the first inline image payload from a Vertex response object."""
+         for cand in getattr(resp, "candidates", []) or []:
+             parts = getattr(getattr(cand, "content", None), "parts", []) or []
+             for part in parts:
+                 if getattr(part, "inline_data", None) and getattr(part.inline_data, "data", None):
+                     return part.inline_data.data
+         return None
+
+     grouped: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
+     for itm in prompt_items:
+         grouped[itm.get("slide") or itm.get("style_name") or "unknown"].append(itm)
+
+     manifest: List[Dict[str, Any]] = []
+
+     for slide, items in grouped.items():
+         logger.info("[vertex-chain] style=%s items=%d", slide, len(items))
+         hero = next((i for i in items if "_hero" in i.get("filename", "")), None)
+         front = next((i for i in items if "_front" in i.get("filename", "")), None)
+         back = next((i for i in items if "_back" in i.get("filename", "")), None)
+         if not (hero and front and back):
+             logger.warning("[vertex-chain] skip %s missing hero/front/back", slide)
+             continue
+
+         def resolve_out_path(itm: Dict[str, Any]) -> Path:
+             """Resolve the output path for an item, creating parent folders as needed."""
+             op = itm.get("out_path")
+             if op:
+                 p = Path(op)
+                 if not p.is_absolute():
+                     p = ROOT / p
+                 p.parent.mkdir(parents=True, exist_ok=True)
+                 return p
+             out_dir = out_root / (itm.get("slide") or slide)
+             out_dir.mkdir(parents=True, exist_ok=True)
+             return out_dir / itm.get("filename", "out.png")
+
+         # HERO
+         # 1) Hero request: single user turn with the hero prompt.
+         hero_resp = client.models.generate_content(
+             model="gemini-2.5-flash-image",
+             contents=[types.Content(role="user", parts=[types.Part.from_text(text=hero["prompt"])])],
+             config=cfg,
+         )
+         hero_img = extract_first_image(hero_resp)
+         if not hero_img:
+             manifest.append({"filename": hero.get("filename"), "status": "error", "path": None})
+             logger.error("[vertex-chain] no hero image for %s", slide)
+             continue
+         hero_path = resolve_out_path(hero)
+         hero_path.write_bytes(hero_img)
+         manifest.append({"filename": hero.get("filename"), "status": "ok", "path": str(hero_path.relative_to(ROOT))})
+
+         # FRONT
+         # 2) Front request: feed hero prompt (user) + hero image (model turn) + front prompt (user).
+         contents_front = [
+             types.Content(role="user", parts=[types.Part.from_text(text=hero["prompt"])]),
+             to_model_image_content(hero_img),
+             types.Content(role="user", parts=[types.Part.from_text(text=front["prompt"])]),
+         ]
+         front_resp = client.models.generate_content(
+             model="gemini-2.5-flash-image",
+             contents=contents_front,
+             config=cfg,
+         )
+         front_img = extract_first_image(front_resp)
+         if not front_img:
+             manifest.append({"filename": front.get("filename"), "status": "error", "path": None})
+             logger.error("[vertex-chain] no front image for %s", slide)
+             continue
+         front_path = resolve_out_path(front)
+         front_path.write_bytes(front_img)
+         manifest.append({"filename": front.get("filename"), "status": "ok", "path": str(front_path.relative_to(ROOT))})
+
+         # BACK
+         # 3) Back request: hero prompt (user) + hero image (model) + front prompt (user) + front image (model) + back prompt (user).
+         contents_back = [
+             types.Content(role="user", parts=[types.Part.from_text(text=hero["prompt"])]),
+             to_model_image_content(hero_img),
+             types.Content(role="user", parts=[types.Part.from_text(text=front["prompt"])]),
+             to_model_image_content(front_img),
+             types.Content(role="user", parts=[types.Part.from_text(text=back["prompt"])]),
+         ]
+         back_resp = client.models.generate_content(
+             model="gemini-2.5-flash-image",
+             contents=contents_back,
+             config=cfg,
+         )
+         back_img = extract_first_image(back_resp)
+         if not back_img:
+             manifest.append({"filename": back.get("filename"), "status": "error", "path": None})
+             logger.error("[vertex-chain] no back image for %s", slide)
+             continue
+         back_path = resolve_out_path(back)
+         back_path.write_bytes(back_img)
+         manifest.append({"filename": back.get("filename"), "status": "ok", "path": str(back_path.relative_to(ROOT))})
+
+     return manifest
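A companion sketch for the chained variant (illustrative, not part of the commit). It requires Vertex ADC (e.g. `gcloud auth application-default login`) and assumes the same module name as above.

```python
# Hypothetical chained run: hero -> front -> back per style, with each
# generated image fed back as a model turn for the next view.
import logging

from generate_images import run_prompt_list_vertex_chain  # module name is an assumption

items = [
    {"slide": "slide-02", "filename": "ST02_hero.png", "prompt": "Hero editorial shot ..."},
    {"slide": "slide-02", "filename": "ST02_front.png", "prompt": "Front product view, same garment ..."},
    {"slide": "slide-02", "filename": "ST02_back.png", "prompt": "Back product view, same garment ..."},
]
manifest = run_prompt_list_vertex_chain(
    items,
    brand="mango",
    collection="hot-summer-ss26",
    mode="full",
    logger=logging.getLogger("chain"),
    temp=0.8,   # lower temperature for tighter view-to-view consistency
    top_p=0.9,
)
```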
sample_code/llm_client.py ADDED
@@ -0,0 +1,95 @@
+ """Lightweight OpenAI GPT-5 client for orchestration steps (4–9 prompts, etc.)."""
+
+ from __future__ import annotations
+
+ import base64
+ import json
+ import logging
+ import os
+ from pathlib import Path
+ from typing import Optional
+
+ from openai import OpenAI
+
+ from constants import (
+     ROOT,
+     DEFAULT_SETTINGS,
+     LLM_SETTING_KEYS as SETTING_KEYS,
+     DEFAULT_MODEL,
+     DEFAULT_REASONING,
+ )
+
+
+ def load_settings(path: Path | None) -> dict:
+     """Load settings.json (or a provided path) and keep only recognized keys."""
+     path = path or DEFAULT_SETTINGS
+     if not path.exists():
+         return {}
+     data = json.loads(path.read_text(encoding="utf-8"))
+     return {k: v for k, v in data.items() if k in SETTING_KEYS and v}
+
+
+ def resolve_api_key(settings: dict) -> str:
+     """Resolve OPENAI_API_KEY, preferring env over settings; exit if missing."""
+     if os.environ.get("OPENAI_API_KEY"):
+         return os.environ["OPENAI_API_KEY"]
+     if settings.get("OPENAI_API_KEY"):
+         return settings["OPENAI_API_KEY"]
+     raise SystemExit("OPENAI_API_KEY is not set (env or settings.json)")
+
+
+ def resolve_model(settings: dict, cli_model: Optional[str]) -> str:
+     """Pick the model from CLI override, env, settings, or the fallback default."""
+     return cli_model or os.environ.get("OPENAI_MODEL") or settings.get("OPENAI_MODEL") or DEFAULT_MODEL
+
+
+ def resolve_reasoning(settings: dict, cli_reasoning: Optional[str]) -> Optional[str]:
+     """Pick the reasoning effort from CLI override, env, settings, or the default."""
+     return cli_reasoning or os.environ.get("OPENAI_REASONING_EFFORT") or settings.get("OPENAI_REASONING_EFFORT") or DEFAULT_REASONING
+
+
+ def run(prompt: object, model: str, reasoning: Optional[str], api_key: str) -> str:
+     """
+     Use the newer Responses API (per OpenAI's 2025 guidelines).
+     Accepts:
+     - str prompt
+     - list/tuple [text, image_bytes] for multimodal
+     """
+     client = OpenAI(api_key=api_key)
+
+     kwargs = {}
+     if reasoning:
+         kwargs["reasoning"] = {"effort": reasoning}
+
+     # Build the input payload
+     if isinstance(prompt, (list, tuple)) and len(prompt) == 2 and isinstance(prompt[0], str) and isinstance(prompt[1], (bytes, bytearray)):
+         b64 = base64.b64encode(prompt[1]).decode("utf-8")
+         logging.info("[llm] multimodal input: text_len=%s image_bytes=%s", len(prompt[0]), len(prompt[1]))
+         input_payload = [
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "input_text", "text": prompt[0]},
+                     {"type": "input_image", "image_url": f"data:image/png;base64,{b64}"},
+                 ],
+             }
+         ]
+     else:
+         text_prompt = prompt if isinstance(prompt, str) else str(prompt)
+         input_payload = [
+             {
+                 "role": "user",
+                 "content": [{"type": "input_text", "text": text_prompt}],
+             }
+         ]
+
+     # Debug-log the full payload for traceability (note: for multimodal calls this includes the base64 image)
+     logging.info("[llm] model=%s reasoning=%s payload=%s", model, reasoning, input_payload)
+
+     resp = client.responses.create(
+         model=model,
+         input=input_payload,
+         **kwargs,
+     )
+
+     return getattr(resp, "output_text", None) or str(resp)
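For orientation, a hedged sketch of calling this client both ways (the image path is illustrative, not a file in this repo):

```python
# Hypothetical caller: one text-only and one multimodal invocation of run().
from pathlib import Path

import llm_client  # i.e. sample_code/llm_client.py

settings = llm_client.load_settings(None)
api_key = llm_client.resolve_api_key(settings)
model = llm_client.resolve_model(settings, None)       # e.g. "gpt-5-mini"
effort = llm_client.resolve_reasoning(settings, None)  # e.g. "medium"

text_out = llm_client.run("Summarize the brief in 3 bullets.", model, effort, api_key)

png_bytes = Path("some_garment.png").read_bytes()  # illustrative path
vision_out = llm_client.run(["Describe the print on this garment.", png_bytes], model, effort, api_key)
```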
schemas.py ADDED
@@ -0,0 +1,186 @@
+ """Lightweight schema helpers for micro-trend JSON validation and summarization."""
+
+ from __future__ import annotations
+
+ from typing import Any, Dict, List
+
+ REQUIRED_TOP_LEVEL_KEYS = {"meta", "global_scene", "garments", "image_level_micro_trends"}
+
+
+ class ValidationError(Exception):
+     pass
+
+
+ def validate_trend_payload(payload: Any) -> Dict[str, Any]:
+     """Basic structural validation to ensure expected keys/types exist."""
+     if not isinstance(payload, dict):
+         raise ValidationError("Payload is not a JSON object")
+
+     missing = REQUIRED_TOP_LEVEL_KEYS - payload.keys()
+     if missing:
+         raise ValidationError(f"Missing top-level keys: {', '.join(sorted(missing))}")
+
+     if not isinstance(payload.get("garments"), list):
+         raise ValidationError("`garments` must be a list")
+
+     for i, garment in enumerate(payload["garments"]):
+         if not isinstance(garment, dict):
+             raise ValidationError(f"garments[{i}] is not an object")
+         if "category" not in garment:
+             raise ValidationError(f"garments[{i}] missing `category`")
+         if "print_overview" in garment and not isinstance(garment["print_overview"], dict):
+             raise ValidationError(f"garments[{i}].print_overview must be an object")
+         if "print_placement" in garment and not isinstance(garment["print_placement"], list):
+             raise ValidationError(f"garments[{i}].print_placement must be a list")
+
+     return payload  # type: ignore[return-value]
+
+
+ def _fmt_list(vals: List[str]) -> str:
+     vals = [v for v in vals if v]
+     if not vals:
+         return ""
+     if len(vals) == 1:
+         return vals[0]
+     return ", ".join(vals[:-1]) + f" and {vals[-1]}"
+
+
+ def _summarize_placement(placements: List[Dict[str, Any]]) -> str:
+     if not placements:
+         return "placement not specified"
+     parts = []
+     for p in placements[:3]:
+         zone = p.get("zone") or "zone unknown"
+         side = p.get("side") or "side n/a"
+         coverage = p.get("coverage_percent_of_zone")
+         orientation = p.get("orientation")
+         note = p.get("notes")
+         chunk = f"{zone} ({side}"
+         if coverage is not None:
+             chunk += f", ~{coverage}% coverage"
+         if orientation:
+             chunk += f", {orientation.lower()} orientation"
+         chunk += ")"
+         if note:
+             chunk += f" [{note}]"
+         parts.append(chunk)
+     if len(placements) > 3:
+         parts.append("additional placements not shown")
+     return "; ".join(parts)
+
+
+ def _summarize_motifs(motifs: List[Dict[str, Any]]) -> str:
+     if not motifs:
+         return "motifs not specified"
+     parts = []
+     for m in motifs[:3]:
+         motif = m.get("motif_type") or "motif"
+         desc = m.get("motif_description")
+         scale = m.get("scale")
+         density = m.get("density")
+         spacing = m.get("spacing_pattern")
+         colors = m.get("colorways")
+         chunk = motif
+         if desc:
+             chunk += f" ({desc})"
+         details = _fmt_list([scale, density, spacing])
+         if details:
+             chunk += f" | {details}"
+         if colors:
+             chunk += f" | colors: {colors}"
+         parts.append(chunk)
+     if len(motifs) > 3:
+         parts.append("additional motif atoms not shown")
+     return "; ".join(parts)
+
+
+ def build_summary(payload: Dict[str, Any], max_garments: int = 3) -> List[str]:
+     """Derive structured bullet points (Markdown-friendly) that narrate the JSON contents."""
+     bullets: List[str] = []
+
+     meta = payload.get("meta") or {}
+     scene = payload.get("global_scene") or {}
+     meta_bits = _fmt_list(
+         [
+             f"image quality {meta.get('image_quality')}" if meta.get("image_quality") else "",
+             meta.get("image_type"),
+             meta.get("view_type"),
+             f"{meta.get('num_visible_garments')} garment(s)" if meta.get("num_visible_garments") is not None else "",
+         ]
+     )
+     scene_bits = _fmt_list(
+         [
+             scene.get("setting"),
+             "model present" if scene.get("model_present") else "",
+             f"occlusions: {scene.get('occlusions_or_crops')}" if scene.get("occlusions_or_crops") else "",
+         ]
+     )
+     bullets.append(f"**Scene:** {meta_bits or 'n/a'}; {scene_bits or 'setting n/a'}.")
+
+     garments: List[Dict[str, Any]] = payload.get("garments", [])[:max_garments]
+     for idx, g in enumerate(garments, start=1):
+         cat = g.get("category") or g.get("sub_category") or "garment"
+         role = g.get("role") or "primary"
+         base_color = g.get("base_color_main") or "color n/a"
+         secondary = _fmt_list(g.get("base_color_secondary") or [])
+         fabric = g.get("base_fabric_impression")
+         presence = g.get("print_presence")
+         overview = g.get("print_overview") or {}
+         primary_family = overview.get("primary_print_family")
+         secondary_families = _fmt_list(overview.get("secondary_print_families") or [])
+         style_tags = _fmt_list(overview.get("print_style_tags") or [])
+         technique = overview.get("print_technique_estimate")
+         placement = _summarize_placement(g.get("print_placement") or [])
+         motifs = _summarize_motifs(g.get("motif_atoms") or [])
+         color_story = g.get("color_story") or {}
+         contrast = color_story.get("contrast_behavior")
+         print_colors = _fmt_list(color_story.get("print_colors") or [])
+         text_logo = g.get("text_and_logo_details") or {}
+         has_text = text_logo.get("has_text_or_logo")
+         text_samples = _fmt_list(text_logo.get("text_samples") or [])
+         tags = g.get("micro_trend_inferences") or {}
+         trend_tags = _fmt_list(
+             (tags.get("print_micro_trend_tags") or [])
+             + (tags.get("placement_micro_trend_tags") or [])
+             + (tags.get("color_micro_trend_tags") or [])
+             + (tags.get("other_detail_micro_trend_tags") or [])
+         )
+         confidence = g.get("confidence") or {}
+
+         bullet = (
+             f"**Garment {idx} ({role}) — {cat}:** base color {base_color}"
+             f"{' with ' + secondary if secondary else ''}"
+             f"{' | fabric ' + fabric if fabric else ''}"
+             f"; print presence {presence or 'n/a'}"
+         )
+         if primary_family:
+             bullet += f"; primary print family {primary_family}"
+         if secondary_families:
+             bullet += f"; secondary {secondary_families}"
+         if style_tags:
+             bullet += f"; style {style_tags}"
+         if technique:
+             bullet += f"; technique {technique}"
+         bullet += f"; placement: {placement}"
+         bullet += f"; motifs: {motifs}"
+         if print_colors or contrast:
+             bullet += f"; colors: ground={color_story.get('ground_color') or 'n/a'}, print={print_colors or 'n/a'}, contrast={contrast or 'n/a'}"
+         if has_text:
+             placements = _fmt_list(text_logo.get("placement") or [])
+             style = text_logo.get("style")
+             bullet += f"; text/logo present ({placements or 'placement n/a'}, style {style or 'n/a'}, samples: {text_samples or 'n/a'})"
+         if trend_tags:
+             bullet += f"; micro-trend tags: {trend_tags}"
+         if confidence.get("overall"):
+             bullet += f"; confidence overall {confidence.get('overall')}"
+         bullets.append(bullet + ".")
+
+     tags = (payload.get("image_level_micro_trends") or {}).get("deduplicated_tags") or []
+     if isinstance(tags, list) and tags:
+         bullets.append("**Image-level micro-trend tags:** " + ", ".join(tags) + ".")
+
+     summary_comment = (payload.get("image_level_micro_trends") or {}).get("summary_comment")
+     if isinstance(summary_comment, str) and summary_comment.strip():
+         bullets.append("**Image-level summary:** " + summary_comment.strip())
+
+     return bullets
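A minimal round-trip sketch for these helpers; the payload below is invented to satisfy the validator, not a real model output:

```python
# Hypothetical payload exercising validate_trend_payload + build_summary.
from schemas import ValidationError, build_summary, validate_trend_payload

payload = {
    "meta": {"image_quality": "high", "image_type": "ecom", "view_type": "front", "num_visible_garments": 1},
    "global_scene": {"setting": "studio", "model_present": True},
    "garments": [
        {
            "category": "t-shirt",
            "role": "primary",
            "base_color_main": "ecru",
            "print_presence": "yes",
            "print_overview": {"primary_print_family": "conversational"},
            "print_placement": [{"zone": "chest", "side": "front", "coverage_percent_of_zone": 20}],
        }
    ],
    "image_level_micro_trends": {"deduplicated_tags": ["chest-hit graphic"], "summary_comment": "Single placed print."},
}

try:
    validate_trend_payload(payload)
except ValidationError as exc:
    print("invalid:", exc)
else:
    for line in build_summary(payload):
        print("-", line)
```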
settings.py ADDED
@@ -0,0 +1,94 @@
+ """Settings loader for the micro-trend Gradio app.
+
+ Loads `settings.json` (same shape as `sample_code/settings.json`) with env
+ overrides, and exposes a typed Settings object.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Dict, Optional
+
+ DEFAULT_SETTINGS_PATH = Path("settings.json")
+
+ # Keys mirrored from sample_code/settings.json
+ SETTING_KEYS = {
+     "OPENAI_API_KEY",
+     "GEMINI_API_KEY",
+     "OPENAI_MODEL",
+     "OPENAI_REASONING_EFFORT",
+     "GOOGLE_GENAI_USE_VERTEXAI",
+     "GOOGLE_CLOUD_PROJECT",
+     "GOOGLE_CLOUD_LOCATION",
+ }
+
+ DEFAULT_MODEL = "gpt-5-mini"
+ DEFAULT_REASONING = "medium"
+
+
+ @dataclass
+ class Settings:
+     openai_api_key: Optional[str] = None
+     gemini_api_key: Optional[str] = None
+     openai_model: str = DEFAULT_MODEL
+     openai_reasoning_effort: Optional[str] = DEFAULT_REASONING
+     google_genai_use_vertexai: bool = True
+     google_cloud_project: Optional[str] = None
+     google_cloud_location: Optional[str] = None
+
+     def require_api_keys(self) -> None:
+         """Raise if both providers are missing keys."""
+         if not self.openai_api_key and not self.gemini_api_key:
+             raise RuntimeError("No API keys set: provide OPENAI_API_KEY and/or GEMINI_API_KEY via env or settings.json")
+
+     def to_payload(self) -> Dict[str, Any]:
+         """Return a dict useful for client construction/logging."""
+         return {
+             "openai_model": self.openai_model,
+             "openai_reasoning_effort": self.openai_reasoning_effort,
+             "google_genai_use_vertexai": self.google_genai_use_vertexai,
+             "google_cloud_project": self.google_cloud_project,
+             "google_cloud_location": self.google_cloud_location,
+         }
+
+
+ def _coerce_bool(value: Any) -> bool:
+     if isinstance(value, bool):
+         return value
+     if isinstance(value, str):
+         return value.strip().lower() in {"1", "true", "yes", "on"}
+     return bool(value)
+
+
+ def _load_json(path: Path) -> Dict[str, Any]:
+     if not path.exists():
+         return {}
+     return json.loads(path.read_text(encoding="utf-8"))
+
+
+ def load_settings(path: Path | None = None) -> Settings:
+     """
+     Load settings with env overrides.
+     Precedence: env > settings.json > defaults.
+     """
+     settings_path = path or DEFAULT_SETTINGS_PATH
+     raw = _load_json(settings_path)
+     # Keep only recognized keys
+     raw = {k: v for k, v in raw.items() if k in SETTING_KEYS}
+
+     def pick(key: str, default: Any = None) -> Any:
+         env_val = os.environ.get(key)
+         return env_val if env_val is not None else raw.get(key, default)
+
+     return Settings(
+         openai_api_key=pick("OPENAI_API_KEY"),
+         gemini_api_key=pick("GEMINI_API_KEY"),
+         openai_model=pick("OPENAI_MODEL", DEFAULT_MODEL),
+         openai_reasoning_effort=pick("OPENAI_REASONING_EFFORT", DEFAULT_REASONING),
+         google_genai_use_vertexai=_coerce_bool(pick("GOOGLE_GENAI_USE_VERTEXAI", True)),
+         google_cloud_project=pick("GOOGLE_CLOUD_PROJECT"),
+         google_cloud_location=pick("GOOGLE_CLOUD_LOCATION"),
+     )
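A quick sketch of the precedence rule (env > settings.json > defaults) in action:

```python
# Env wins over settings.json; defaults fill whatever remains.
import os

from settings import load_settings

os.environ["OPENAI_MODEL"] = "gpt-5"  # overrides any settings.json value
cfg = load_settings()
cfg.require_api_keys()   # raises unless at least one provider key is set
print(cfg.to_payload())  # log-safe fields only; API keys are excluded
```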