numzoo

Runtime error

App Files Files Community

numzoo / scripts /generate_dataset.py

goumsss

Prep captions for LoRA training: trigger word + content-only

47fb1ee 18 days ago

Raw

History Blame Contribute Delete

15.1 kB

	"""
	NumZoo training dataset generator.

	Generates LoRA training images matching the NumZoo aesthetic using Qwen-Image
	via the HuggingFace Inference API (fal-ai provider, billed to your HF Pro credits).
	(Qwen-Image beat FLUX.1-dev on multi-animal accuracy; FLUX.2-dev is edit-only.)

	Alignment with the live app is guaranteed by construction:
	- The "A cute {animals} {places}" prefix is built with image_generator.build_subject
	(the SAME function the app uses), from the app's exact ANIMAL_MAP / PLACE_MAP.
	- The NUMZOO_STYLE suffix is imported from image_generator.
	- Scenes deliberately mix 1–3 animals and 1–3 places, exactly like the app does
	when the player selects multiple emojis.
	So every caption looks like a real app prompt, plus a rich scene detail clause.

	Output: training/image_001.jpg + training/image_001.txt (caption)

	Requirements:
	~/miniforge3/bin/pip install huggingface_hub pillow python-dotenv

	Setup (HF Pro — just your existing token):
	1. Get an HF token (fine-grained, "Make calls to Inference Providers" permission)
	https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained
	2. Add to .env: HF_TOKEN=hf_...

	Usage:
	~/miniforge3/bin/python3 scripts/generate_dataset.py # all scenes
	~/miniforge3/bin/python3 scripts/generate_dataset.py --count 5 # first 5 (test run)
	~/miniforge3/bin/python3 scripts/generate_dataset.py --start 20 # resume from #20
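	~/miniforge3/bin/python3 scripts/generate_dataset.py --dry-run # preview prompts
	~/miniforge3/bin/python3 scripts/generate_dataset.py --captions-only # rewrite .txt captions (no API calls)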
	"""

	import os
	import sys
	import argparse
	import time
	from pathlib import Path

	# Load .env from the project root so HF_TOKEN (read further down in this
	# script) can be kept there instead of being exported in the shell.
	try:
	    from dotenv import load_dotenv
	    load_dotenv(Path(__file__).parent.parent / ".env")
	except ImportError:
	    pass  # dotenv optional — can also export HF_TOKEN manually

	# Reuse the app's exact prompt builder + vocabulary so training captions and live
	# prompts share the same structure ("A cute {animals} {places}") and emoji mappings.
	sys.path.insert(0, str(Path(__file__).parent.parent))
	from image_generator import ( # noqa: E402
	build_subject,
	NUMZOO_STYLE as STYLE,
	ANIMAL_MAP,
	PLACE_MAP,
	)

	# ---------------------------------------------------------------------------
	# Scenes: (animal emojis, place emojis, scene-detail clause)
	# Animals/places use the app's exact emoji keys. The "A cute {animals} {places}"
	# prefix is built by image_generator.build_subject; the detail adds rich props,
	# activity and lighting for the cozy aesthetic. Deliberately mixes 1–3 animals
	# and 1–3 places to mirror multi-emoji selections in the app.
	#
	# Order matters: a scene's 1-based position in this list is its image number
	# (training/image_NNN.jpg / .txt), so append new scenes at the end rather than
	# inserting, or existing files will no longer line up with their scenes.
	# Every scene needs at least one animal: _scene_name() looks up animals[0]
	# in ANIMAL_MAP.
	# ---------------------------------------------------------------------------

	SCENES: list[tuple[list[str], list[str], str]] = [
	    # ── Solo animal, single place — covers all 12 animals + all 10 places ──
	    (["🐰"], ["🍄"], "sitting on a polka-dot toadstool, fireflies and floating spores drifting around, soft lantern glow"),
	    (["🐱"], ["🌊"], "building a tiny sandcastle with a bucket and shells, gentle waves lapping, warm sunset sky"),
	    (["🐶"], ["🏡"], "napping in a flower-filled wheelbarrow, watering can and butterflies nearby, golden afternoon light"),
	    (["🦊"], ["⭐"], "curled up on a fluffy cloud cradling a tiny glowing star, glittering night sky"),
	    (["🐼"], ["🌸"], "nibbling a dango skewer as petals fall, paper lanterns strung above, soft pink light"),
	    (["🐨"], ["🌴"], "hugging a palm trunk with a coconut drink, striped hammock and a parrot, turquoise sea behind"),
	    (["🦁"], ["🌈"], "wearing a tiny crown at the end of a rainbow, pastel clouds and floating sparkles"),
	    (["🐯"], ["🏔️"], "bundled in a knitted scarf on a snowy peak planting a tiny flag, sparkling snow and faint aurora"),
	    (["🐸"], ["🌺"], "perched on a giant hibiscus bloom, dewdrops glistening, big tropical leaves and warm bokeh"),
	    (["🐧"], ["🌙"], "sitting on the curve of a glowing crescent moon in a knitted hat, scattered twinkling stars"),
	    (["🦋"], ["🌸"], "fluttering through cherry blossoms trailing sparkles, pastel petals swirling in the breeze"),
	    (["🦄"], ["⭐"], "galloping across a starry sky, rainbow mane glowing, a trail of sparkles behind"),
	    (["🐶"], ["🍄"], "exploring beneath a giant mushroom with a tiny lantern, glowing toadstools and soft moss"),
	    (["🐱"], ["🌙"], "curled asleep on a crescent moon wearing a nightcap, twinkling stars all around"),
	    (["🐰"], ["🌊"], "splashing in shallow waves beside a starfish friend, beach pail and spade, pink sunset"),
	    (["🐼"], ["🏡"], "tending a vegetable patch in a straw hat, bees and tall sunflowers, warm sun"),
	    (["🐧"], ["🏔️"], "sliding down a snowy slope on its belly, scarf flying, sparkling powder snow"),
	    (["🦊"], ["🌴"], "lounging in a hammock between two palms with sunglasses, coconuts and calm ocean"),
	    (["🦁"], ["🌺"], "snoozing in a field of tropical flowers, a butterfly on its nose, dappled golden light"),
	    (["🐯"], ["🌸"], "chasing falling cherry petals, paper lanterns above, soft pink and lilac tones"),
	    (["🐸"], ["🍄"], "playing a tiny flute on a lily pad among glowing mushrooms, fireflies and reeds"),
	    (["🦄"], ["🌈"], "standing proudly under a rainbow, flower garland around its neck, pastel clouds"),

	    # ── Two animals, single place ──
	    (["🐰", "🐱"], ["🍄"], "roasting marshmallows over a tiny campfire, fireflies and glowing mushrooms, cozy night"),
	    (["🐶", "🦊"], ["🌊"], "building a sandcastle together with shell flags, gentle waves at golden hour"),
	    (["🐼", "🐨"], ["🌸"], "sharing tea under a blooming cherry tree, paper lanterns, drifting petals"),
	    (["🦁", "🐯"], ["🏡"], "tumbling over a ball of yarn in a cottage garden, picket fence and butterflies"),
	    (["🐧", "🐰"], ["🏔️"], "ice skating on a frozen pond atop a snowy mountain, fairy lights, gentle snowfall"),
	    (["🐸", "🦋"], ["🌺"], "resting together on lily pads among tropical flowers, dragonflies and warm bokeh"),
	    (["🐱", "🐶"], ["🌙"], "stargazing from a crescent moon with a tiny brass telescope, soft constellations"),
	    (["🦄", "🐰"], ["🌈"], "trotting side by side under a rainbow, flower garlands and floating sparkles"),
	    (["🦊", "🐼"], ["🍄"], "reading a glowing storybook under a toadstool, a lantern and curious fireflies"),
	    (["🐨", "🐧"], ["🌴"], "sipping coconut drinks on a tropical island, beach umbrella and gentle surf"),
	    (["🐰", "🦄"], ["⭐"], "swinging on a swing hung from the stars, sparkles raining down, deep blue night"),
	    (["🐱", "🐸"], ["🌊"], "collecting shells in tide pools at low tide, a little net and a pastel sunset"),

	    # ── Three animals, single place ──
	    (["🐰", "🐱", "🐶"], ["🍄"], "having a picnic on a checkered blanket among glowing mushrooms, lanterns and fireflies"),
	    (["🦊", "🐼", "🐨"], ["🌸"], "a tea party under cherry blossoms with tiny cups, paper lanterns and drifting petals"),
	    (["🦁", "🐯", "🐸"], ["🏡"], "playing tag through flower beds in a cottage garden, butterflies and warm sun"),
	    (["🐧", "🐰", "🐱"], ["🏔️"], "building a snowman on a snowy peak in matching scarves, sparkling snow, aurora above"),
	    (["🦄", "🦋", "🐰"], ["🌈"], "dancing under a rainbow amid sparkles and flower petals, pastel sky"),
	    (["🐶", "🦊", "🐼"], ["🌊"], "surfing tiny waves together with a beach ball, palm trees and sunset glow"),
	    (["🐱", "🐨", "🐸"], ["🌺"], "weaving flower crowns in a field of tropical flowers, butterflies and golden bokeh"),
	    (["🦁", "🐯", "🐰"], ["⭐"], "huddled on a cloud counting sparkling stars under a shared blanket, soft glow"),

	    # ── Two places ──
	    (["🐰"], ["🍄", "🌈"], "hopping from a mushroom grove toward a rainbow, sparkles bridging the two, pastel light"),
	    (["🐱", "🐶"], ["🌊", "🌴"], "a beach day between ocean waves and a tropical island, palm shade and scattered shells"),
	    (["🦄"], ["⭐", "🌙"], "soaring past sparkling stars toward a crescent moon, a glowing rainbow trail"),
	    (["🐼"], ["🌸", "🏡"], "wandering from cherry blossoms into a cosy cottage garden, petals and busy bees"),
	    (["🐧", "🐰"], ["🏔️", "⭐"], "watching sparkling stars from a snowy mountain top, fairy lights and soft snow"),
	    (["🦊"], ["🌺", "🌴"], "exploring tropical flowers along a tropical island shore, parrots and warm bokeh"),
	    (["🐸", "🦋"], ["🌸", "🌺"], "drifting between cherry blossoms and tropical flowers, dewdrops and floating petals"),
	    (["🦁"], ["🏡", "🌈"], "lazing in a cottage garden as a rainbow arcs overhead, butterflies and golden light"),

	    # ── Three places ──
	    (["🐰", "🐱"], ["🍄", "🌈", "⭐"], "a dreamy journey through a mushroom forest, under a rainbow and beneath sparkling stars, a glowing trail"),
	    (["🦄"], ["🌙", "⭐", "🌈"], "flying past a crescent moon and sparkling stars toward a rainbow, sparkles everywhere"),
	    (["🐶", "🦊", "🐼"], ["🌊", "🌴", "🌺"], "a tropical adventure across a sunny beach, a tropical island and fields of flowers, parrots and surf"),
	    (["🐧", "🐰", "🐱"], ["🏔️", "⭐", "🌙"], "a starry night on a snowy peak under a crescent moon and sparkling stars, fairy lights and aurora"),
	]


	# LoRA trigger word — a nonsense token the style LoRA learns to associate with
	# the whole NumZoo aesthetic. The app prepends it at inference once the LoRA is
	# trained. Per BFL guidance, training captions = "TRIGGER. <content only>",
	# WITHOUT spelling out the style (so the trigger alone summons the look).
	# _caption() puts this token at the start of every training caption.
	TRIGGER = "NUMZOO"


	def _scene_name(idx: int, animals: list[str], places: list[str]) -> str:
	    """Build the compact log label '<idx>_<animal>_<N>a<M>p' for one scene.

	    The animal part is the ANIMAL_MAP entry of the first animal emoji with any
	    "baby " text removed; N and M are the animal and place counts. For example
	    '03_panda_1a1p' when the first animal maps to 'baby panda'.
	    """
	    lead_animal = ANIMAL_MAP[animals[0]]
	    short_name = lead_animal.replace("baby ", "")
	    animal_count, place_count = len(animals), len(places)
	    return "{:02d}_{}_{}a{}p".format(idx, short_name, animal_count, place_count)


	def _gen_prompt(a: list[str], p: list[str], detail: str) -> str:
	    """Compose the prompt sent to the image model: subject, detail, then STYLE.

	    Unlike the training caption, this prompt carries the explicit style suffix
	    because the base model needs the style spelled out.
	    """
	    subject = build_subject(a, p)
	    return "{}, {}, {}".format(subject, detail, STYLE)


	def _caption(a: list[str], p: list[str], detail: str) -> str:
	    """Compose the training caption: TRIGGER, then subject and detail only.

	    No style words are included (BFL style-LoRA convention), so the trigger
	    token is the only thing tied to the look.
	    """
	    subject = build_subject(a, p)
	    return "{}. {}, {}".format(TRIGGER, subject, detail)


	# One (name, generation_prompt, training_caption) triple per scene, in SCENES
	# order; the label embeds the scene's 1-based number.
	PROMPTS: list[tuple[str, str, str]] = [
	    (
	        _scene_name(number, animals, places),
	        _gen_prompt(animals, places, detail),
	        _caption(animals, places, detail),
	    )
	    for number, (animals, places, detail) in enumerate(SCENES, start=1)
	]

	# ---------------------------------------------------------------------------
	# Generator using Qwen-Image via HuggingFace Inference API (fal-ai provider)
	# ---------------------------------------------------------------------------
	# Qwen-Image chosen over FLUX.1-dev: far better at rendering DISTINCT animals in
	# multi-animal scenes (critical — the app lets players pick up to 3), and a
	# softer painterly storybook style closer to the NumZoo references.
	# Note: FLUX.2-dev is edit-only (image-to-image) on every HF provider, so it
	# cannot be used for text-to-image dataset generation.

	MODEL = "Qwen/Qwen-Image"  # model id passed to text_to_image()
	PROVIDER = "fal-ai"  # provider passed to InferenceClient
	RETRIES = 4  # total attempts per image; fal-ai occasionally returns transient 504s


	def generate_image(prompt: str) -> "PIL.Image.Image":
	    """Render one 1024x1024 image for ``prompt``, retrying failed calls.

	    Calls ``InferenceClient.text_to_image`` with MODEL via PROVIDER, using the
	    HF_TOKEN environment variable as the API key. Any exception from the call
	    is treated as retryable; between attempts the function waits 4 seconds.

	    Args:
	        prompt: Full styled text prompt for the image model.

	    Returns:
	        Whatever ``text_to_image`` returns (annotated as a PIL image).

	    Raises:
	        Exception: The error from the final attempt, re-raised unchanged once
	            all attempts are used up.
	    """
	    from huggingface_hub import InferenceClient

	    # HF Pro token (with "Make calls to Inference Providers" permission) covers
	    # all providers — no separate provider key needed.
	    hf_token = os.environ.get("HF_TOKEN")

	    # The client does not depend on the attempt number, so build it once
	    # instead of once per retry.
	    client = InferenceClient(provider=PROVIDER, api_key=hf_token)

	    # Always try at least once, even if RETRIES is misconfigured to 0 or less;
	    # otherwise the loop would be skipped and there would be nothing to raise.
	    attempts = max(1, RETRIES)
	    for attempt in range(1, attempts + 1):
	        try:
	            return client.text_to_image(prompt, model=MODEL, width=1024, height=1024)
	        except Exception as e:
	            if attempt == attempts:
	                raise  # exhausted retries — surface the last error as-is
	            print(f" ⚠️ attempt {attempt}/{attempts} failed ({str(e)[:60]}) — retrying")
	            time.sleep(4)
	    # Not reached: the final attempt above either returns or raises.


	def main():
	    """Command-line entry point: generate (or re-caption) the training set.

	    Walks PROMPTS in order. Scene number n (1-based) maps to
	    ``training/image_NNN.jpg`` plus a caption file ``training/image_NNN.txt``
	    under the project root. Scenes whose image already exists are skipped, so
	    an interrupted run can simply be restarted.

	    Flags:
	        --start N        First scene number to process (1-based, >= 1).
	        --count N        Process at most N scenes starting at --start (>= 1).
	        --dry-run        Print truncated prompts/captions; no API calls.
	        --captions-only  Rewrite captions for images already on disk; no API calls.

	    Exits with status 1 when a real generation run is requested but HF_TOKEN
	    is not set, and with argparse's usage error for out-of-range flag values.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--start", type=int, default=1, help="Resume from image N (1-based)")
	    parser.add_argument("--count", type=int, default=None, help="Generate at most N images then stop")
	    parser.add_argument("--dry-run", action="store_true", help="Print prompts without generating")
	    parser.add_argument("--captions-only", action="store_true", help="Rewrite .txt captions for existing images (no API calls)")
	    args = parser.parse_args()

	    # Reject values that would silently shift or empty the range.
	    if args.start < 1:
	        parser.error("--start must be >= 1")
	    if args.count is not None and args.count < 1:
	        parser.error("--count must be >= 1")

	    # Only a real generation run talks to the API and therefore needs a token.
	    needs_api = not (args.dry_run or args.captions_only)
	    if needs_api and not os.environ.get("HF_TOKEN"):
	        print("❌ HF_TOKEN not found. Add it to .env")
	        print(" Get yours at https://huggingface.co/settings/tokens")
	        sys.exit(1)

	    out_dir = Path(__file__).parent.parent / "training"
	    out_dir.mkdir(exist_ok=True)

	    total = len(PROMPTS)
	    # Compare against None rather than truthiness so an explicit count is never
	    # mistaken for "no limit" (which would bill a full run).
	    limited = args.count is not None
	    end_at = (args.start - 1 + args.count) if limited else total  # last scene number, inclusive

	    count_label = f"{args.count} images" if limited else f"all {total} images"
	    print(f"NumZoo dataset generator — {count_label} → {out_dir}")
	    print(f"Range: {args.start}–{min(end_at, total)} of {total}")
	    print()

	    generated_this_run = 0

	    for n, (name, gen_prompt, caption) in enumerate(PROMPTS, start=1):
	        if n < args.start:
	            continue
	        if n > end_at:
	            break

	        img_path = out_dir / f"image_{n:03d}.jpg"
	        txt_path = out_dir / f"image_{n:03d}.txt"

	        # Rewrite captions for existing images, no image generation. Scenes
	        # without an image are left alone so no orphan .txt files appear.
	        if args.captions_only:
	            if img_path.exists():
	                txt_path.write_text(caption, encoding="utf-8")
	                print(f"[{n:02d}/{total}] 📝 {name} — caption updated")
	            else:
	                print(f"[{n:02d}/{total}] ⏭ {name} — no image yet, caption skipped")
	            continue

	        if img_path.exists():
	            print(f"[{n:02d}/{total}] ⏭ {name} — already exists, skipping")
	            continue

	        print(f"[{n:02d}/{total}] 🎨 {name}")

	        if args.dry_run:
	            print(f" gen: {gen_prompt[:90]}…")
	            print(f" caption: {caption[:90]}…")
	            continue

	        try:
	            image = generate_image(gen_prompt)
	            image.save(img_path, "JPEG", quality=95)
	            # Explicit UTF-8 so caption files do not depend on the platform's
	            # default encoding.
	            txt_path.write_text(caption, encoding="utf-8")
	            generated_this_run += 1
	            print(f" ✅ saved {img_path.name}")
	        except Exception as e:
	            # Best effort: report the failure and move on to the next scene.
	            print(f" ❌ failed: {e}")
	            time.sleep(5)  # brief pause on error before continuing

	    total_on_disk = len(list(out_dir.glob("*.jpg")))
	    print(f"\nDone. {generated_this_run} generated this run · {total_on_disk}/{total} total in {out_dir}")


	# Run the generator only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()