File size: 9,353 Bytes

a70eb3d

#!/usr/bin/env python3
"""Generate a diverse character dataset using Flux 1 + PuLID for LoRA training."""

import json
import os
import shutil
import time
import urllib.request

# Base URL of the ComfyUI HTTP API; the /prompt and /history endpoints are appended to it.
COMFYUI_URL = "http://127.0.0.1:80"
# Directory that receives each generated image (<name>.png) and its caption (<name>.txt).
DATASET_DIR = "/home/azureuser/ai-toolkit/character_dataset"
# Token interpolated into every prompt and caption below
# (presumably the LoRA trigger token for the character — confirm against the trainer config).
TRIGGER_WORD = "ohwx"

# Diverse prompts covering different angles, scenes, lighting, outfits.
# Each entry is a (prompt_text, short_name) tuple: prompt_text is sent to the
# text encoder, short_name is used for the output file names and as the key
# into CAPTIONS.
PROMPTS = [
    # Close-ups / headshots
    (f"close up portrait photo of {TRIGGER_WORD} woman, natural lighting, soft smile, looking at camera, shallow depth of field", "closeup_front"),
    (f"close up portrait of {TRIGGER_WORD} woman, side profile view, golden hour lighting, outdoors", "closeup_profile"),
    (f"close up portrait of {TRIGGER_WORD} woman, three quarter view, studio lighting, neutral expression", "closeup_34"),
    (f"headshot of {TRIGGER_WORD} woman, looking slightly up, soft natural light, gentle smile", "closeup_up"),
    (f"close up of {TRIGGER_WORD} woman, looking down at something in her hands, natural indoor lighting", "closeup_down"),

    # Half body shots
    (f"half body photo of {TRIGGER_WORD} woman in a white blouse, sitting at a cafe table, warm ambient light, candid", "half_cafe"),
    (f"half body photo of {TRIGGER_WORD} woman wearing a leather jacket, urban street background, overcast day", "half_street"),
    (f"half body photo of {TRIGGER_WORD} woman in athletic wear, gym setting, bright lighting", "half_gym"),
    (f"half body photo of {TRIGGER_WORD} woman in a sundress, garden background, dappled sunlight", "half_garden"),
    (f"half body photo of {TRIGGER_WORD} woman in business attire, modern office, professional lighting", "half_office"),

    # Full body shots
    (f"full body photo of {TRIGGER_WORD} woman walking on the beach, sunset, casual summer outfit, warm tones", "full_beach"),
    (f"full body photo of {TRIGGER_WORD} woman standing in a city street, winter coat, evening city lights", "full_city"),
    (f"full body photo of {TRIGGER_WORD} woman hiking on a mountain trail, athletic outfit, golden hour", "full_hike"),
    (f"full body photo of {TRIGGER_WORD} woman leaning against a wall, casual jeans and t-shirt, natural daylight", "full_casual"),

    # Different lighting conditions
    (f"portrait of {TRIGGER_WORD} woman, dramatic side lighting, dark moody background, artistic photo", "light_dramatic"),
    (f"portrait of {TRIGGER_WORD} woman, bright overcast natural light, white background, clean look", "light_bright"),
    (f"portrait of {TRIGGER_WORD} woman, warm golden hour backlight, hair glowing, outdoor", "light_golden"),
    (f"portrait of {TRIGGER_WORD} woman, soft window light, sitting on a couch, cozy indoor setting", "light_window"),

    # Different expressions
    (f"photo of {TRIGGER_WORD} woman laughing genuinely, candid moment, natural setting", "expr_laugh"),
    (f"photo of {TRIGGER_WORD} woman with a serious contemplative expression, looking into distance", "expr_serious"),
]

# Caption text written next to each image, keyed by the short name used in PROMPTS.
# Every caption already embeds TRIGGER_WORD; many are slightly shortened versions
# of the matching generation prompt.
CAPTIONS = {
    "closeup_front": f"close up portrait photo of {TRIGGER_WORD} woman, natural lighting, soft smile, looking at camera",
    "closeup_profile": f"close up portrait of {TRIGGER_WORD} woman, side profile view, golden hour lighting, outdoors",
    "closeup_34": f"close up portrait of {TRIGGER_WORD} woman, three quarter view, studio lighting, neutral expression",
    "closeup_up": f"headshot of {TRIGGER_WORD} woman, looking slightly up, soft natural light, gentle smile",
    "closeup_down": f"close up of {TRIGGER_WORD} woman, looking down, natural indoor lighting",
    "half_cafe": f"half body photo of {TRIGGER_WORD} woman in a white blouse, sitting at a cafe, warm ambient light",
    "half_street": f"half body photo of {TRIGGER_WORD} woman wearing a leather jacket, urban street, overcast day",
    "half_gym": f"half body photo of {TRIGGER_WORD} woman in athletic wear, gym setting, bright lighting",
    "half_garden": f"half body photo of {TRIGGER_WORD} woman in a sundress, garden, dappled sunlight",
    "half_office": f"half body photo of {TRIGGER_WORD} woman in business attire, modern office",
    "full_beach": f"full body photo of {TRIGGER_WORD} woman walking on the beach, sunset, casual summer outfit",
    "full_city": f"full body photo of {TRIGGER_WORD} woman standing in city street, winter coat, evening lights",
    "full_hike": f"full body photo of {TRIGGER_WORD} woman hiking on mountain trail, athletic outfit, golden hour",
    "full_casual": f"full body photo of {TRIGGER_WORD} woman leaning against wall, jeans and t-shirt, daylight",
    "light_dramatic": f"portrait of {TRIGGER_WORD} woman, dramatic side lighting, dark moody background",
    "light_bright": f"portrait of {TRIGGER_WORD} woman, bright overcast light, white background, clean",
    "light_golden": f"portrait of {TRIGGER_WORD} woman, warm golden hour backlight, hair glowing, outdoor",
    "light_window": f"portrait of {TRIGGER_WORD} woman, soft window light, sitting on couch, cozy indoor",
    "expr_laugh": f"photo of {TRIGGER_WORD} woman laughing genuinely, candid moment, natural setting",
    "expr_serious": f"photo of {TRIGGER_WORD} woman with serious contemplative expression, looking into distance",
}


def _build_workflow(prompt_text, filename, seed):
    """Return the node graph submitted to ComfyUI for one image.

    Nodes are keyed by string id; an input of the form ``["<id>", <n>]``
    refers to output ``n`` of node ``<id>``.
    """
    return {
        "1": {"class_type": "UNETLoader", "inputs": {"unet_name": "flux1-dev.safetensors", "weight_dtype": "default"}},
        "2": {"class_type": "DualCLIPLoader", "inputs": {"clip_name1": "t5xxl_fp16.safetensors", "clip_name2": "clip_l.safetensors", "type": "flux"}},
        "3": {"class_type": "VAELoader", "inputs": {"vae_name": "ae.safetensors"}},
        "4": {"class_type": "PulidFluxModelLoader", "inputs": {"pulid_file": "pulid_flux_v0.9.1.safetensors"}},
        "5": {"class_type": "PulidFluxInsightFaceLoader", "inputs": {"provider": "CUDA"}},
        "6": {"class_type": "PulidFluxEvaClipLoader", "inputs": {}},
        "7": {"class_type": "LoadImage", "inputs": {"image": "reference_face.png"}},
        "8": {"class_type": "ApplyPulidFlux", "inputs": {
            "model": ["1", 0], "pulid_flux": ["4", 0], "eva_clip": ["6", 0],
            "face_analysis": ["5", 0], "image": ["7", 0],
            "weight": 0.85, "start_at": 0.0, "end_at": 1.0
        }},
        "9": {"class_type": "CLIPTextEncodeFlux", "inputs": {
            "clip": ["2", 0],
            # Only the first 77 *characters* (not tokens) go to the clip_l input.
            "clip_l": prompt_text[:77],
            "t5xxl": prompt_text,
            "guidance": 3.5
        }},
        "10": {"class_type": "EmptySD3LatentImage", "inputs": {"width": 1024, "height": 1024, "batch_size": 1}},
        "11": {"class_type": "KSampler", "inputs": {
            # NOTE(review): "negative" reuses the positive conditioning node;
            # presumably irrelevant at cfg=1.0 — confirm.
            "model": ["8", 0], "positive": ["9", 0], "negative": ["9", 0],
            "latent_image": ["10", 0], "seed": seed,
            "control_after_generate": "fixed", "steps": 20, "cfg": 1.0,
            "sampler_name": "euler", "scheduler": "simple", "denoise": 1.0
        }},
        "12": {"class_type": "VAEDecode", "inputs": {"samples": ["11", 0], "vae": ["3", 0]}},
        "13": {"class_type": "SaveImage", "inputs": {"images": ["12", 0], "filename_prefix": f"dataset_{filename}"}}
    }


def queue_prompt(prompt_text, filename, seed):
    """Submit one generation job to ComfyUI and return its prompt id.

    Args:
        prompt_text: Text prompt fed to the text-encoder node.
        filename: Short name; the saved image uses the prefix
            ``dataset_<filename>``.
        seed: Sampler seed for this image.

    Returns:
        The ``prompt_id`` value from the server's JSON response.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for a
            non-success HTTP status).
        KeyError: If the response JSON has no ``prompt_id`` field.
    """
    data = json.dumps({"prompt": _build_workflow(prompt_text, filename, seed)}).encode()
    req = urllib.request.Request(f'{COMFYUI_URL}/prompt', data=data, headers={'Content-Type': 'application/json'})
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())['prompt_id']


def wait_for_completion(prompt_id, timeout=600):
    """Poll the ComfyUI history endpoint until a queued job finishes.

    Args:
        prompt_id: Id returned when the job was queued.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The filename of the first image found in the job's outputs, or
        ``None`` if the job reported an error, finished without producing
        an image, or did not finish within ``timeout`` seconds.

    Raises:
        urllib.error.URLError: If a history request fails.
    """
    start = time.time()
    while time.time() - start < timeout:
        req = urllib.request.Request(f'{COMFYUI_URL}/history/{prompt_id}')
        # Close each poll's response so sockets do not pile up over a long wait.
        with urllib.request.urlopen(req) as resp:
            history = json.loads(resp.read())

        if prompt_id in history:
            entry = history[prompt_id]
            status_info = entry.get('status', {})
            status = status_info.get('status_str', '')

            if status == 'success':
                for out in entry['outputs'].values():
                    images = out.get('images')
                    if images:
                        return images[0]['filename']
                # The job is finished; polling again cannot make images appear,
                # so report failure now instead of waiting out the timeout.
                return None

            if status == 'error':
                for m in status_info.get('messages', []):
                    if m[0] == 'execution_error':
                        print(f"  ERROR: {m[1].get('exception_message', 'unknown')[:200]}")
                return None

        time.sleep(2)
    return None


def main():
    """Generate one image per PROMPTS entry and store it with its caption.

    For each ``(prompt, name)`` pair the job is queued, awaited, and on
    success the rendered file is copied to ``DATASET_DIR/<name>.png`` with
    ``CAPTIONS[name]`` written to ``DATASET_DIR/<name>.txt``. A failed
    generation or copy is reported and skipped; the run continues with the
    next prompt.

    Raises:
        urllib.error.URLError: If a request to ComfyUI fails.
        KeyError: If a prompt name has no entry in ``CAPTIONS``.
    """
    print(f"Generating {len(PROMPTS)} character images for LoRA training...\n")

    # Make sure the target exists so the copy and caption write cannot fail
    # merely because the directory has not been created yet.
    os.makedirs(DATASET_DIR, exist_ok=True)

    for i, (prompt, name) in enumerate(PROMPTS):
        # Deterministic, distinct seed per prompt so reruns are reproducible.
        seed = 10000 + i * 1337
        print(f"[{i+1}/{len(PROMPTS)}] {name} (seed={seed})")
        prompt_id = queue_prompt(prompt, name, seed)
        filename = wait_for_completion(prompt_id)

        if not filename:
            print("  -> FAILED")
            continue

        # Copy to dataset folder. shutil avoids building a shell command
        # (which breaks on quotes in the filename) and surfaces copy errors.
        src = f"/home/azureuser/ComfyUI/output/{filename}"
        dst = os.path.join(DATASET_DIR, f"{name}.png")
        try:
            shutil.copy(src, dst)
        except OSError as exc:
            # Do not write a caption for an image that was never copied.
            print(f"  -> FAILED (copy error: {exc})")
            continue

        # Write caption
        caption = CAPTIONS[name]
        with open(os.path.join(DATASET_DIR, f"{name}.txt"), 'w', encoding="utf-8") as f:
            f.write(caption)

        print(f"  -> saved {name}.png + caption")

    print(f"\nDone! Dataset at: {DATASET_DIR}")
    print(f"Files: {len(os.listdir(DATASET_DIR))}")


# Start a generation run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()