# Scraped page header (not part of the program):
# Spaces: Futured / mdevcamp-ai-playground
# Status: Runtime error
# File size: 24,777 Bytes

import concurrent.futures
import datetime
import os
import urllib.parse
import uuid

import boto3
import gradio as gr
import requests
from dotenv import load_dotenv

from images_and_poses import default_images, default_poses
from styles import predefined_styles

# Let python-dotenv populate the process environment before the settings below are read.
load_dotenv()

# HTTP endpoint that generation requests are POSTed to (see execute_instantid_request).
INSTANT_ID_URL = "https://europe-west1-mdevcamp-ai.cloudfunctions.net/instantid"
# AWS credentials used for the S3 uploads; os.getenv yields None when a variable is unset.
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_ACCESS_SECRET = os.getenv('AWS_ACCESS_SECRET')

def process_images(
    person_images_defaults,
    person_images_custom,
    pose_images_defaults,
    pose_images_custom,
    prompt,
    negative_prompt,
    num_steps,
    identity_strength_ration,
    adapter_strength_ration,
    pose_strength_ration,
    canny_strength_ration,
    depth_strength_ration,
    guidance_strength_ration,
    generations_repeat_count,
    controlnet_selection,
    scheduler,
    enable_lcm,
    enhance_face_region,
):
    """Generate one image per face/pose/repeat combination, streaming progress.

    This is a generator used as the click handler of the "Generate" button.
    Every yielded value is a ``(gallery_items, status_text)`` pair matching
    the ``[output_gallery, status]`` outputs of that event.

    Args:
        person_images_defaults: Selected predefined face images (may be None).
        person_images_custom: Uploaded face image files (may be None).
        pose_images_defaults: Selected predefined pose images (may be None).
        pose_images_custom: Uploaded pose image files (may be None).
        prompt: Positive prompt; generation is refused when it is empty.
        negative_prompt .. enhance_face_region: Generation settings that are
            forwarded unchanged to ``generate_requests_data``.

    Yields:
        ``(gallery_items, status_text)``. Finished requests appear in the
        gallery as ``(image, "Caption")`` tuples, failed requests as an error
        placeholder, and pending requests as a loading placeholder.
    """
    # All four sources are concatenated as lists further down, so a missing
    # value has to become an empty *list* (list + dict raises TypeError).
    person_images_custom = list(person_images_custom or [])
    person_images_defaults = list(person_images_defaults or [])
    pose_images_custom = list(pose_images_custom or [])
    pose_images_defaults = list(pose_images_defaults or [])

    if not person_images_defaults and not person_images_custom:
        gr.Warning('No person images set')
        return
    if not prompt:
        gr.Warning('No prompt set')
        return

    yield [], "Uploading images"

    uploaded_person_image_urls, uploaded_pose_image_urls = upload_images_concurrently(
        person_images_paths=person_images_custom,
        pose_images_paths=pose_images_custom,
    )

    # A failed upload is reported as None; sending a request without an image
    # URL would only waste a generation slot, so such entries are skipped.
    uploaded_person_image_urls = [url for url in uploaded_person_image_urls if url is not None]
    uploaded_pose_image_urls = [url for url in uploaded_pose_image_urls if url is not None]
    failed_uploads = (
        len(person_images_custom) + len(pose_images_custom)
        - len(uploaded_person_image_urls) - len(uploaded_pose_image_urls)
    )
    if failed_uploads > 0:
        gr.Warning(f"{failed_uploads} image(s) failed to upload and will be skipped")

    person_images_urls = uploaded_person_image_urls + person_images_defaults
    posess_images_urls = uploaded_pose_image_urls + pose_images_defaults

    if not person_images_urls:
        # Every custom upload failed and no predefined face was selected.
        yield [], "Uploading images failed"
        return

    requests_data = generate_requests_data(
        generations_repeat_count=generations_repeat_count,
        uploaded_person_image_urls=person_images_urls,
        uploaded_pose_image_urls=posess_images_urls,
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_steps=num_steps,
        identity_strength_ration=identity_strength_ration,
        adapter_strength_ration=adapter_strength_ration,
        pose_strength_ration=pose_strength_ration,
        canny_strength_ration=canny_strength_ration,
        depth_strength_ration=depth_strength_ration,
        controlnet_selection=controlnet_selection,
        guidance_strength_ration=guidance_strength_ration,
        scheduler=scheduler,
        enable_lcm=enable_lcm,
        enhance_face_region=enhance_face_region,
    )

    print(requests_data)

    total_requests = len(requests_data)
    yield [], f"Generating images 0/{total_requests}"

    # Placeholder images shown for failed and not-yet-finished requests.
    error_image = "https://cdn.pixabay.com/photo/2017/02/12/21/29/false-2061132_640.png"
    loading_image = "https://t4.ftcdn.net/jpg/03/16/15/47/360_F_316154790_pnHGQkERUumMbzAjkgQuRvDgzjAHkFaQ.jpg"

    gallery_items = []
    error_count = 0
    # Requests are executed one after another; the gallery is refreshed after each.
    for req_data in requests_data:
        response = execute_instantid_request(req_data)
        if response is not None:
            gallery_items.append((response, "Caption"))
        else:
            gallery_items.append((error_image, "Caption"))
            error_count += 1

        images = gallery_items + [loading_image] * (total_requests - len(gallery_items))
        yield images, f"Generating images {len(gallery_items)}/{total_requests} (Failed: {error_count})"
 
def generate_requests_data(
    generations_repeat_count,
    uploaded_person_image_urls,
    uploaded_pose_image_urls,
    prompt,
    negative_prompt,
    num_steps,
    identity_strength_ration,
    adapter_strength_ration,
    pose_strength_ration,
    canny_strength_ration,
    depth_strength_ration,
    controlnet_selection,
    guidance_strength_ration,
    scheduler,
    enable_lcm,
    enhance_face_region,
):
    """Build one request payload per (repeat, person image, pose image) combination.

    Args:
        generations_repeat_count: How many times every combination is repeated.
            Integral floats such as ``2.0`` are accepted; ``None`` yields no requests.
        uploaded_person_image_urls: Face image URLs.
        uploaded_pose_image_urls: Pose image URLs. When empty, each person
            image is also used as its own pose image.
        prompt .. enhance_face_region: Copied unchanged into every payload.

    Returns:
        A list of dicts, ordered by repeat, then person image, then pose image.
    """
    # NOTE(review): depending on the Gradio version a gr.Number value may arrive
    # as a float (or None for an empty field); range() only accepts integers.
    repeat_count = int(generations_repeat_count or 0)

    requests_data = []
    for _ in range(repeat_count):
        for person_image_url in uploaded_person_image_urls:
            # Use the person image itself if no poses are available
            poses = uploaded_pose_image_urls if len(uploaded_pose_image_urls) > 0 else [person_image_url]

            for pose_image_url in poses:
                requests_data.append(
                    {
                        "faceImageUrl": person_image_url,
                        "poseImageUrl": pose_image_url,
                        "prompt": prompt,
                        "n_prompt": negative_prompt,
                        "num_steps": num_steps,
                        "identity_strength_ration": identity_strength_ration,
                        "adapter_strength_ration": adapter_strength_ration,
                        "pose_strength_ration": pose_strength_ration,
                        "canny_strength_ration": canny_strength_ration,
                        "depth_strength_ration": depth_strength_ration,
                        "controlnet_selection": controlnet_selection,
                        "guidance_strength_ration": guidance_strength_ration,
                        "scheduler": scheduler,
                        "enable_lcm": enable_lcm,
                        "enhance_face_region": enhance_face_region,
                    }
                )
    return requests_data

def upload_images_concurrently(person_images_paths, pose_images_paths):
    """
    Uploads person and pose images concurrently and keeps them organized.

    Returns a tuple of lists: (list of person image URLs, list of pose image URLs).
    Each list follows the order of the corresponding input paths. Uploads that
    failed (reported as None by upload_image_to_s3) are left out of the result.
    """
    person_images_paths = list(person_images_paths or [])
    pose_images_paths = list(pose_images_paths or [])

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit everything first so that all uploads can run in parallel.
        person_futures = [
            executor.submit(upload_image_to_s3, image_path)
            for image_path in person_images_paths
        ]
        pose_futures = [
            executor.submit(upload_image_to_s3, image_path)
            for image_path in pose_images_paths
        ]

        # Collect in submission order (not completion order) so the returned
        # URLs line up with the input paths; result() waits for each upload.
        person_results = [future.result() for future in person_futures]
        pose_results = [future.result() for future in pose_futures]

    uploaded_person_image_urls = [url for url in person_results if url is not None]
    uploaded_pose_image_urls = [url for url in pose_results if url is not None]
    return uploaded_person_image_urls, uploaded_pose_image_urls

def upload_image_to_s3(image_path) -> str | None:
    """Upload a local image file to the S3 bucket and return its URL.

    Args:
        image_path: Filesystem path of the image to upload.

    Returns:
        The ``https://<bucket>.s3.amazonaws.com/<key>`` URL of the uploaded
        object, or None when the upload failed (the error is printed).
    """
    s3_client = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_ACCESS_SECRET)
    bucket_name = 'mdevcamp-ai-upload-script'
    image_file_name = os.path.basename(image_path)
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    # The timestamp only has second resolution and uploads run concurrently,
    # so same-named files would overwrite each other without a random part.
    unique_suffix = uuid.uuid4().hex[:8]
    image_key = f"images/{timestamp}-{unique_suffix}-{image_file_name}"

    print(f"Uploading started: {image_key}")

    try:
        with open(image_path, 'rb') as image:
            s3_client.upload_fileobj(image, bucket_name, image_key)
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller continue
        # with the remaining images. gr.Error is an exception class that only
        # has an effect when raised, so a non-raising gr.Warning is used.
        gr.Warning("Uploading finished with error")
        print(f"Uploading finished with error: {e}")
        return None

    print(f"Uploading finished: {image_key}")

    # Percent-encode the key so that file names containing spaces or other
    # reserved characters still produce a valid URL ('/' is kept as is).
    return f"https://{bucket_name}.s3.amazonaws.com/{urllib.parse.quote(image_key)}"

def execute_instantid_request(data: dict) -> str | None:
    """POST a single generation request to the InstantID endpoint.

    Args:
        data: Request parameters; sent wrapped as ``{"instances": [data]}``.

    Returns:
        The response body decoded as UTF-8 for a 2xx response, or None when
        the request could not be sent or the server answered with another status.
    """
    payload = {
        "instances": [data]
    }

    print(f"InstantID started: {payload}")

    try:
        response = requests.post(INSTANT_ID_URL, json=payload)
    except requests.RequestException as e:
        # A transport failure (DNS, connection reset, ...) counts as one failed
        # image instead of aborting the whole generation run.
        gr.Warning("InstantID finished with error")
        print(f"InstantID finished with error: {e}")
        return None

    print(f"InstatntID finished: {response.status_code}")

    if 200 <= response.status_code < 300:
        return response.content.decode('utf-8')

    # gr.Error is an exception class and does nothing unless raised; use the
    # non-raising gr.Warning so the caller can still record the failure.
    gr.Warning("InstantID finished with error")
    print(f"InstatntID finished: {response.__dict__}")
    return None

def update_gradion_elements_with_style(style):
    """Return the form values of a predefined style.

    Args:
        style: Key of the style in ``predefined_styles``.

    Returns:
        A 14-tuple in the order of the ``style_dropdown.change`` outputs:
        prompt, negative prompt, num steps, the identity / adapter / pose /
        canny / depth / guidance strengths, generations repeat count,
        controlnet selection, scheduler, enable LCM, enhance face region.
    """
    if style in predefined_styles:
        style_obj = predefined_styles[style]
    else:
        # predefined_styles is keyed by style name, so indexing it with 0 would
        # raise KeyError; fall back to the first defined style instead.
        style_obj = next(iter(predefined_styles.values()))

    return (
        style_obj.prompt,
        style_obj.negative_prompt,
        style_obj.num_steps,
        style_obj.identity_strength_ratio,
        style_obj.adapter_strength_ratio,
        style_obj.pose_strength_ratio,
        style_obj.canny_strength_ratio,
        style_obj.depth_strength_ratio,
        style_obj.guidance_strength_ratio,
        style_obj.generations_repeat_count,
        style_obj.controlnet_selection,
        style_obj.scheduler,
        style_obj.enable_lcm,
        style_obj.enhance_face_region,
    )

# Style whose values pre-fill the form controls defined below.
default_style = predefined_styles["Sculpture"]

with gr.Blocks() as demo:
    gr.Markdown("""
    
        # mDevCamp app tester
                    
        Tento nástroj slouží k experimentování s vytvářením stylů a póz, které budeme využívat v rámci mDevCamp aplikace. Uživatel nahraje selfie a následně bude odemykat různé styly a pózy, které bude moci kombinovat.
        Od vás bychom potřebovali, abyste byli trochu kreativní a zkusili vymyslet styly a pózy, které by byly použitelné. Kreativitě se meze nekladou.
                
        Pokud si myslíte, že se vám podařil nějaký zajímavý styl, tak zkopírujte prompty, pošlete pózu a screenshot všech nastavení do #ai-avatars kanálu na Slacku.
                
        Pro generování promptů můžete použít například [mDevCamp prompt generátor](https://chat.futured.app/?client=mdevcamppromptgenerator) nebo [Art Style Explorer](https://chat.openai.com/g/g-669XwyKQz-art-style-explorer), pokud máte zaplacený ChatGPT+.
                
        **Našim cieľom je mať nastavenia ktoré sú dobre zreprodukovatelné a ponúkajú dobrý výsledok aj pri viacnásobnom opakovaní.**
                
        **HW, na kterém to běží, dokáže zpracovat pouze jednu fotku naráz, tak se snažte nepouštět generování pro desítky nebo stovky obrázků, protože budete blokovat ostatní**

        **HW, na kterém to běží, je celkem drahý, proto ho budeme zapínat a vypínat pouze v určité hodiny**
                
        **Pokud příliš dlouho čekáte a pak se vám vracejí errory, tak je možné, že to právě používá i někdo jiný, škálování řešení se ještě neimplementuje**
    """)      

    with gr.Accordion("Návod", open=False):
        gr.Markdown(""" 
        ### Jak to vlastně funguje?

        ##### Výběr fotky
        Můžete vybrat jednu z přednastavených nebo nahrát svou vlastní
                    
        ##### Výběr stylu
        Pro inspiraci máme několik stylů, které už existují, můžete se u nich inspirovat tím, jak nastavit jednotlivé parametry, styl po výběru můžete volně upravovat
                    
        #### Prompt
        Zde popíšete, jak bude vypadat výsledný obrázek, prompt nemá velký vliv na to, v jaké poloze například obrázek bude, spíše popisuje, v jakém stylu to bude vytvořeno. Inspirujte se existujícími.
                    
        #### Negative prompt
        Zde popište, jak nechcete, aby obrázek vypadal, pokud dostanete rozmazaný obrázek, tak tam chcete přidat blurry například.
                    
        #### Pózy
        Zde vyberte nějakou pózu z předdefinovaných nebo nahrajte svou vlastní. Nemusíte tuto hodnotu nastavovat. Výběr kvalitní pózy je celkem náročný, některé mohou skončit errorem. Ideál je póza, která zachycuje horní část těla a je relativně blízko. Inspirace může být "We Can Do It" póza. Při výběru více póz se kombinuje každá póza s každým avatarem. Tak opatrně.
                    
        ### Pózy

        #### Generations for each image
        Počet generací každého obrázku. Pokud máš jednu fotku a sem nastavíš 5, tak vygeneruješ 5 fotek.
                    
        #### Num steps
        Určuje počet kroků, které model provede při generování obrázku. Vyšší počet kroků vede k detailnějším výsledkům, ale prodlužuje čas generování.

        #### Identity strength ration    
        Ovlivňuje míru, do jaké se zachová identita vstupního obrázku. Vyšší hodnota znamená, že výsledný obrázek bude více podobný vstupu.

        #### Adapter strength ration
        Ovlivňuje míru, do jaké se nastaví podobnost usazení tváře na referenční pozici.

        #### Pose strength ration
        Určuje míru, do jaké se aplikuje informace o póze ze vstupního obrázku. Vyšší hodnota znamená, že výsledná póza bude více podobná té nastavené.
                    
        #### Canny strength ration
        Určuje míru, do jaké se aplikuje informace o jednotlivých detailech ze vstupního obrázku. Vyšší hodnota znamená, že generovaný obrázek bude kopírovat i věci jako logo na tričku například.

        #### Depth strength ration
        Kontroluje míru, do jaké se aplikuje informace o hloubce ze vstupního obrázku. Vyšší hodnota znamená silnější vliv hloubkové mapy na generovaný obrázek.

        #### Guidance strength ration
        Vcelku pokročilá věc, asi nemusíš měnit nebo dohledej online.
                    
        #### Scheduler
        Vcelku pokročilá věc, asi nemusíš měnit nebo dohledej online.
                    
        #### Controlnet selection
        Určuje, to či sa má použiť póza (pose), detaily (canny) alebo hĺbka (depth) z obrázku.

        #### Enable LCM
        Vcelku pokročilá věc, asi nemusíš měnit nebo dohledej online.

        #### Enhance Face Region
        Vcelku pokročilá věc, asi nemusíš měnit nebo dohledej online.

        ### Usage tips
        - If you're not satisfied with the similarity, try to increase the weight of "IdentityNet Strength" and "Adapter Strength".
        - If you feel that the saturation is too high, first decrease the Adapter strength. If it is still too high, then decrease the IdentityNet strength.
        - If you find that text control is not as expected, decrease Adapter strength.
        - If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.

        ![manual](https://instantid.github.io/static/documents/editbility.jpg)
        """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Faces")
            # Choices are (label, value) pairs; event handlers receive the values.
            default_faces_selection = gr.CheckboxGroup(
                choices=list(default_images.items()),
                label="Select some familiar faces",
                type="value",
            )
            # File extensions must start with a dot; a bare "jpg" is treated as
            # a file *type* (like "image") and matches no real upload.
            custom_faces_upload = gr.File(
                label="Or upload custom selfies",
                file_count="multiple",
                file_types=[".jpg", ".jpeg", ".png", ".webp"],
            )


    # Picking a style overwrites the prompt fields and the advanced settings
    # (see the style_dropdown.change registration further down).
    style_dropdown = gr.Dropdown(
        choices=list(predefined_styles.keys()),
        value=default_style.style_name,
        label="Predefined style",
    )

    # Positive prompt, pre-filled from the default style.
    prompt = gr.Textbox(
        value=default_style.prompt,
        label="Prompt",
        lines=5,
        placeholder="Superman",
    )

    # Negative prompt, pre-filled from the default style.
    negative_prompt = gr.Textbox(
        value=default_style.negative_prompt,
        label="Negative prompt",
        lines=2,
        placeholder="Blurry",
    )

    with gr.Accordion("Poses", open=False):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Poses")
                # Choices are (label, value) pairs; event handlers receive the values.
                default_poses_selection = gr.CheckboxGroup(
                    choices=list(default_poses.items()),
                    label="Select default poses",
                    type="value",
                )
                default_poses_previews = gr.Gallery(
                    label="Defaul poses gallery",
                    columns=5,
                    show_label=True,
                    allow_preview=False,
                )

                def update_default_poses_previews(selected_poses):
                    """Return (image, caption) gallery items for the selected default poses."""
                    selected_poses = selected_poses or []
                    return [
                        (pose, key)
                        for key, pose in default_poses.items()
                        if pose in selected_poses
                    ]

                # Keep the preview gallery in sync with the checkbox selection.
                default_poses_selection.change(
                    fn=update_default_poses_previews,
                    inputs=[default_poses_selection],
                    outputs=[default_poses_previews],
                )

                # File extensions must start with a dot; a bare "jpg" is treated
                # as a file *type* (like "image") and matches no real upload.
                custom_poses_upload = gr.File(
                    label="Or upload custom poses",
                    file_count="multiple",
                    file_types=[".jpg", ".jpeg", ".png", ".webp"],
                )

    with gr.Accordion("Advanced", open=False):
        # precision=0 makes Gradio round the value and hand it over as an int;
        # the repeat count is used as a loop bound and must not be a float.
        generations_repeat_count = gr.Number(
            value=default_style.generations_repeat_count,
            label="Generations for each image (how much images will be generated from each image)",
            minimum=1,
            maximum=10,
            precision=0,
        )

        with gr.Row():
            # A step count is a whole number as well, hence precision=0.
            num_steps = gr.Number(
                value=default_style.num_steps,
                label="Num steps",
                minimum=1,
                maximum=100,
                precision=0,
            )
            identity_strength_ration = gr.Number(
                value=default_style.identity_strength_ratio,
                label="Identity strength ration",
                minimum=0.0,
                maximum=2.0,
                step=0.01,
            )
            adapter_strength_ration = gr.Number(
                value=default_style.adapter_strength_ratio,
                label="Adapter strength ration",
                minimum=0.0,
                maximum=2.0,
                step=0.01,
            )
        with gr.Row():
            pose_strength_ration = gr.Number(
                value=default_style.pose_strength_ratio,
                label="Pose strength ration",
                minimum=0.0,
                maximum=2.0,
                step=0.01,
            )
            canny_strength_ration = gr.Number(
                value=default_style.canny_strength_ratio,
                label="Canny strength ration",
                minimum=0.0,
                maximum=2.0,
                step=0.01,
            )
            depth_strength_ration = gr.Number(
                value=default_style.depth_strength_ratio,
                label="Depth strength ration",
                minimum=0.0,
                maximum=2.0,
                step=0.01,
            )

        with gr.Row():
            guidance_strength_ration = gr.Number(
                value=default_style.guidance_strength_ratio,
                label="Guidance strength ration",
                minimum=0.0,
                maximum=50.0,
                step=0.01,
            )
            scheduler = gr.Dropdown(
                value=default_style.scheduler,
                choices=[
                    "DEISMultistepScheduler",
                    "HeunDiscreteScheduler",
                    "EulerDiscreteScheduler",
                    "DPMSolverMultistepScheduler",
                    "DPMSolverMultistepScheduler-Karras",
                    "DPMSolverMultistepScheduler-Karras-SDE",
                ],
                label="Scheduler",
            )
        with gr.Row():
            controlnet_selection = gr.CheckboxGroup(
                value=default_style.controlnet_selection,
                choices=["pose", "canny", "depth"],
                label="Controlnet selection",
            )

        with gr.Row():
            enable_lcm = gr.Checkbox(
                value=default_style.enable_lcm,
                label="Enable LCM",
            )
            enhance_face_region = gr.Checkbox(
                value=default_style.enhance_face_region,
                label="Enhance Face Region",
            )

    # Starts the generation; its label is rewritten by update_button_text to
    # show how many images the current selection would produce.
    btn = gr.Button(
        value="Generate",
    )

    # Progress text yielded by process_images.
    status = gr.Markdown()

    # Gallery that receives the images yielded by process_images.
    output_gallery = gr.Gallery(
        label="Results",
        show_label=False,
        elem_id="gallery",
        columns=[3],
        rows=[1],
        object_fit="contain",
        height="auto",
    )

    # Apply the chosen predefined style to the form. The order of `outputs`
    # has to match the tuple returned by update_gradion_elements_with_style.
    style_dropdown.change(
        fn=update_gradion_elements_with_style,
        inputs=[style_dropdown],
        outputs=[
            prompt,
            negative_prompt,
            num_steps,
            identity_strength_ration, 
            adapter_strength_ration,  
            pose_strength_ration,     
            canny_strength_ration,    
            depth_strength_ration,    
            guidance_strength_ration,
            generations_repeat_count,
            controlnet_selection,
            scheduler,
            enable_lcm,
            enhance_face_region,
        ],
    )

    def calculate_total_images(person_images_defaults, person_images_custom, pose_images_defaults, pose_images_custom, generations_repeat_count):
        """Return how many images the current selection would generate.

        The result is persons * poses * repeats, where a missing pose selection
        counts as one pose (the person image is then used as the pose). Missing
        (None) selections count as empty and a missing repeat count as 0.
        """
        person_images_count = sum(
            1 for image in (person_images_defaults or []) if image in default_images.values()
        ) + len(person_images_custom or [])

        pose_images_count = sum(
            1 for pose in (pose_images_defaults or []) if pose in default_poses.values()
        ) + len(pose_images_custom or [])

        if pose_images_count == 0:
            pose_images_count = 1  # Use person image if no poses are available

        # A cleared number field arrives as None, and the value may be a float
        # depending on the Gradio version; normalise it to an int either way.
        repeat_count = int(generations_repeat_count or 0)

        return person_images_count * pose_images_count * repeat_count
    
    def update_button_text(person_images_defaults, person_images_custom, pose_images_defaults, pose_images_custom, generations_repeat_count):
        """Build the "Generate" button label showing the number of images to be generated."""
        image_total = calculate_total_images(
            person_images_defaults,
            person_images_custom,
            pose_images_defaults,
            pose_images_custom,
            generations_repeat_count,
        )
        label = f"Generate ({image_total} images)"
        return label

    # Every control that influences the number of generated images refreshes
    # the button label on change. All listeners share the same handler, the
    # same inputs (in update_button_text's parameter order) and the same output.
    button_text_inputs = [
        default_faces_selection,
        custom_faces_upload,
        default_poses_selection,
        custom_poses_upload,
        generations_repeat_count,
    ]
    for count_source in button_text_inputs:
        count_source.change(
            fn=update_button_text,
            inputs=list(button_text_inputs),
            outputs=[btn],
        )

    # Run the generation. `inputs` follows the parameter order of
    # process_images, which is a generator: every pair it yields updates the
    # gallery and the status text.
    btn.click(
        fn=process_images,
        inputs=[
            default_faces_selection,
            custom_faces_upload,
            default_poses_selection,
            custom_poses_upload,
            prompt,
            negative_prompt,
            num_steps,
            identity_strength_ration,
            adapter_strength_ration,
            pose_strength_ration,
            canny_strength_ration,
            depth_strength_ration,
            guidance_strength_ration,
            generations_repeat_count,
            controlnet_selection,
            scheduler,
            enable_lcm,
            enhance_face_region,
        ],
        outputs=[output_gallery, status]
    )

# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()