import datetime
import gradio as gr
import requests
import concurrent.futures
import boto3
import os
from dotenv import load_dotenv

from styles import predefined_styles
from images_and_poses import default_images, default_poses

load_dotenv()

INSTANT_ID_URL = "https://europe-west1-mdevcamp-ai.cloudfunctions.net/instantid"
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_ACCESS_SECRET = os.getenv('AWS_ACCESS_SECRET')


def process_images(
    person_images_defaults,
    person_images_custom,
    pose_images_defaults,
    pose_images_custom,
    prompt,
    negative_prompt,
    num_steps,
    identity_strength_ration,
    adapter_strength_ration,
    pose_strength_ration,
    canny_strength_ration,
    depth_strength_ration,
    guidance_strength_ration,
    generations_repeat_count,
    controlnet_selection,
    scheduler,
    enable_lcm,
    enhance_face_region,
):
    """Upload the custom images, then generate one image per request.

    This is a generator used as a Gradio event handler: every yield is a
    ``(gallery_items, status_text)`` pair for the output gallery and the
    status Markdown. Requests are executed sequentially and the gallery is
    re-yielded after each one, padded with a loading placeholder for the
    requests that are still pending.

    Args:
        person_images_defaults: Selected predefined face values (or None).
        person_images_custom: Paths of uploaded face images (or None).
        pose_images_defaults: Selected predefined pose values (or None).
        pose_images_custom: Paths of uploaded pose images (or None).
        prompt: Prompt text; generation is refused when it is empty.
        Remaining arguments are forwarded unchanged into each request payload.
    """
    # Normalise every selection to a list. The previous fallback for the
    # default selections was `{}`, which raised TypeError when concatenated
    # with the list of uploaded URLs below.
    person_images_custom = list(person_images_custom or [])
    person_images_defaults = list(person_images_defaults or [])
    pose_images_custom = list(pose_images_custom or [])
    pose_images_defaults = list(pose_images_defaults or [])

    if not person_images_defaults and not person_images_custom:
        gr.Warning('No person images set')
        return

    if not prompt:
        gr.Warning('No prompt set')
        return

    yield [], "Uploading images"

    uploaded_person_image_urls, uploaded_pose_image_urls = upload_images_concurrently(
        person_images_paths=person_images_custom,
        pose_images_paths=pose_images_custom,
    )

    # Failed uploads are dropped by upload_images_concurrently; report them
    # here, from the event-handler thread, so the user actually sees it.
    requested_uploads = len(person_images_custom) + len(pose_images_custom)
    finished_uploads = len(uploaded_person_image_urls) + len(uploaded_pose_image_urls)
    if finished_uploads < requested_uploads:
        gr.Warning(f"{requested_uploads - finished_uploads} image(s) failed to upload and will be skipped")

    person_images_urls = uploaded_person_image_urls + person_images_defaults
    pose_images_urls = uploaded_pose_image_urls + pose_images_defaults

    if not person_images_urls:
        gr.Warning('No person images could be uploaded')
        yield [], "No person images available"
        return

    requests_data = generate_requests_data(
        generations_repeat_count=generations_repeat_count,
        uploaded_person_image_urls=person_images_urls,
        uploaded_pose_image_urls=pose_images_urls,
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_steps=num_steps,
        identity_strength_ration=identity_strength_ration,
        adapter_strength_ration=adapter_strength_ration,
        pose_strength_ration=pose_strength_ration,
        canny_strength_ration=canny_strength_ration,
        depth_strength_ration=depth_strength_ration,
        controlnet_selection=controlnet_selection,
        guidance_strength_ration=guidance_strength_ration,
        scheduler=scheduler,
        enable_lcm=enable_lcm,
        enhance_face_region=enhance_face_region,
    )
    print(requests_data)

    yield [], f"Generating images 0/{len(requests_data)}"

    error_image = "https://cdn.pixabay.com/photo/2017/02/12/21/29/false-2061132_640.png"
    loading_image = "https://t4.ftcdn.net/jpg/03/16/15/47/360_F_316154790_pnHGQkERUumMbzAjkgQuRvDgzjAHkFaQ.jpg"

    gallery_items = []
    error_count = 0
    for req_data in requests_data:
        response = execute_instantid_request(req_data)
        if response is not None:
            gallery_items.append((response, "Caption"))
        else:
            gallery_items.append((error_image, "Caption"))
            error_count += 1

        # Pad with placeholders so the gallery keeps its final size while
        # the remaining requests are still running.
        images = gallery_items + [loading_image] * (len(requests_data) - len(gallery_items))
        yield images, f"Generating images {len(gallery_items)}/{len(requests_data)} (Failed: {error_count})"


def generate_requests_data(
    generations_repeat_count,
    uploaded_person_image_urls,
    uploaded_pose_image_urls,
    prompt,
    negative_prompt,
    num_steps,
    identity_strength_ration,
    adapter_strength_ration,
    pose_strength_ration,
    canny_strength_ration,
    depth_strength_ration,
    controlnet_selection,
    guidance_strength_ration,
    scheduler,
    enable_lcm,
    enhance_face_region,
):
    """Build one request payload per (repeat, person image, pose image).

    When no pose images are given, the person image itself is used as the
    pose image. Returns a list of dicts ready to be sent to the service.
    """
    # The count comes from a numeric input field and may arrive as a float
    # or None; range() only accepts ints.
    repeat_count = int(generations_repeat_count or 0)

    requests_data = []
    for _ in range(repeat_count):
        for person_image_url in uploaded_person_image_urls:
            # Use person image if no poses are available
            poses = uploaded_pose_image_urls or [person_image_url]
            for pose_image_url in poses:
                requests_data.append(
                    {
                        "faceImageUrl": person_image_url,
                        "poseImageUrl": pose_image_url,
                        "prompt": prompt,
                        "n_prompt": negative_prompt,
                        "num_steps": num_steps,
                        "identity_strength_ration": identity_strength_ration,
                        "adapter_strength_ration": adapter_strength_ration,
                        "pose_strength_ration": pose_strength_ration,
                        "canny_strength_ration": canny_strength_ration,
                        "depth_strength_ration": depth_strength_ration,
                        "controlnet_selection": controlnet_selection,
                        "guidance_strength_ration": guidance_strength_ration,
                        "scheduler": scheduler,
                        "enable_lcm": enable_lcm,
                        "enhance_face_region": enhance_face_region,
                    }
                )
    return requests_data


def upload_images_concurrently(person_images_paths, pose_images_paths):
    """
    Uploads person and pose images concurrently and keeps them organized.

    Returns a tuple of lists: (list of person image URLs, list of pose image URLs).
    URLs are returned in the same order as the input paths; images whose
    upload failed (upload_image_to_s3 returned None) are left out.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        person_futures = [executor.submit(upload_image_to_s3, path) for path in person_images_paths]
        pose_futures = [executor.submit(upload_image_to_s3, path) for path in pose_images_paths]

        # Collect in submission order (not completion order) so the result
        # order is deterministic.
        person_results = [future.result() for future in person_futures]
        pose_results = [future.result() for future in pose_futures]

    uploaded_person_image_urls = [url for url in person_results if url is not None]
    uploaded_pose_image_urls = [url for url in pose_results if url is not None]
    return uploaded_person_image_urls, uploaded_pose_image_urls


def upload_image_to_s3(image_path) -> str | None:
    """Upload one local image file to S3 and return its URL.

    Returns None when the upload fails; the error is printed. No Gradio
    notification is raised here: the previous bare ``gr.Error(...)`` call
    only constructed an exception object without raising it, so it never
    had any effect. Callers report failures themselves.
    """
    s3_client = boto3.client(
        's3',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_ACCESS_SECRET,
    )
    bucket_name = 'mdevcamp-ai-upload-script'
    image_file_name = os.path.basename(image_path)
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    image_key = f"images/{timestamp}-{image_file_name}"

    print(f"Uploading started: {image_key}")
    try:
        with open(image_path, 'rb') as image:
            s3_client.upload_fileobj(image, bucket_name, image_key)
        print(f"Uploading finished: {image_key}")
        image_url = f"https://{bucket_name}.s3.amazonaws.com/{image_key}"
        return image_url
    except Exception as e:
        # Best-effort boundary: one broken file must not abort the batch.
        print(f"Uploading finished with error: {e}")
        return None


def execute_instantid_request(data: dict) -> str | None:
    """POST one request payload to the InstantID endpoint.

    Returns the response body decoded as UTF-8 on a 2xx status, otherwise
    None. Network errors are reported the same way as non-2xx responses
    instead of propagating.
    """
    data = {
        "instances": [data]
    }
    print(f"InstantID started: {data}")
    try:
        response = requests.post(INSTANT_ID_URL, json=data)
    except requests.RequestException as e:
        # gr.Warning (unlike gr.Error) is shown when called, without raising.
        gr.Warning("InstantID finished with error")
        print(f"InstatntID finished: {e}")
        return None

    print(f"InstatntID finished: {response.status_code}")
    if 200 <= response.status_code < 300:
        return response.content.decode('utf-8')

    gr.Warning("InstantID finished with error")
    print(f"InstatntID finished: {response.__dict__}")
    return None


def update_gradion_elements_with_style(style):
    """Return the settings of the chosen predefined style.

    The tuple order matches the ``outputs`` list of ``style_dropdown.change``.
    Unknown style names fall back to ``default_style``; the previous fallback
    ``predefined_styles[0]`` raised KeyError because the mapping is keyed by
    style name.
    """
    style_obj = predefined_styles.get(style, default_style)
    return (
        style_obj.prompt,
        style_obj.negative_prompt,
        style_obj.num_steps,
        style_obj.identity_strength_ratio,
        style_obj.adapter_strength_ratio,
        style_obj.pose_strength_ratio,
        style_obj.canny_strength_ratio,
        style_obj.depth_strength_ratio,
        style_obj.guidance_strength_ratio,
        style_obj.generations_repeat_count,
        style_obj.controlnet_selection,
        style_obj.scheduler,
        style_obj.enable_lcm,
        style_obj.enhance_face_region,
    )


default_style = predefined_styles["Sculpture"]

# NOTE(review): the layout nesting below was reconstructed from a
# whitespace-collapsed source; confirm the placement of the button, status
# and gallery against the intended design.
with gr.Blocks() as demo:
    gr.Markdown("""
    # mDevCamp app tester

    Tento nástroj slouží k experimentování s vytvářením stylů a póz, které budeme využívat v rámci mDevCamp aplikace. Uživatel nahraje selfie a následně bude odemykat různé styly a pózy, které bude moci kombinovat. Od vás bychom potřebovali, abyste byli trochu kreativní a zkusili vymyslet styly a pózy, které by byly použitelné. Kreativitě se meze nekladou. Pokud si myslíte, že se vám podařil nějaký zajímavý styl, tak zkopírujte prompty, pošlete pózu a screenshot všech nastavení do #ai-avatars kanálu na Slacku. Pro generování promptů můžete použít například [mDevCamp prompt generátor](https://chat.futured.app/?client=mdevcamppromptgenerator) nebo [Art Style Explorer](https://chat.openai.com/g/g-669XwyKQz-art-style-explorer), pokud máte zaplacený ChatGPT+.

    **Našim cieľom je mať nastavenia ktoré sú dobre zreprodukovatelné a ponúkajú dobrý výsledok aj pri viacnásobnom opakovaní.**

    **HW, na kterém to běží, dokáže zpracovat pouze jednu fotku naráz, tak se snažte nepouštět generování pro desítky nebo stovky obrázků, protože budete blokovat ostatní**

    **HW, na kterém to běží, je celkem drahý, proto ho budeme zapínat a vypínat pouze v určité hodiny**

    **Pokud příliš dlouho čekáte a pak se vám vracejí errory, tak je možné, že to právě používá i někdo jiný, škálování řešení se ještě neimplementuje**
    """)

    with gr.Accordion("Návod", open=False):
        gr.Markdown("""
        ### Jak to vlastně funguje?

        ##### Výběr fotky
        Můžete vybrat jednu z přednastavených nebo nahrát svou vlastní

        ##### Výběr stylu
        Pro inspiraci máme několik stylů, které už existují, můžete se u nich inspirovat tím, jak nastavit jednotlivé parametry, styl po výběru můžete volně upravovat

        #### Prompt
        Zde popíšete, jak bude vypadat výsledný obrázek, prompt nemá velký vliv na to, v jaké poloze například obrázek bude, spíše popisuje, v jakém stylu to bude vytvořeno. Inspirujte se existujícími.

        #### Negative prompt
        Zde popište, jak nechcete, aby obrázek vypadal, pokud dostanete rozmazaný obrázek, tak tam chcete přidat blurry například.

        #### Pózy
        Zde vyberte nějakou pózu z předdefinovaných nebo nahrajte svou vlastní. Nemusíte tuto hodnotu nastavovat. Výběr kvalitní pózy je celkem náročný, některé mohou skončit errorem. Ideál je póza, která zachycuje horní část těla a je relativně blízko. Inspirace může být "We Can Do It" póza. Při výběru více póz se kombinuje každá póza s každým avatarem. Tak opatrně.

        ### Pózy

        #### Generations for each image
        Počet generací každého obrázku. Pokud máš jednu fotku a sem nastavíš 5, tak vygeneruješ 5 fotek.

        #### Num steps
        Určuje počet kroků, které model provede při generování obrázku. Vyšší počet kroků vede k detailnějším výsledkům, ale prodlužuje čas generování.

        #### Identity strength ration
        Ovlivňuje míru, do jaké se zachová identita vstupního obrázku. Vyšší hodnota znamená, že výsledný obrázek bude více podobný vstupu.

        #### Adapter strength ration
        Ovlivňuje míru, do jaké se nastaví podobnost usazení tváře na referenční pozici.

        #### Pose strength ration
        Určuje míru, do jaké se aplikuje informace o póze ze vstupního obrázku. Vyšší hodnota znamená, že výsledná póza bude více podobná té nastavené.

        #### Canny strength ration
        Určuje míru, do jaké se aplikuje informace o jednotlivých detailech ze vstupního obrázku. Vyšší hodnota znamená, že generovaný obrázek bude kopírovat i věci jako logo na tričku například.

        #### Depth strength ration
        Kontroluje míru, do jaké se aplikuje informace o hloubce ze vstupního obrázku. Vyšší hodnota znamená silnější vliv hloubkové mapy na generovaný obrázek.

        #### Guidance strength ration
        Vcelku pokročilá věc, asi nemusíš měnit nebo dohledej online.

        #### Scheduler
        Vcelku pokročilá věc, asi nemusíš měnit nebo dohledej online.

        #### Controlnet selection
        Určuje, to či sa má použiť póza (pose), detaily (canny) alebo hĺbka (depth) z obrázku.

        #### Enable LCM
        Vcelku pokročilá věc, asi nemusíš měnit nebo dohledej online.

        #### Enhance Face Region
        Vcelku pokročilá věc, asi nemusíš měnit nebo dohledej online.

        ### Usage tips
        - If you're not satisfied with the similarity, try to increase the weight of "IdentityNet Strength" and "Adapter Strength".
        - If you feel that the saturation is too high, first decrease the Adapter strength. If it is still too high, then decrease the IdentityNet strength.
        - If you find that text control is not as expected, decrease Adapter strength.
        - If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.

        ![manual](https://instantid.github.io/static/documents/editbility.jpg)
        """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Faces")
            default_faces_selection = gr.CheckboxGroup(
                choices=[(key, value) for key, value in default_images.items()],
                label="Select some familiar faces",
                type="value",
            )
            custom_faces_upload = gr.File(
                label="Or upload custom selfies",
                file_count="multiple",
                file_types=["jpg", "jpeg", "png", "webp"],
            )
            style_dropdown = gr.Dropdown(
                choices=list(predefined_styles.keys()),
                value=default_style.style_name,
                label="Predefined style",
            )
            prompt = gr.Textbox(
                value=default_style.prompt,
                label="Prompt",
                lines=5,
                placeholder="Superman",
            )
            negative_prompt = gr.Textbox(
                value=default_style.negative_prompt,
                label="Negative prompt",
                lines=2,
                placeholder="Blurry",
            )

            with gr.Accordion("Poses", open=False):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Poses")
                        default_poses_selection = gr.CheckboxGroup(
                            choices=[(key, value) for key, value in default_poses.items()],
                            label="Select default poses",
                            type="value",
                        )
                        default_poses_previews = gr.Gallery(
                            label="Defaul poses gallery",
                            columns=5,
                            show_label=True,
                            allow_preview=False,
                        )

                        def update_default_poses_previews(selected_poses):
                            """Return (image, caption) pairs for the selected default poses."""
                            selected_poses = selected_poses or []
                            return [
                                (pose, key)
                                for key, pose in default_poses.items()
                                if pose in selected_poses
                            ]

                        default_poses_selection.change(
                            fn=update_default_poses_previews,
                            inputs=[default_poses_selection],
                            outputs=[default_poses_previews],
                        )
                        custom_poses_upload = gr.File(
                            label="Or upload custom poses",
                            file_count="multiple",
                            file_types=["jpg", "jpeg", "png", "webp"],
                        )

            with gr.Accordion("Advanced", open=False):
                generations_repeat_count = gr.Number(
                    value=default_style.generations_repeat_count,
                    label="Generations for each image (how much images will be generated from each image)",
                    minimum=1,
                    maximum=10,
                )
                with gr.Row():
                    num_steps = gr.Number(
                        value=default_style.num_steps,
                        label="Num steps",
                        minimum=1,
                        maximum=100,
                    )
                    identity_strength_ration = gr.Number(
                        value=default_style.identity_strength_ratio,
                        label="Identity strength ration",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.01,
                    )
                    adapter_strength_ration = gr.Number(
                        value=default_style.adapter_strength_ratio,
                        label="Adapter strength ration",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.01,
                    )
                with gr.Row():
                    pose_strength_ration = gr.Number(
                        value=default_style.pose_strength_ratio,
                        label="Pose strength ration",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.01,
                    )
                    canny_strength_ration = gr.Number(
                        value=default_style.canny_strength_ratio,
                        label="Canny strength ration",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.01,
                    )
                    depth_strength_ration = gr.Number(
                        value=default_style.depth_strength_ratio,
                        label="Depth strength ration",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.01,
                    )
                with gr.Row():
                    guidance_strength_ration = gr.Number(
                        value=default_style.guidance_strength_ratio,
                        label="Guidance strength ration",
                        minimum=0.0,
                        maximum=50.0,
                        step=0.01,
                    )
                    scheduler = gr.Dropdown(
                        value=default_style.scheduler,
                        choices=[
                            "DEISMultistepScheduler",
                            "HeunDiscreteScheduler",
                            "EulerDiscreteScheduler",
                            "DPMSolverMultistepScheduler",
                            "DPMSolverMultistepScheduler-Karras",
                            "DPMSolverMultistepScheduler-Karras-SDE",
                        ],
                        label="Scheduler",
                    )
                with gr.Row():
                    controlnet_selection = gr.CheckboxGroup(
                        value=default_style.controlnet_selection,
                        choices=["pose", "canny", "depth"],
                        label="Controlnet selection",
                    )
                with gr.Row():
                    enable_lcm = gr.Checkbox(
                        value=default_style.enable_lcm,
                        label="Enable LCM",
                    )
                    enhance_face_region = gr.Checkbox(
                        value=default_style.enhance_face_region,
                        label="Enhance Face Region",
                    )

            btn = gr.Button(
                value="Generate",
            )

    status = gr.Markdown()
    output_gallery = gr.Gallery(
        label="Results",
        show_label=False,
        elem_id="gallery",
        columns=[3],
        rows=[1],
        object_fit="contain",
        height="auto",
    )

    style_dropdown.change(
        fn=update_gradion_elements_with_style,
        inputs=[style_dropdown],
        outputs=[
            prompt,
            negative_prompt,
            num_steps,
            identity_strength_ration,
            adapter_strength_ration,
            pose_strength_ration,
            canny_strength_ration,
            depth_strength_ration,
            guidance_strength_ration,
            generations_repeat_count,
            controlnet_selection,
            scheduler,
            enable_lcm,
            enhance_face_region,
        ],
    )

    def calculate_total_images(person_images_defaults, person_images_custom, pose_images_defaults, pose_images_custom, generations_repeat_count):
        """Return how many images the current selection would generate.

        Every person image is combined with every pose image and repeated
        ``generations_repeat_count`` times. Empty or None selections and an
        empty count field are treated as zero instead of raising.
        """
        person_images_count = sum(
            1 for image in (person_images_defaults or []) if image in default_images.values()
        ) + len(person_images_custom or [])

        pose_images_count = sum(
            1 for pose in (pose_images_defaults or []) if pose in default_poses.values()
        ) + len(pose_images_custom or [])
        if pose_images_count == 0:
            pose_images_count = 1  # Use person image if no poses are available

        # The numeric input may deliver a float or None.
        repeat_count = int(generations_repeat_count or 0)
        return person_images_count * pose_images_count * repeat_count

    def update_button_text(person_images_defaults, person_images_custom, pose_images_defaults, pose_images_custom, generations_repeat_count):
        """Return the Generate button label including the expected image count."""
        total_images = calculate_total_images(
            person_images_defaults,
            person_images_custom,
            pose_images_defaults,
            pose_images_custom,
            generations_repeat_count,
        )
        return f"Generate ({total_images} images)"

    # Any change to the selections or the repeat count refreshes the label.
    button_text_inputs = [
        default_faces_selection,
        custom_faces_upload,
        default_poses_selection,
        custom_poses_upload,
        generations_repeat_count,
    ]
    for button_text_trigger in button_text_inputs:
        button_text_trigger.change(
            fn=update_button_text,
            inputs=button_text_inputs,
            outputs=[btn],
        )

    btn.click(
        fn=process_images,
        inputs=[
            default_faces_selection,
            custom_faces_upload,
            default_poses_selection,
            custom_poses_upload,
            prompt,
            negative_prompt,
            num_steps,
            identity_strength_ration,
            adapter_strength_ration,
            pose_strength_ration,
            canny_strength_ration,
            depth_strength_ration,
            guidance_strength_ration,
            generations_repeat_count,
            controlnet_selection,
            scheduler,
            enable_lcm,
            enhance_face_region,
        ],
        outputs=[output_gallery, status],
    )

if __name__ == "__main__":
    demo.launch()