import torch
import gradio as gr
from shared.utils import files_locator as fl


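# Resolve the text-encoder checkpoint path for this family. The "int8"
# variant follows the quanto naming convention used by the checkpoints in
# the DeepBeepMeep/Qwen_image repository (see query_model_files below).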
def get_qwen_text_encoder_filename(text_encoder_quantization):
    text_encoder_filename = "Qwen2.5-VL-7B-Instruct/Qwen2.5-VL-7B-Instruct_bf16.safetensors"
    if text_encoder_quantization == "int8":
        text_encoder_filename = text_encoder_filename.replace("bf16", "quanto_bf16_int8")
    return fl.locate_file(text_encoder_filename, True)


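# Per-family handler queried by the host application. The query_* static
# methods describe capabilities, downloadable files and UI defaults for the
# Qwen image models; load_model builds the actual pipeline.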
class family_handler():
    @staticmethod
    def query_model_def(base_model_type, model_def):
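        # Describes model capabilities and the controls to expose; the
        # returned keys are interpreted by the host application's UI layer.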
        extra_model_def = {
            "image_outputs": True,
            "sample_solvers": [
                ("Default", "default"),
                ("Lightning", "lightning"),
            ],
            "guidance_max_phases": 1,
            "fit_into_canvas_image_refs": 0,
            "profiles_dir": ["qwen"],
        }

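        # The edit variants additionally support inpainting, reference
        # images and outpainting.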
        if base_model_type in ["qwen_image_edit_20B", "qwen_image_edit_plus_20B"]:
            extra_model_def["inpaint_support"] = True
            extra_model_def["image_ref_choices"] = {
                "choices": [
                    ("None", ""),
                    ("Conditional Image is first Main Subject / Landscape and may be followed by People / Objects", "KI"),
                    ("Conditional Images are People / Objects", "I"),
                ],
                "letters_filter": "KI",
            }
            extra_model_def["background_removal_label"] = "Remove Backgrounds only behind People / Objects except main Subject / Landscape"
            extra_model_def["video_guide_outpainting"] = [2]
            extra_model_def["model_modes"] = {
                "choices": [
                    ("Lora Inpainting: Inpainted area completely unrelated to occluded content", 1),
                    ("Masked Denoising: Inpainted area may reuse some content that has been occluded", 0),
                ],
                "default": 1,
                "label": "Inpainting Method",
                "image_modes": [2],
            }

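        # Only the "plus" edit variant exposes guide/mask preprocessing; the
        # letter codes are preprocessor ids defined by the host application.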
        if base_model_type in ["qwen_image_edit_plus_20B"]:
            extra_model_def["guide_preprocessing"] = {
                "selection": ["", "PV", "DV", "SV", "CV"],
            }

            extra_model_def["mask_preprocessing"] = {
                "selection": ["", "A"],
                "visible": False,
            }
        return extra_model_def

    @staticmethod
    def query_supported_types():
        return ["qwen_image_20B", "qwen_image_edit_20B", "qwen_image_edit_plus_20B"]

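    # This family defines no extra model maps, hence the two empty dicts.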
    @staticmethod
    def query_family_maps():
        return {}, {}

    @staticmethod
    def query_model_family():
        return "qwen"

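    # (sort order, display name) of the family, presumably consumed by the
    # model family selector in the UI.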
    @staticmethod
    def query_family_infos():
        return {"qwen": (40, "Qwen")}

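    # Download manifest consumed by the host app: files are fetched from the
    # Hugging Face repo below into the folders named in sourceFolderList;
    # computeList() presumably expands the text-encoder path into the file(s)
    # that still need downloading.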
    @staticmethod
    def query_model_files(computeList, base_model_type, model_filename, text_encoder_quantization):
        text_encoder_filename = get_qwen_text_encoder_filename(text_encoder_quantization)
        return {
            "repoId": "DeepBeepMeep/Qwen_image",
            "sourceFolderList": ["", "Qwen2.5-VL-7B-Instruct"],
            "fileList": [
                ["qwen_vae.safetensors", "qwen_vae_config.json"],
                ["merges.txt", "tokenizer_config.json", "config.json", "vocab.json", "video_preprocessor_config.json", "preprocessor_config.json"] + computeList(text_encoder_filename),
            ],
        }

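    # Instantiate the Qwen pipeline. Most arguments are forwarded verbatim to
    # model_factory; override_text_encoder, when set, bypasses the
    # quantization-dependent default checkpoint.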
    @staticmethod
    def load_model(model_filename, model_type, base_model_type, model_def, quantizeTransformer=False,
                   text_encoder_quantization=None, dtype=torch.bfloat16, VAE_dtype=torch.float32,
                   mixed_precision_transformer=False, save_quantized=False, submodel_no_list=None,
                   override_text_encoder=None):
        from .qwen_main import model_factory
        from mmgp import offload

        pipe_processor = model_factory(
            checkpoint_dir="ckpts",
            model_filename=model_filename,
            model_type=model_type,
            model_def=model_def,
            base_model_type=base_model_type,
            text_encoder_filename=(get_qwen_text_encoder_filename(text_encoder_quantization)
                                   if override_text_encoder is None else override_text_encoder),
            quantizeTransformer=quantizeTransformer,
            dtype=dtype,
            VAE_dtype=VAE_dtype,
            mixed_precision_transformer=mixed_precision_transformer,
            save_quantized=save_quantized,
        )

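        # Map of submodules handed back to the caller, presumably so the
        # mmgp offload machinery can manage their placement.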
        pipe = {
            "tokenizer": pipe_processor.tokenizer,
            "transformer": pipe_processor.transformer,
            "text_encoder": pipe_processor.text_encoder,
            "vae": pipe_processor.vae,
        }

        return pipe_processor, pipe

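    # Migrate settings saved by older versions of the app to the current
    # schema.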
    @staticmethod
    def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
        if ui_defaults.get("sample_solver", "") == "":
            ui_defaults["sample_solver"] = "default"

        if settings_version < 2.32:
            ui_defaults["denoising_strength"] = 1.

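    # Per-model defaults applied when a model of this family is selected.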
    @staticmethod
    def update_default_settings(base_model_type, model_def, ui_defaults):
        ui_defaults.update({
            "guidance_scale": 4,
            "sample_solver": "default",
        })
        if base_model_type in ["qwen_image_edit_20B"]:
            ui_defaults.update({
                "video_prompt_type": "KI",
                "denoising_strength": 1.,
                "model_mode": 0,
            })
        elif base_model_type in ["qwen_image_edit_plus_20B"]:
            ui_defaults.update({
                "video_prompt_type": "I",
                "denoising_strength": 1.,
                "model_mode": 0,
            })

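    # Check cross-field constraints before generation: gr.Info only warns,
    # while returning a string presumably aborts with that error message.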
    @staticmethod
    def validate_generative_settings(base_model_type, model_def, inputs):
        if base_model_type in ["qwen_image_edit_20B", "qwen_image_edit_plus_20B"]:
            model_mode = inputs["model_mode"]
            denoising_strength = inputs["denoising_strength"]
            video_guide_outpainting = inputs["video_guide_outpainting"]
            from wgp import get_outpainting_dims
            outpainting_dims = get_outpainting_dims(video_guide_outpainting)

            if denoising_strength < 1 and model_mode == 1:
                gr.Info("Denoising Strength will be ignored while using Lora Inpainting")
            if outpainting_dims is not None and model_mode == 0:
                return "Outpainting is not supported with Masked Denoising"

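    # Shared RGB projection factors for the family, presumably used to render
    # quick RGB previews of latents during generation.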
    @staticmethod
    def get_rgb_factors(base_model_type):
        from shared.RGB_factors import get_rgb_factors
        latent_rgb_factors, latent_rgb_factors_bias = get_rgb_factors("qwen")
        return latent_rgb_factors, latent_rgb_factors_bias