File size: 10,444 Bytes
4b08319
 
 
 
 
c4e1ac6
 
 
4b08319
c4e1ac6
4b08319
c4e1ac6
4b08319
 
c4e1ac6
 
4b08319
 
 
 
 
 
c4e1ac6
4b08319
 
 
 
 
 
 
cdb5002
4b08319
c4e1ac6
4b08319
 
c4e1ac6
 
4b08319
 
c4e1ac6
4b08319
c4e1ac6
 
4b08319
 
 
 
c4e1ac6
 
4b08319
 
 
 
 
 
 
 
 
c4e1ac6
4b08319
c4e1ac6
 
4b08319
 
 
 
 
cdb5002
4b08319
 
c4e1ac6
4b08319
 
 
 
c4e1ac6
4b08319
 
 
 
 
c4e1ac6
4b08319
 
 
 
 
 
cdb5002
4b08319
 
 
 
 
 
 
 
c94981f
4b08319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdb5002
4b08319
c4e1ac6
cdb5002
4b08319
 
 
 
 
 
 
 
 
 
 
cdb5002
4b08319
 
 
 
 
0a8b9fa
 
 
4b08319
 
 
 
f56068c
4b08319
0a8b9fa
29cfd71
4b08319
c4e1ac6
4b08319
 
 
 
0a8b9fa
f712c8b
4b08319
 
 
 
0a8b9fa
4b08319
 
 
 
 
 
 
 
c360629
4b08319
c360629
4b08319
 
 
c4e1ac6
9b34381
4b08319
 
9b34381
c4e1ac6
4b08319
c4e1ac6
4b08319
c360629
c4e1ac6
4b08319
c360629
4b08319
 
 
 
 
 
 
c360629
4b08319
 
 
 
c4e1ac6
 
4b08319
 
 
 
c360629
4b08319
 
 
 
 
29cfd71
 
f712c8b
 
 
 
c94981f
 
 
 
29cfd71
c360629
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29cfd71
 
 
f712c8b
29cfd71
4b08319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4e1ac6
 
4b08319
 
 
 
 
cdb5002
4b08319
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import spaces

import json
import yaml
import os

import torch

import gradio as gr

from huggingface_hub import hf_hub_download

from model.pipeline import JiTModel, JiTConfig
from model.config import ClassContextConfig


# Hub repository and in-repo file locations. Each value can be overridden by
# setting the environment variable of the same name.
MODEL_REPO = os.getenv("MODEL_REPO", "p1atdev/JiT-AnimeFace-experiment")
MODEL_PATH = os.getenv(
    "MODEL_PATH", "jit-b256-p16-cls/12-jit-animeface_00043e_033368s.safetensors"
)
LABEL2ID_PATH = os.getenv("LABEL2ID_PATH", "jit-b256-p16-cls/label2id.json")
CONFIG_PATH = os.getenv("CONFIG_PATH", "jit-b256-p16-cls/config.yml")

# Device preference order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# bfloat16 is used only on CUDA; every other device runs in float16.
DTYPE = torch.bfloat16 if DEVICE.type == "cuda" else torch.float16
MAX_TOKEN_LENGTH = 32

# Process-wide caches filled by load_model().
model_map: dict[str, JiTModel] = {}  # {model_path: model}
label2id_map: dict[str, dict] = {}  # {label2id_path: label2id}


def get_file_path(repo: str, path: str) -> str:
    """Fetch a file from the Hugging Face Hub.

    Args:
        repo: Hub repository id to download from.
        path: File name (path inside the repository) to fetch.

    Returns:
        Whatever ``hf_hub_download`` returns for that file (annotated here as
        a path string).
    """
    return hf_hub_download(repo_id=repo, filename=path)


def load_label2id(label2id_path: str) -> dict:
    """Load the label2id JSON file.

    Args:
        label2id_path: Local filesystem path of the JSON file.

    Returns:
        The parsed JSON content.
    """
    # Decode explicitly as UTF-8 so non-ASCII tags do not depend on the
    # platform's default locale encoding.
    with open(label2id_path, "r", encoding="utf-8") as f:
        return json.load(f)


def load_config(config_path: str) -> JiTConfig:
    """Load a configuration file and validate it into a ``JiTConfig``.

    Args:
        config_path: Local path of a ``.json``, ``.yaml`` or ``.yml`` file.

    Returns:
        The result of ``JiTConfig.model_validate`` on the parsed content.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default.
    with open(config_path, "r", encoding="utf-8") as f:
        if config_path.endswith(".json"):
            config_dict = json.load(f)
        elif config_path.endswith((".yaml", ".yml")):
            config_dict = yaml.safe_load(f)
        else:
            raise ValueError("Unsupported config file format. Use .json or .yaml/.yml")

    return JiTConfig.model_validate(config_dict)


def load_model(
    model_path: str,
    label2id_path: str,
    config_path: str,
    device: torch.device,
    dtype: torch.dtype = DTYPE,
) -> tuple[JiTModel, dict]:
    """Load the model and its label2id mapping, using the module-level caches.

    The model cache is keyed by ``model_path`` only and the label cache by
    ``label2id_path`` only. On a model cache hit the cached instance is
    returned as-is, so ``config_path``, ``device`` and ``dtype`` are ignored
    for an already-loaded model.

    Args:
        model_path: Checkpoint path inside ``MODEL_REPO``.
        label2id_path: label2id JSON path inside ``MODEL_REPO``.
        config_path: Config file path inside ``MODEL_REPO``.
        device: Device the freshly loaded model is moved to.
        dtype: Dtype the freshly loaded model is cast to.

    Returns:
        A ``(model, label2id)`` tuple.
    """
    model = model_map.get(model_path)
    if model is None:
        config = load_config(get_file_path(MODEL_REPO, config_path))
        if isinstance(config.context_encoder, ClassContextConfig):
            # Point the class-context encoder at the downloaded label file.
            config.context_encoder.label2id_map_path = get_file_path(
                MODEL_REPO, label2id_path
            )

        model = JiTModel.from_pretrained(
            config=config,
            checkpoint_path=get_file_path(MODEL_REPO, model_path),
        )
        model.eval()
        model.requires_grad_(False)
        model.to(device=device, dtype=dtype)
        model_map[model_path] = model  # cache

    # Look the label map up independently of the model cache: a cached model
    # requested with a label file that has not been loaded yet must not raise
    # KeyError.
    label2id = label2id_map.get(label2id_path)
    if label2id is None:
        label2id = load_label2id(get_file_path(MODEL_REPO, label2id_path))
        label2id_map[label2id_path] = label2id  # cache

    return model, label2id


@spaces.GPU(duration=6)
def generate_images(
    prompt: str,
    negative_prompt: str,
    num_steps: int,
    cfg_scale: float,
    batch_size: int,
    size: int,
    seed: int,
    #
    model_path: str = MODEL_PATH,
    label2id_path: str = LABEL2ID_PATH,
    config_path: str = CONFIG_PATH,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a batch of images for the given prompt.

    Args:
        prompt: Prompt text; it is repeated ``batch_size`` times.
        negative_prompt: Negative prompt text, passed through unchanged.
        num_steps: Number of inference steps.
        cfg_scale: Classifier-free guidance scale.
        batch_size: Number of images to generate in one call.
        size: Edge length used for both height and width.
        seed: Non-negative seed; a negative or missing value means
            "no fixed seed" (``None`` is passed to the model).
        model_path: Checkpoint path inside the model repository.
        label2id_path: label2id JSON path inside the model repository.
        config_path: Config file path inside the model repository.
        progress: Gradio progress tracker (tqdm tracking enabled).

    Returns:
        Whatever ``model.generate`` returns; the UI feeds it to a gallery.
    """
    model, _label2id = load_model(
        model_path=model_path,
        label2id_path=label2id_path,
        config_path=config_path,
        device=DEVICE,
        dtype=DTYPE,
    )

    # Values coming from UI widgets may be floats (or None for an emptied
    # number field); normalize them before they are used as counts or seeds.
    num_steps = int(num_steps)
    batch_size = int(batch_size)
    size = int(size)
    seed_value = int(seed) if seed is not None and seed >= 0 else None

    with torch.inference_mode(), torch.autocast(device_type=DEVICE.type, dtype=DTYPE):
        images = model.generate(
            prompt=[prompt] * batch_size,
            negative_prompt=negative_prompt,
            num_inference_steps=num_steps,
            cfg_scale=cfg_scale,
            height=size,
            width=size,
            max_token_length=MAX_TOKEN_LENGTH,
            cfg_time_range=[0.1, 1.0],
            seed=seed_value,
            device=DEVICE,
            execution_dtype=DTYPE,
        )

    return images


# Browser URL of the label2id file on the Hub, used for the links shown in the UI.
LABEL2ID_URL = f"https://huggingface.co/{MODEL_REPO}/blob/main/{LABEL2ID_PATH}"


def demo():
    """Build the Gradio UI and return the ``gr.Blocks`` instance.

    The left column holds the prompt inputs and sampling controls; the right
    column holds the generate button and the output gallery. Clicking the
    button or submitting the prompt calls ``generate_images``.
    """
    with gr.Blocks() as ui:
        gr.Markdown(f"""
# JiT-AnimeFace Demo
Pixel-space x-prediction flow-matching 90M parameter model for anime face generation, trained from scratch.

- See full supported tags: [label2id.json]({LABEL2ID_URL}). 対応しているタグ一覧は [こちら]({LABEL2ID_URL}) から確認できます。ここに載っていないタグは反応しません。
- Current model: [{MODEL_PATH}](https://huggingface.co/{MODEL_REPO}/blob/main/{MODEL_PATH})
""")

        with gr.Row():
            with gr.Column():
                prompt = gr.TextArea(
                    label="Prompt",
                    info=f"Space-separated tags. Not all of danbooru tags are supported. See [the full supported tags]({LABEL2ID_URL}). スペースで区切ってください。カンマ区切りは対応してません。",
                    value="general 1girl solo portrait looking_at_viewer medium_hair parted_lips blue_ribbon hair_ornament hairclip half_updo halterneck bokeh depth_of_field blurry_background head_tilt",
                    placeholder="e.g.: general 1girl solo portrait looking_at_viewer",
                )
                negative_prompt = gr.TextArea(
                    label="Negative Prompt",
                    info="Space-separated negative tags to avoid in generation. スペースで区切ってください。カンマ区切りは対応してません。",
                    value="retro_artstyle 1990s_(style) sketch",
                    lines=2,
                    placeholder="e.g.: retro_artstyle 1990s_(style) sketch",
                )
                num_steps = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=25,
                    step=1,
                    label="Number of Steps",
                    info="Recommended: more than 20 steps for better quality.",
                )
                cfg_scale = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=5.0,
                    step=0.25,
                    label="CFG Scale",
                    info="Recommended: more than 2.0 for better adherence to the prompt.",
                )
                batch_size = gr.Slider(
                    minimum=1,
                    maximum=64,
                    value=25,
                    step=1,
                    label="Batch Size",
                    info="Number of images to generate in one batch.",
                )
                size = gr.Slider(
                    minimum=64,
                    maximum=320,
                    value=256,
                    step=64,
                    label="Image Size",
                    info="Only 256x256 is supported in the current model. Other sizes may cause quality degradation.",
                )
                # precision=0 makes the widget round to an integer, so the
                # seed handed to the generator is never fractional.
                seed = gr.Number(
                    value=-1,
                    label="Seed (-1 for random)",
                    precision=0,
                )

            with gr.Column(scale=2):
                generate_button = gr.Button("Generate Images", variant="primary")
                output_gallery = gr.Gallery(
                    label="Generated Images",
                    columns=5,
                    height="768px",
                    preview=False,
                    show_label=True,
                )

        # Each example is a [prompt, negative_prompt] pair.
        gr.Examples(
            examples=[
                [
                    "general 1girl solo portrait looking_at_viewer medium_hair parted_lips blue_ribbon hair_ornament hairclip half_updo halterneck bokeh depth_of_field blurry_background head_tilt",
                    "retro_artstyle 1990s_(style) sketch",
                ],
                [
                    "general 1girl solo portrait looking_at_viewer",
                    "retro_artstyle 1990s_(style) sketch",
                ],
                [
                    "general 1girl solo portrait looking_at_viewer blue_hair short_hair blush open_mouth cat_ears animal_ears red_eyes white_background",
                    "retro_artstyle 1990s_(style) sketch",
                ],
                [
                    "general 1girl aqua_eyes baseball_cap blonde_hair closed_mouth earrings green_background hat jewelry looking_at_viewer shirt short_hair simple_background solo portrait yellow_shirt",
                    "retro_artstyle 1990s_(style) sketch",
                ],
                [
                    "general 1girl solo portrait looking_at_viewer brown_hair ahoge long_hair :| expressionless closed_mouth swept_bangs pink_eyes pink_background simple_background dutch_angle",
                    "retro_artstyle 1990s_(style) sketch smile",
                ],
                [
                    "general 1girl solo portrait looking_at_viewer hatsune_miku twintails long_hair blue_eyes one_eye_closed simple_background green_background",
                    "retro_artstyle 1990s_(style) sketch",
                ],
                [
                    "general 1girl portrait looking_at_viewer sketch head_tilt white_background monochrome open_mouth long_hair",
                    "retro_artstyle 1990s_(style)",
                ],
                [
                    "general 1girl solo from_behind short_hair simple_background black_background",
                    "retro_artstyle 1990s_(style) sketch",
                ],
                [
                    "general 1girl portrait looking_to_the_side glasses",
                    "retro_artstyle 1990s_(style) sketch",
                ],
                [
                    "general 1girl portrait looking_at_viewer cat_ears purple_theme ;d forehead animal_ears animal_ear_fluff cat_ears",
                    "retro_artstyle 1990s_(style) sketch",
                ],
            ],
            inputs=[prompt, negative_prompt],
            label="Examples",
            examples_per_page=20,
        )

        # The input order must match the leading positional parameters of
        # generate_images.
        gr.on(
            triggers=[generate_button.click, prompt.submit],
            fn=generate_images,
            inputs=[
                prompt,
                negative_prompt,
                num_steps,
                cfg_scale,
                batch_size,
                size,
                seed,
            ],
            outputs=output_gallery,
        )

    return ui


if __name__ == "__main__":
    # Load the default model once before serving so the module-level caches
    # are already populated when the first request arrives.
    load_model(MODEL_PATH, LABEL2ID_PATH, CONFIG_PATH, DEVICE, DTYPE)

    ui = demo()
    ui.launch()