recoilme commited on
Commit
3a840b2
·
1 Parent(s): dc8f7ca
samples/unet_192x384_0.jpg CHANGED

Git LFS Details

  • SHA256: 99e4eafa5f17dcc71f4eb39e566ff604e4efd02eb415d7e1b73d57493428211b
  • Pointer size: 130 Bytes
  • Size of remote file: 49.3 kB

Git LFS Details

  • SHA256: 38117cd54aaa29f666d2e3051d5d992eff8ce70a0b06eb32533208a6ff478edb
  • Pointer size: 130 Bytes
  • Size of remote file: 44.6 kB
samples/unet_256x384_0.jpg CHANGED

Git LFS Details

  • SHA256: f17907d94f68f4b32203cdfd9581f34b92ae5fe9183be66bb08eea97c582332e
  • Pointer size: 130 Bytes
  • Size of remote file: 59.5 kB

Git LFS Details

  • SHA256: f363f1bf8801e3fe92bc769c3c90ca1b94ce3f6d6c5c9a3ce89626c32f455d0a
  • Pointer size: 130 Bytes
  • Size of remote file: 50.2 kB
samples/unet_320x384_0.jpg CHANGED

Git LFS Details

  • SHA256: c99a81a2388b45c13d820a12fea55fe64eab6b2cc2ddd550577959719f6ddeb0
  • Pointer size: 130 Bytes
  • Size of remote file: 67.4 kB

Git LFS Details

  • SHA256: ee74faaaa6b66397275f3c48f881bfa1067c4d1c810316532b7cf58f463d8d08
  • Pointer size: 130 Bytes
  • Size of remote file: 57.8 kB
samples/unet_384x192_0.jpg CHANGED

Git LFS Details

  • SHA256: a4f95ebdc3a74da54fc86f01e7bbd88c33f536b39510c91772b323cb39562563
  • Pointer size: 130 Bytes
  • Size of remote file: 40.4 kB

Git LFS Details

  • SHA256: 5d07cd159d64b6d3351f803b0c813abed358706a0af876d18d9ca6ff206796ea
  • Pointer size: 130 Bytes
  • Size of remote file: 27 kB
samples/unet_384x256_0.jpg CHANGED

Git LFS Details

  • SHA256: cd417c22d2b8b8601874df23152afc5d738cf66bff5f0600f661cbdc3e7ffa4c
  • Pointer size: 130 Bytes
  • Size of remote file: 68.7 kB

Git LFS Details

  • SHA256: ba3d64e771f229df47e610ce9fd719c4f5c9dc91d73a303c975b71b153868309
  • Pointer size: 130 Bytes
  • Size of remote file: 59.7 kB
samples/unet_384x320_0.jpg CHANGED

Git LFS Details

  • SHA256: 5407e41fb74acd293cba1b8cbbfac47af1c03b8e2357d010312ff101a0000a0c
  • Pointer size: 130 Bytes
  • Size of remote file: 56.6 kB

Git LFS Details

  • SHA256: 05e6df6588d0add9cdaf8b48297e6ce205721f66bed63fdc450fed38202a5ee9
  • Pointer size: 130 Bytes
  • Size of remote file: 54.4 kB
samples/unet_384x384_0.jpg CHANGED

Git LFS Details

  • SHA256: a18bdedab3d5d667d49a84ffde97b1ed9c22b8aba4f3431b4b97265ccc242951
  • Pointer size: 130 Bytes
  • Size of remote file: 71.8 kB

Git LFS Details

  • SHA256: 0f9a2a1492e315f00ea0c43dc0746d944c16371e7d8df3c9edbd398f0e8c38bb
  • Pointer size: 130 Bytes
  • Size of remote file: 63.5 kB
src/dataset_fromzip.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
src/sample.ipynb CHANGED
@@ -30,15 +30,17 @@
30
  " \"AiArtLab/sdxs3d\", subfolder=\"vae\", torch_dtype=dtype\n",
31
  ").to(device).eval()\n",
32
  "\n",
33
- "unet = UNet2DConditionModel.from_pretrained( \"/workspace/sdxs3d/unet\"#\"AiArtLab/sdxs3d\"\n",
34
- " , subfolder=\"unet\", torch_dtype=dtype\n",
 
 
35
  ").to(device).eval()\n",
36
  "\n",
37
- "tokenizer = AutoTokenizer.from_pretrained(\"Qwen/Qwen3-Embedding-0.6B\", padding_side=\"left\")\n",
38
- "text_model = AutoModel.from_pretrained(\"Qwen/Qwen3-Embedding-0.6B\").to(device).eval()\n",
39
  "\n",
40
  "# ====== FlowMatch Scheduler ======\n",
41
- "scheduler = FlowMatchEulerDiscreteScheduler()\n",
42
  "print('loaded')\n",
43
  "\n"
44
  ]
@@ -271,7 +273,7 @@
271
  " generator = torch.Generator(device=device).manual_seed(42)\n",
272
  ")\n",
273
  "\n",
274
- "grid = display_image_grid(images,prompts, cols=3, save_path=\"../result_grid.jpg\")\n"
275
  ]
276
  },
277
  {
@@ -313,7 +315,266 @@
313
  "id": "b08fbf66-8bd1-4a20-8715-0e748a07a932",
314
  "metadata": {},
315
  "outputs": [],
316
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  }
318
  ],
319
  "metadata": {
 
30
  " \"AiArtLab/sdxs3d\", subfolder=\"vae\", torch_dtype=dtype\n",
31
  ").to(device).eval()\n",
32
  "\n",
33
+ "unet = UNet2DConditionModel.from_pretrained(\n",
34
+ " \"AiArtLab/sdxs3d\" \n",
35
+ " #\"/workspace/sdxs3d\" \n",
36
+ " , subfolder=\"unet\", torch_dtype=dtype\n",
37
  ").to(device).eval()\n",
38
  "\n",
39
+ "tokenizer = AutoTokenizer.from_pretrained(\"AiArtLab/sdxs3d\", subfolder=\"tokenizer\")\n",
40
+ "text_model = AutoModel.from_pretrained(\"AiArtLab/sdxs3d\", subfolder=\"text_encoder\").to(device).eval()\n",
41
  "\n",
42
  "# ====== FlowMatch Scheduler ======\n",
43
+ "scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(\"AiArtLab/sdxs3d\", subfolder=\"scheduler\")\n",
44
  "print('loaded')\n",
45
  "\n"
46
  ]
 
273
  " generator = torch.Generator(device=device).manual_seed(42)\n",
274
  ")\n",
275
  "\n",
276
+ "grid = display_image_grid(images,prompts, cols=3, save_path=\"result_grid.jpg\")\n"
277
  ]
278
  },
279
  {
 
315
  "id": "b08fbf66-8bd1-4a20-8715-0e748a07a932",
316
  "metadata": {},
317
  "outputs": [],
318
+ "source": [
319
+ "import gradio as gr\n",
320
+ "import numpy as np\n",
321
+ "import random\n",
322
+ "\n",
323
+ "import spaces #[uncomment to use ZeroGPU]\n",
324
+ "import torch\n",
325
+ "\n",
326
+ "from diffusers import DiffusionPipeline, AutoencoderKL, UNet2DConditionModel, FlowMatchEulerDiscreteScheduler\n",
327
+ "from transformers import AutoTokenizer, AutoModel\n",
328
+ "\n",
329
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
330
+ "model_repo_id = \"AiArtLab/sdxs3d\" # Replace to the model you would like to use\n",
331
+ "\n",
332
+ "if torch.cuda.is_available():\n",
333
+ " dtype = torch.float16\n",
334
+ "else:\n",
335
+ " dtype = torch.float32\n",
336
+ "\n",
337
+ "\n",
338
+ "class SimpleDiffusionPipeline(DiffusionPipeline):\n",
339
+ " def __init__(self, vae, text_encoder, tokenizer, unet, scheduler):\n",
340
+ " super().__init__()\n",
341
+ " self.register_modules(\n",
342
+ " vae=vae,\n",
343
+ " text_encoder=text_encoder,\n",
344
+ " tokenizer=tokenizer,\n",
345
+ " unet=unet,\n",
346
+ " scheduler=scheduler,\n",
347
+ " )\n",
348
+ "\n",
349
+ " @torch.no_grad()\n",
350
+ " def __call__(\n",
351
+ " self,\n",
352
+ " prompt,\n",
353
+ " negative_prompt=None,\n",
354
+ " height=512,\n",
355
+ " width=512,\n",
356
+ " num_inference_steps=50,\n",
357
+ " guidance_scale=4.0,\n",
358
+ " generator=None,\n",
359
+ " **kwargs,\n",
360
+ " ):\n",
361
+ " batch_size = len(prompt) if isinstance(prompt, list) else 1\n",
362
+ "\n",
363
+ " # 1. Токенизация\n",
364
+ " toks = self.tokenizer(\n",
365
+ " prompt,\n",
366
+ " padding=\"max_length\",\n",
367
+ " truncation=True,\n",
368
+ " max_length=512,\n",
369
+ " return_tensors=\"pt\"\n",
370
+ " ).to(self.device)\n",
371
+ "\n",
372
+ " outs = self.text_encoder(**toks)\n",
373
+ " text_emb = outs.last_hidden_state[:, -1].unsqueeze(1) # твой last_token_pool\n",
374
+ "\n",
375
+ " if negative_prompt is not None:\n",
376
+ " neg_toks = self.tokenizer(\n",
377
+ " negative_prompt,\n",
378
+ " padding=\"max_length\",\n",
379
+ " truncation=True,\n",
380
+ " max_length=512,\n",
381
+ " return_tensors=\"pt\"\n",
382
+ " ).to(self.device)\n",
383
+ " neg_outs = self.text_encoder(**neg_toks)\n",
384
+ " neg_emb = neg_outs.last_hidden_state[:, -1].unsqueeze(1)\n",
385
+ " else:\n",
386
+ " neg_emb = torch.zeros_like(text_emb)\n",
387
+ "\n",
388
+ " # guidance\n",
389
+ " if guidance_scale != 1.0:\n",
390
+ " text_emb = torch.cat([neg_emb, text_emb])\n",
391
+ "\n",
392
+ " # 2. Латенты\n",
393
+ " latents = torch.randn(\n",
394
+ " (batch_size, self.unet.config.in_channels, height // self.vae.config.scaling_factor, width // self.vae.config.scaling_factor),\n",
395
+ " device=self.device,\n",
396
+ " dtype=torch.float16,\n",
397
+ " generator=generator,\n",
398
+ " )\n",
399
+ "\n",
400
+ " self.scheduler.set_timesteps(num_inference_steps, device=self.device)\n",
401
+ "\n",
402
+ " # 3. Диффузия\n",
403
+ " for t in self.scheduler.timesteps:\n",
404
+ " latent_input = torch.cat([latents, latents]) if guidance_scale != 1.0 else latents\n",
405
+ " flow = self.unet(latent_input, t, encoder_hidden_states=text_emb).sample\n",
406
+ "\n",
407
+ " if guidance_scale != 1.0:\n",
408
+ " flow_uncond, flow_cond = flow.chunk(2)\n",
409
+ " flow = flow_uncond + guidance_scale * (flow_cond - flow_uncond)\n",
410
+ "\n",
411
+ " latents = self.scheduler.step(flow, t, latents).prev_sample\n",
412
+ "\n",
413
+ " # 4. Декод\n",
414
+ " latents = latents / self.vae.config.scaling_factor\n",
415
+ " images = self.vae.decode(latents).sample\n",
416
+ " images = (images / 2 + 0.5).clamp(0, 1)\n",
417
+ "\n",
418
+ " return images\n",
419
+ "\n",
420
+ "\n",
421
+ "vae = AutoencoderKL.from_pretrained(model_repo_id, subfolder=\"vae\", torch_dtype=dtype).to(device)\n",
422
+ "unet = UNet2DConditionModel.from_pretrained(model_repo_id, subfolder=\"unet\", torch_dtype=dtype).to(device)\n",
423
+ "tokenizer = AutoTokenizer.from_pretrained(model_repo_id, subfolder=\"tokenizer\")\n",
424
+ "text_encoder = AutoModel.from_pretrained(model_repo_id, subfolder=\"text_encoder\", torch_dtype=dtype).to(device)\n",
425
+ "scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo_id, subfolder=\"scheduler\")\n",
426
+ "\n",
427
+ "pipe = SimpleDiffusionPipeline(\n",
428
+ " vae=vae,\n",
429
+ " text_encoder=text_encoder,\n",
430
+ " tokenizer=tokenizer,\n",
431
+ " unet=unet,\n",
432
+ " scheduler=scheduler,\n",
433
+ ").to(device)\n",
434
+ "\n",
435
+ "\n",
436
+ "MAX_SEED = np.iinfo(np.int32).max\n",
437
+ "MAX_IMAGE_SIZE = 384\n",
438
+ "\n",
439
+ "\n",
440
+ "@spaces.GPU #[uncomment to use ZeroGPU]\n",
441
+ "def infer(\n",
442
+ " prompt,\n",
443
+ " negative_prompt,\n",
444
+ " seed,\n",
445
+ " randomize_seed,\n",
446
+ " width,\n",
447
+ " height,\n",
448
+ " guidance_scale,\n",
449
+ " num_inference_steps,\n",
450
+ " progress=gr.Progress(track_tqdm=True),\n",
451
+ "):\n",
452
+ " if randomize_seed:\n",
453
+ " seed = random.randint(0, MAX_SEED)\n",
454
+ "\n",
455
+ " generator = torch.Generator(device=device).manual_seed(seed) # ← используйте seed, а не 42!\n",
456
+ "\n",
457
+ " # Генерация\n",
458
+ " images_tensor = pipe(\n",
459
+ " prompt=prompt,\n",
460
+ " negative_prompt=negative_prompt,\n",
461
+ " guidance_scale=guidance_scale,\n",
462
+ " num_inference_steps=num_inference_steps,\n",
463
+ " width=width,\n",
464
+ " height=height,\n",
465
+ " generator=generator,\n",
466
+ " ) # [B, C, H, W]\n",
467
+ "\n",
468
+ " # Конвертация в numpy для Gradio\n",
469
+ " image = images_tensor[0].cpu().permute(1, 2, 0).numpy()\n",
470
+ " image = (image * 255).astype(np.uint8)\n",
471
+ "\n",
472
+ " return image, seed\n",
473
+ "\n",
474
+ "\n",
475
+ "examples = [\n",
476
+ " \"A delicious ceviche cheesecake slice\",\n",
477
+ " \"ариец в имперских доспехах будущего\",\n",
478
+ " \"A close-up image of an astronaut's helmet with a frosted and opaque visor. The visor reflects the cold, frozen texture of space. Resting on the surface of the visor is a butterfly with vibrant, intricately patterned wings. The contrast between the delicate natural beauty of the butterfly and the cold, industrial helmet creates a striking image. The butterfly adds a touch of fragility and life to the otherwise harsh and unfeeling setting. The faint glow of distant stars can be seen through the frost, further enhancing the surreal atmosphere.\", \n",
479
+ "]\n",
480
+ "\n",
481
+ "css = \"\"\"\n",
482
+ "#col-container {\n",
483
+ " margin: 0 auto;\n",
484
+ " max-width: 640px;\n",
485
+ "}\n",
486
+ "\"\"\"\n",
487
+ "\n",
488
+ "with gr.Blocks(css=css) as demo:\n",
489
+ " with gr.Column(elem_id=\"col-container\"):\n",
490
+ " gr.Markdown(\" # Text-to-Image Gradio Template\")\n",
491
+ "\n",
492
+ " with gr.Row():\n",
493
+ " prompt = gr.Text(\n",
494
+ " label=\"Prompt\",\n",
495
+ " show_label=False,\n",
496
+ " max_lines=1,\n",
497
+ " placeholder=\"Enter your prompt\",\n",
498
+ " container=False,\n",
499
+ " )\n",
500
+ "\n",
501
+ " run_button = gr.Button(\"Run\", scale=0, variant=\"primary\")\n",
502
+ "\n",
503
+ " result = gr.Image(label=\"Result\", show_label=False)\n",
504
+ "\n",
505
+ " with gr.Accordion(\"Advanced Settings\", open=False):\n",
506
+ " negative_prompt = gr.Text(\n",
507
+ " label=\"Negative prompt\",\n",
508
+ " max_lines=1,\n",
509
+ " placeholder=\"Enter a negative prompt\",\n",
510
+ " visible=True,\n",
511
+ " value =\"low quality\"\n",
512
+ " )\n",
513
+ "\n",
514
+ " seed = gr.Slider(\n",
515
+ " label=\"Seed\",\n",
516
+ " minimum=0,\n",
517
+ " maximum=MAX_SEED,\n",
518
+ " step=1,\n",
519
+ " value=0,\n",
520
+ " )\n",
521
+ "\n",
522
+ " randomize_seed = gr.Checkbox(label=\"Randomize seed\", value=True)\n",
523
+ "\n",
524
+ " with gr.Row():\n",
525
+ " width = gr.Slider(\n",
526
+ " label=\"Width\",\n",
527
+ " minimum=192,\n",
528
+ " maximum=MAX_IMAGE_SIZE,\n",
529
+ " step=64,\n",
530
+ " value=256, # Replace with defaults that work for your model\n",
531
+ " )\n",
532
+ "\n",
533
+ " height = gr.Slider(\n",
534
+ " label=\"Height\",\n",
535
+ " minimum=192,\n",
536
+ " maximum=MAX_IMAGE_SIZE,\n",
537
+ " step=64,\n",
538
+ " value=384, # Replace with defaults that work for your model\n",
539
+ " )\n",
540
+ "\n",
541
+ " with gr.Row():\n",
542
+ " guidance_scale = gr.Slider(\n",
543
+ " label=\"Guidance scale\",\n",
544
+ " minimum=0.0,\n",
545
+ " maximum=10.0,\n",
546
+ " step=0.1,\n",
547
+ " value=4.0, # Replace with defaults that work for your model\n",
548
+ " )\n",
549
+ "\n",
550
+ " num_inference_steps = gr.Slider(\n",
551
+ " label=\"Number of inference steps\",\n",
552
+ " minimum=1,\n",
553
+ " maximum=50,\n",
554
+ " step=1,\n",
555
+ " value=40, # Replace with defaults that work for your model\n",
556
+ " )\n",
557
+ "\n",
558
+ " gr.Examples(examples=examples, inputs=[prompt])\n",
559
+ " gr.on(\n",
560
+ " triggers=[run_button.click, prompt.submit],\n",
561
+ " fn=infer,\n",
562
+ " inputs=[\n",
563
+ " prompt,\n",
564
+ " negative_prompt,\n",
565
+ " seed,\n",
566
+ " randomize_seed,\n",
567
+ " width,\n",
568
+ " height,\n",
569
+ " guidance_scale,\n",
570
+ " num_inference_steps,\n",
571
+ " ],\n",
572
+ " outputs=[result, seed],\n",
573
+ " )\n",
574
+ "\n",
575
+ "if __name__ == \"__main__\":\n",
576
+ " demo.launch()"
577
+ ]
578
  }
579
  ],
580
  "metadata": {
unet/config.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afc06beff07034f0ce9f671c83222e7f78eedc3b3ce93293143accdebef1b111
3
- size 1887
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ef8fbaff98c8d479d68b566d07ef4fb8e51ac26b9e8b5a3cb2b23f9a978f6ca
3
+ size 1874
unet/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88a4685341dba55274c8ae3991144a18be96dd6257c81612e749a408e934544f
3
- size 3092571208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e069f7e9f439bba567cd93aa9942ed3481c57a542dceed41fa78f9c97a344dfe
3
+ size 6184944280